brave-yak-3559
10/09/2023, 8:33 PM
@step
def load_data(self):
    """Fetch the CSV at ``self.org_data`` from S3 and store it as ``self.dataset``.

    The dataset is loaded with ``keep_in_memory=True``. By default the
    loaded tables are memory-mapped from Arrow files in the local Hugging
    Face cache, and pickling such a table records only the cache file path.
    When a later step unpickles the artifact on a machine or container that
    lacks that cache directory, ``pa.memory_map`` raises
    ``FileNotFoundError``. Holding the tables in memory makes the pickled
    artifact carry the data itself.

    NOTE(review): this keeps the whole dataset in RAM and in the artifact.
    For very large data, consider persisting only the S3 location and
    re-loading it in the step that needs it -- confirm sizing before use.
    """
    from datasets import load_dataset

    with S3() as s3:
        s3obj = s3.get(self.org_data)
        # Load while the S3 context is still open: s3obj.path points at a
        # local download that is presumably cleaned up when the context
        # exits -- verify against the Metaflow S3 client documentation.
        dataset = load_dataset(
            "csv",
            data_files=s3obj.path,
            keep_in_memory=True,
        )
    self.dataset = dataset
    print(self.dataset)
    self.next(self.eda)
@step
def eda(self):
    """Print the ``dataset`` artifact handed over by the ``load_data`` step."""
    # Reading the attribute is what makes Metaflow load (unpickle) the
    # artifact from the datastore, as the traceback for this step shows.
    loaded = self.dataset
    print(loaded)
Error (raised when the next step reads `self.dataset`):
File "flow.py", line 133, in pre_nightingale_eda
print(self.dataset)
File "/flow/metaflow/metaflow/flowspec.py", line 224, in __getattr__
x = self._datastore[name]
File "/flow/metaflow/metaflow/datastore/task_datastore.py", line 45, in method
return f(self, args, kwargs)
File "/flow/metaflow/metaflow/datastore/task_datastore.py", line 836, in __getitem__
_, obj = next(self.load_artifacts([name]))
File "/flow/metaflow/metaflow/datastore/task_datastore.py", line 370, in load_artifacts
yield name, pickle.loads(blob)
File "/usr/local/lib/python3.8/site-packages/datasets/table.py", line 1069, in __setstate__
table = _memory_mapped_arrow_table_from_file(path)
File "/usr/local/lib/python3.8/site-packages/datasets/table.py", line 65, in _memory_mapped_arrow_table_from_file
opened_stream = _memory_mapped_record_batch_reader_from_file(filename)
File "/usr/local/lib/python3.8/site-packages/datasets/table.py", line 50, in _memory_mapped_record_batch_reader_from_file
memory_mapped_stream = pa.memory_map(filename)
File "pyarrow/io.pxi", line 1009, in pyarrow.lib.memory_map
File "pyarrow/io.pxi", line 956, in pyarrow.lib.MemoryMappedFile._open
File "pyarrow/error.pxi", line 144, in pyarrow.lib.pyarrow_internal_check_status
File "pyarrow/error.pxi", line 113, in pyarrow.lib.check_status
FileNotFoundError: [Errno 2] Failed to open local file '/root/.cache/huggingface/datasets/csv/default-42f4d214a7c91375/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d/csv-train-00000-of-00009.arrow'. Detail: [errno 2] No such file or directory