Skip to content

Commit 9f45493

Browse files
authored
Merge pull request #1379 from Kaggle/resolve-dataset-issue
fix datasets
2 parents ce896ee + 79c3fd4 commit 9f45493

File tree

2 files changed

+12
-7
lines changed

2 files changed

+12
-7
lines changed

Dockerfile.tmpl

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -525,7 +525,6 @@ RUN pip install flashtext \
525 525   pyemd \
526 526   pyupset \
527 527   pympler \
528     - s3fs \
529 528   featuretools \
530 529   #-e git+https://github.com/SohierDane/BigQuery_Helper#egg=bq_helper \
531 530   git+https://github.com/Kaggle/learntools \
@@ -561,8 +560,9 @@ RUN pip install pytorch-ignite \
561 560   bqplot \
562 561   earthengine-api \
563 562   transformers \
564     - # b/232247930 >= 2.2.0 requires pyarrow >= 6.0.0 which conflicts with dependencies for rapidsai 0.21.*
565     - datasets==2.1.0 \
    563 + datasets \
    564 + s3fs \
    565 + gcsfs \
566 566   kaggle-environments \
567 567   geopandas \
568 568   "shapely<2" \

tests/test_hf_datasets.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
1 1   import unittest
2 2
3   - from datasets import Dataset
4   -
  3 + import datasets
  4 + import pandas as pd
5 5
6 6   class TestHuggingFaceDatasets(unittest.TestCase):
7 7
@@ -10,7 +10,12 @@ def some_func(batch):
10 10               batch['label'] = 'foo'
11 11               return batch
12 12
13    -         df = Dataset.from_dict({'text': ['Kaggle rocks!']})
   13 +         df = datasets.Dataset.from_dict({'text': ['Kaggle rocks!']})
14 14           mapped_df = df.map(some_func)
15 15
16    -         self.assertEqual('foo', mapped_df[0]['label'])
   16 +         self.assertEqual('foo', mapped_df[0]['label'])
   17 +
   18 +     def test_load_dataset(self):
   19 +         dataset = datasets.load_dataset("csv", data_files="/input/tests/data/train.csv")
   20 +         full_data = pd.DataFrame(dataset['train'])
   21 +         self.assertFalse(full_data.empty)

0 commit comments

Comments
 (0)