Skip to content

Commit

Permalink
Fix documentation and type variables in CustomDataset checkpoint methods (#342)
Browse files Browse the repository at this point in the history
  • Loading branch information
plaguss authored Feb 14, 2024
1 parent d24ee88 commit 82bc646
Showing 1 changed file with 21 additions and 6 deletions.
27 changes: 21 additions & 6 deletions src/distilabel/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,23 +185,34 @@ def save_to_disk(self, dataset_path: os.PathLike, **kwargs: Any) -> None:
"""Saves the dataset to disk, also saving the task.
Args:
dataset_path: Path to the dataset.
**kwargs: Additional arguments to be passed to `datasets.Dataset.save_to_disk`.
dataset_path (os.PathLike): Path to the dataset.
kwargs (Any): Additional arguments to be passed to `datasets.Dataset.save_to_disk`.
Examples:
>>> from distilabel.dataset import CustomDataset
>>> dataset = CustomDataset(...)
>>> dataset.save_to_disk("path/to/dataset")
"""
super().save_to_disk(dataset_path, **kwargs)
if self.task is not None:
self.task.save(Path(dataset_path))

@classmethod
def load_from_disk(cls, dataset_path: os.PathLike, **kwargs: Any):
def load_from_disk(
cls, dataset_path: os.PathLike, **kwargs: Any
) -> "CustomDataset":
"""Load a CustomDataset from disk, also reading the task.
Args:
dataset_path (os.PathLike): Path to the dataset.
kwargs (Any): Keyword arguments passed to Dataset.load_from_disk.
Returns:
The loaded dataset.
dataset: The loaded dataset.
Examples:
>>> from distilabel.dataset import CustomDataset
>>> dataset: CustomDataset = CustomDataset.load_from_disk("path/to/dataset")
"""
ds = super().load_from_disk(dataset_path, **kwargs)
# Dynamically remaps the `datasets.Dataset` to be a `CustomDataset` instance
Expand All @@ -226,13 +237,17 @@ def push_to_hub(
`<org>/<dataset_name>`. Also accepts `<dataset_name>`, which will default to the namespace
of the logged-in user.
args (Any): Additional arguments to be passed to `datasets.Dataset.push_to_hub`.
push_task (bool, optional): _description_. Defaults to True.
push_task (bool, optional):
Whether to push the `Task` contained in the `CustomDataset`. Useful if you want to reuse
the functionality of the `CustomDataset` out of the box. It will upload a json
file (distilabel-task.json) containing the task to the hub.
Defaults to True.
kwargs (Any): Additional arguments to be passed to `datasets.Dataset.push_to_hub`.
Examples:
>>> from distilabel.dataset import CustomDataset
>>> dataset = CustomDataset(...)
>>> dataset.push_to_hub("path/to/dataset")
>>> dataset.push_to_hub("org/dataset-name")
"""
super().push_to_hub(repo_id, *args, **kwargs)
if self.task is not None and push_task:
Expand Down

0 comments on commit 82bc646

Please sign in to comment.