diff --git a/model/reward/instructor/experimental_dataset.py b/model/reward/instructor/experimental_dataset.py
index d8fb60d79c..8ff4f9e7a9 100644
--- a/model/reward/instructor/experimental_dataset.py
+++ b/model/reward/instructor/experimental_dataset.py
@@ -60,7 +60,7 @@ class HFSummaryQuality(Dataset):
     def __init__(self, split, tokenizer, max_length=300) -> None:
         super().__init__()
         assert split in ("validation", "test")
-        dataset = load_dataset("Tristan/summarize_from_feedback", "axis")[split]
+        dataset = load_dataset("openai/summarize_from_feedback", "axis")[split]
         self.max_length = max_length
         mean_scores = defaultdict(list)
         self.contexts = []
diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py
index f63af85ad0..a5c4b4fdbd 100644
--- a/model/reward/instructor/rank_datasets.py
+++ b/model/reward/instructor/rank_datasets.py
@@ -118,7 +118,7 @@ def __init__(self, split="train", conf_threshold=-1, max_comparison_per_sample=3
         self.index2summary = {}
         self.max_comparison_per_sample = max_comparison_per_sample
         major_split = split if "train" == split else "validation"
-        dataset = load_dataset("Tristan/summarize_from_feedback", "comparisons")[major_split]
+        dataset = load_dataset("openai/summarize_from_feedback", "comparisons")[major_split]
         for data in dataset:
             if (
                 "extra" in data
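
A minimal sanity check (a sketch, not part of the PR; assumes the Hugging Face `datasets` library is installed and the renamed `openai/summarize_from_feedback` hub path is live) that the updated `load_dataset` calls resolve for both configs touched by this diff:

```python
from datasets import load_dataset

# "axis" config: per-summary quality ratings; the dataset class above
# asserts the split is "validation" or "test".
axis_val = load_dataset("openai/summarize_from_feedback", "axis")["validation"]

# "comparisons" config: pairwise summary preferences; the code above maps
# any non-"train" split to "validation".
comp_train = load_dataset("openai/summarize_from_feedback", "comparisons")["train"]

print(len(axis_val), len(comp_train))
```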