training_args for all TrainingArguments (#2082)
qgallouedec authored Sep 19, 2024
1 parent 9fb871f commit 10c2f63
Showing 27 changed files with 192 additions and 188 deletions.
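
In short, the commit standardizes one variable name across the docs and example scripts: every trainer config (`CPOConfig`, `GKDConfig`, `SFTConfig`, ...) is now assigned to `training_args` and passed to its trainer through the `args` parameter, instead of ad-hoc names like `cpo_config` or `sft_config`. A minimal sketch of the resulting pattern, lifted from the `SFTTrainer` quickstart changed below (assumes `trl` and `datasets` are installed):

```python
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

dataset = load_dataset("stanfordnlp/imdb", split="train")

# Renamed from `sft_config` to `training_args`, matching the
# `TrainingArguments`-style configs named in the commit title.
training_args = SFTConfig(output_dir="/tmp")

trainer = SFTTrainer(
    "facebook/opt-350m",    # model name or path; the trainer loads it
    train_dataset=dataset,
    args=training_args,     # the only wiring that changes is this name
)
trainer.train()
```
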
4 changes: 2 additions & 2 deletions docs/source/cpo_trainer.mdx
@@ -61,13 +61,13 @@ The CPO trainer expects a model of `AutoModelForCausalLM`, compared to PPO that
For a detailed example, have a look at the `examples/scripts/cpo.py` script. At a high level, we need to initialize the `CPOTrainer` with a `model` we wish to train. **Note that CPOTrainer eliminates the need to use the reference model, simplifying the optimization process.** The `beta` refers to the hyperparameter of the implicit reward, and the dataset contains the 3 entries listed above.

```py
cpo_config = CPOConfig(
training_args = CPOConfig(
beta=0.1,
)

cpo_trainer = CPOTrainer(
model,
args=cpo_config,
args=training_args,
train_dataset=train_dataset,
tokenizer=tokenizer,
)
4 changes: 2 additions & 2 deletions docs/source/gkd_trainer.md
@@ -67,11 +67,11 @@ eval_dataset = Dataset.from_dict(
}
)

args = GKDConfig(output_dir="gkd-model", per_device_train_batch_size=1)
training_args = GKDConfig(output_dir="gkd-model", per_device_train_batch_size=1)
trainer = GKDTrainer(
model=model,
teacher_model=teacher_model,
args=args,
args=training_args,
tokenizer=tokenizer,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
6 changes: 3 additions & 3 deletions docs/source/nash_md_trainer.md
@@ -34,11 +34,11 @@ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
reward_model = AutoModelForSequenceClassification.from_pretrained("trl-lib/Qwen2-0.5B-Reward", num_labels=1)
train_dataset = load_dataset("trl-lib/ultrafeedback-prompt", split="train")

args = NashMDConfig(output_dir="nash-md-qwen2", logging_steps=10)
training_args = NashMDConfig(output_dir="nash-md-qwen2", logging_steps=10)
trainer = NashMDTrainer(
model=model,
reward_model=reward_model,
args=args,
args=training_args,
tokenizer=tokenizer,
train_dataset=train_dataset,
)
@@ -66,7 +66,7 @@ Make sure that the SFT model and reward model use the _same_ chat template. Othe
We may want the model to generate completions within a given length. During training, the model will generate completions up to the maximum length specified in the `max_new_tokens` argument of [`NashMDConfig`]. If you want to penalize the model for not generating an EOS token before reaching the maximum completion length, you can use the `missing_eos_penalty` argument of [`NashMDConfig`]:

```python
args = NashMDConfig(..., max_new_tokens=128, missing_eos_penalty=1.0)
training_args = NashMDConfig(..., max_new_tokens=128, missing_eos_penalty=1.0)
```

### Logging Completions
6 changes: 3 additions & 3 deletions docs/source/online_dpo_trainer.md
@@ -36,11 +36,11 @@ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
reward_model = AutoModelForSequenceClassification.from_pretrained("trl-lib/Qwen2-0.5B-Reward", num_labels=1)
train_dataset = load_dataset("trl-lib/ultrafeedback-prompt", split="train")

args = OnlineDPOConfig(output_dir="online-dpo-qwen2", logging_steps=10)
training_args = OnlineDPOConfig(output_dir="online-dpo-qwen2", logging_steps=10)
trainer = OnlineDPOTrainer(
model=model,
reward_model=reward_model,
args=args,
args=training_args,
tokenizer=tokenizer,
train_dataset=train_dataset,
)
@@ -85,7 +85,7 @@ Make sure that the SFT model and reward model use the _same_ chat template. Othe
We may want the model to generate completions within a given length. During training, the model will generate completions up to the maximum length specified in the `max_new_tokens` argument of [`OnlineDPOConfig`]. If you want to penalize the model for not generating an EOS token before reaching the maximum completion length, you can use the `missing_eos_penalty` argument of [`OnlineDPOConfig`]:

```python
args = OnlineDPOConfig(..., max_new_tokens=128, missing_eos_penalty=1.0)
training_args = OnlineDPOConfig(..., max_new_tokens=128, missing_eos_penalty=1.0)
```

### Logging Completions
4 changes: 2 additions & 2 deletions docs/source/orpo_trainer.md
@@ -56,13 +56,13 @@ The ORPO trainer expects a model of `AutoModelForCausalLM`, compared to PPO that
For a detailed example, have a look at the `examples/scripts/orpo.py` script. At a high level, we need to initialize the `ORPOTrainer` with a `model` we wish to train. **Note that ORPOTrainer eliminates the need to use the reference model, simplifying the optimization process.** The `beta` corresponds to the hyperparameter `lambda` in eq. (6) of the paper and controls the weighting of the relative odds ratio loss within the standard cross-entropy loss used for SFT.

```py
orpo_config = ORPOConfig(
training_args = ORPOConfig(
beta=0.1, # the lambda/alpha hyperparameter in the paper/code
)

orpo_trainer = ORPOTrainer(
model,
args=orpo_config,
args=training_args,
train_dataset=train_dataset,
tokenizer=tokenizer,
)
2 changes: 1 addition & 1 deletion docs/source/reward_trainer.mdx
@@ -79,7 +79,7 @@ $$\Big( R(p, r_1) + R(p, r_2) \Big)^2 $$
This auxiliary loss is combined with the main loss function, weighted by the parameter `center_rewards_coefficient` in the [`RewardConfig`]. By default, this feature is deactivated (`center_rewards_coefficient = None`).

```python
reward_config = RewardConfig(
training_args = RewardConfig(
center_rewards_coefficient=0.01,
...
)
46 changes: 23 additions & 23 deletions docs/source/sft_trainer.mdx
@@ -16,15 +16,15 @@ from trl import SFTConfig, SFTTrainer

dataset = load_dataset("stanfordnlp/imdb", split="train")

sft_config = SFTConfig(
training_args = SFTConfig(
dataset_text_field="text",
max_seq_length=512,
output_dir="/tmp",
)
trainer = SFTTrainer(
"facebook/opt-350m",
train_dataset=dataset,
args=sft_config,
args=training_args,
)
trainer.train()
```
@@ -41,12 +41,12 @@ dataset = load_dataset("stanfordnlp/imdb", split="train")

model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m")

sft_config = SFTConfig(output_dir="/tmp")
training_args = SFTConfig(output_dir="/tmp")

trainer = SFTTrainer(
model,
train_dataset=dataset,
args=sft_config,
args=training_args,
)

trainer.train()
@@ -220,10 +220,10 @@ dataset = load_dataset("philschmid/dolly-15k-oai-style", split="train")

...

sft_config = SFTConfig(packing=True)
training_args = SFTConfig(packing=True)
trainer = SFTTrainer(
"facebook/opt-350m",
args=sft_config,
args=training_args,
train_dataset=dataset,
)
```
@@ -256,7 +256,7 @@ def formatting_prompts_func(example):

trainer = SFTTrainer(
model,
args=sft_config,
args=training_args,
train_dataset=dataset,
formatting_func=formatting_prompts_func,
)
@@ -271,12 +271,12 @@ To properly format your input make sure to process all the examples by looping o

```python
...
sft_config = SFTConfig(packing=True, dataset_text_field="text",)
training_args = SFTConfig(packing=True, dataset_text_field="text",)

trainer = SFTTrainer(
"facebook/opt-350m",
train_dataset=dataset,
args=sft_config
args=training_args
)

trainer.train()
@@ -294,11 +294,11 @@ def formatting_func(example):
text = f"### Question: {example['question']}\n ### Answer: {example['answer']}"
return text

sft_config = SFTConfig(packing=True)
training_args = SFTConfig(packing=True)
trainer = SFTTrainer(
"facebook/opt-350m",
train_dataset=dataset,
args=sft_config,
args=training_args,
formatting_func=formatting_func
)

@@ -315,7 +315,7 @@ model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", torch_dtype=to

...

sft_config = SFTConfig(
training_args = SFTConfig(
model_init_kwargs={
"torch_dtype": "bfloat16",
},
@@ -324,7 +324,7 @@ sft_config = SFTConfig(
trainer = SFTTrainer(
"facebook/opt-350m",
train_dataset=dataset,
args=sft_config,
args=training_args,
)

trainer.train()
@@ -510,13 +510,13 @@ from trl import SFTConfig, SFTTrainer

dataset = load_dataset("stanfordnlp/imdb", split="train")

sft_config = SFTConfig(
training_args = SFTConfig(
neftune_noise_alpha=5,
)
trainer = SFTTrainer(
"facebook/opt-350m",
train_dataset=dataset,
args=sft_config,
args=training_args,
)
trainer.train()
```
@@ -578,15 +578,15 @@ model = FastLanguageModel.get_peft_model(
random_state=3407,
)

args = SFTConfig(
training_args = SFTConfig(
output_dir="./output",
max_seq_length=max_seq_length,
dataset_text_field="text",
)

trainer = SFTTrainer(
model=model,
args=args,
args=training_args,
train_dataset=dataset,
)
trainer.train()
@@ -611,10 +611,10 @@ With great memory reduction, you can potentially turn off cpu_offloading or grad
pip install liger-kernel
```

2. Once installed, set `use_liger` in [SFTConfig](https://github.com/huggingface/trl/blob/850ddcf598984013007d384c6b3e311def2a616e/trl/trainer/sft_config.py#L69). No other changes are needed!
2. Once installed, set `use_liger` in [`SFTConfig`]. No other changes are needed!

```python
config = SFTConfig(
training_args = SFTConfig(
use_liger=True
)
```
@@ -742,13 +742,13 @@ print(collated_data.keys()) # dict_keys(['input_ids', 'attention_mask', 'pixel_
Now that we have prepared the data and defined the collator, we can proceed with training the model. To ensure that the data is not processed as text-only, we need to set a couple of arguments in the `SFTConfig`, specifically `dataset_text_field` and `remove_unused_columns`. We also need to set `skip_prepare_dataset` to `True` to avoid the default processing of the dataset. Below is an example of how to set up the `SFTTrainer`.

```python
args.dataset_text_field = "" # needs a dummy field
args.remove_unused_columns = False
args.dataset_kwargs = {"skip_prepare_dataset": True}
training_args.dataset_text_field = "" # needs a dummy field
training_args.remove_unused_columns = False
training_args.dataset_kwargs = {"skip_prepare_dataset": True}

trainer = SFTTrainer(
model=model,
args=args,
args=training_args,
data_collator=collate_fn,
train_dataset=train_dataset,
tokenizer=processor.tokenizer,
6 changes: 3 additions & 3 deletions docs/source/xpo_trainer.mdx
@@ -34,11 +34,11 @@ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
reward_model = AutoModelForSequenceClassification.from_pretrained("trl-lib/Qwen2-0.5B-Reward", num_labels=1)
train_dataset = load_dataset("trl-lib/ultrafeedback-prompt", split="train")

args = XPOConfig(output_dir="nash-md-qwen2", logging_steps=10)
training_args = XPOConfig(output_dir="nash-md-qwen2", logging_steps=10)
trainer = XPOTrainer(
model=model,
reward_model=reward_model,
args=args,
args=training_args,
tokenizer=tokenizer,
train_dataset=train_dataset,
)
@@ -66,7 +66,7 @@ Make sure that the SFT model and reward model use the _same_ chat template. Othe
We may want the model to generate completions within a given length. During training, the model will generate completions up to the maximum length specified in the `max_new_tokens` argument of [`XPOConfig`]. If you want to penalize the model for not generating an EOS token before reaching the maximum completion length, you can use the `missing_eos_penalty` argument of [`XPOConfig`]:

```python
args = XPOConfig(..., max_new_tokens=128, missing_eos_penalty=1.0)
training_args = XPOConfig(..., max_new_tokens=128, missing_eos_penalty=1.0)
```

### Logging Completions
6 changes: 3 additions & 3 deletions examples/scripts/alignprop.py
@@ -106,8 +106,8 @@ def image_outputs_logger(image_pair_data, global_step, accelerate_logger):

if __name__ == "__main__":
parser = HfArgumentParser((ScriptArguments, AlignPropConfig))
args, alignprop_config = parser.parse_args_into_dataclasses()
alignprop_config.project_kwargs = {
args, training_args = parser.parse_args_into_dataclasses()
training_args.project_kwargs = {
"logging_dir": "./logs",
"automatic_checkpoint_naming": True,
"total_limit": 5,
@@ -118,7 +118,7 @@ def image_outputs_logger(image_pair_data, global_step, accelerate_logger):
args.pretrained_model, pretrained_model_revision=args.pretrained_revision, use_lora=args.use_lora
)
trainer = AlignPropTrainer(
alignprop_config,
training_args,
aesthetic_scorer(args.hf_hub_aesthetic_model_id, args.hf_hub_aesthetic_model_filename),
prompt_fn,
pipeline,
12 changes: 6 additions & 6 deletions examples/scripts/bco.py
@@ -175,9 +175,9 @@ def mean_pooling(model_output, attention_mask):

if __name__ == "__main__":
parser = HfArgumentParser((ScriptArguments, BCOConfig, ModelConfig))
script_args, bco_args, model_args = parser.parse_args_into_dataclasses()
script_args, training_args, model_args = parser.parse_args_into_dataclasses()

bco_args.gradient_checkpointing_kwargs = {"use_reentrant": True}
training_args.gradient_checkpointing_kwargs = {"use_reentrant": True}

# Load a pretrained model
model = AutoModelForCausalLM.from_pretrained(
@@ -208,8 +208,8 @@ def format_dataset(example):
# see: https://github.com/huggingface/trl/pull/1255
with PartialState().local_main_process_first():
# Load the dataset
dataset = build_helpfulness_dataset(script_args.llm_name, num_proc=bco_args.dataset_num_proc)
dataset = dataset.map(format_dataset, batched=False, num_proc=bco_args.dataset_num_proc)
dataset = build_helpfulness_dataset(script_args.llm_name, num_proc=training_args.dataset_num_proc)
dataset = dataset.map(format_dataset, batched=False, num_proc=training_args.dataset_num_proc)

accelerator = Accelerator()
embedding_model = AutoModel.from_pretrained(
@@ -232,7 +232,7 @@ def format_dataset(example):
bco_trainer = BCOTrainer(
model,
ref_model,
args=bco_args,
args=training_args,
train_dataset=dataset["train"],
eval_dataset=dataset["test"],
tokenizer=tokenizer,
@@ -243,4 +243,4 @@ def format_dataset(example):

# Train and push the model to the Hub
bco_trainer.train()
bco_trainer.save_model(bco_args.output_dir)
bco_trainer.save_model(training_args.output_dir)
8 changes: 4 additions & 4 deletions examples/scripts/cpo.py
@@ -72,7 +72,7 @@ class ScriptArguments:

if __name__ == "__main__":
parser = HfArgumentParser((ScriptArguments, CPOConfig, ModelConfig))
args, cpo_args, model_config = parser.parse_args_into_dataclasses()
args, training_args, model_config = parser.parse_args_into_dataclasses()

################
# Model & Tokenizer
@@ -101,14 +101,14 @@ def process(row):
# Compute that only on the main process for faster data processing.
# see: https://github.com/huggingface/trl/pull/1255
with PartialState().local_main_process_first():
dataset = dataset.map(process, num_proc=cpo_args.dataset_num_proc)
dataset = dataset.map(process, num_proc=training_args.dataset_num_proc)

################
# Training
################
trainer = CPOTrainer(
model,
args=cpo_args,
args=training_args,
train_dataset=dataset["train"],
eval_dataset=dataset["test"],
tokenizer=tokenizer,
@@ -117,4 +117,4 @@ def process(row):

# train and save the model
trainer.train()
trainer.save_model(cpo_args.output_dir)
trainer.save_model(training_args.output_dir)
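
The example scripts follow the same convention for configs parsed from the command line: the dataclass that used to be bound to names like `cpo_args` or `bco_args` is now bound to `training_args`, and everything downstream (`dataset_num_proc`, `output_dir`, the trainer's `args`) reads from it. A condensed, hypothetical sketch of that parsing step — `ScriptArguments` and its `dataset_name` field are stand-ins for illustration, not the real dataclass from `examples/scripts/cpo.py`:

```python
from dataclasses import dataclass

from transformers import HfArgumentParser
from trl import CPOConfig, ModelConfig


@dataclass
class ScriptArguments:
    # Hypothetical script-level option, only here to show the three-way parse.
    dataset_name: str = "your/preference-dataset"


if __name__ == "__main__":
    parser = HfArgumentParser((ScriptArguments, CPOConfig, ModelConfig))
    # The trainer config is now unpacked as `training_args` (was `cpo_args`).
    args, training_args, model_config = parser.parse_args_into_dataclasses()

    # Downstream code reads from `training_args`, e.g.:
    #   dataset.map(process, num_proc=training_args.dataset_num_proc)
    #   CPOTrainer(model, args=training_args, ...)
    #   trainer.save_model(training_args.output_dir)
    print(args.dataset_name, training_args.output_dir, model_config.model_name_or_path)
```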