From 10c2f63b2ac8564cca28aa1598a1f3ac6a5fc63c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= <45557362+qgallouedec@users.noreply.github.com> Date: Thu, 19 Sep 2024 15:03:47 +0200 Subject: [PATCH] `training_args` for all `TrainingArguments` (#2082) --- docs/source/cpo_trainer.mdx | 4 +-- docs/source/gkd_trainer.md | 4 +-- docs/source/nash_md_trainer.md | 6 ++-- docs/source/online_dpo_trainer.md | 6 ++-- docs/source/orpo_trainer.md | 4 +-- docs/source/reward_trainer.mdx | 2 +- docs/source/sft_trainer.mdx | 46 ++++++++++++++--------------- docs/source/xpo_trainer.mdx | 6 ++-- examples/scripts/alignprop.py | 6 ++-- examples/scripts/bco.py | 12 ++++---- examples/scripts/cpo.py | 8 ++--- examples/scripts/ddpo.py | 6 ++-- examples/scripts/kto.py | 10 +++---- examples/scripts/orpo.py | 8 ++--- examples/scripts/ppo.py | 12 ++++---- examples/scripts/ppo/ppo.py | 20 ++++++------- examples/scripts/ppo/ppo_tldr.py | 24 +++++++-------- examples/scripts/reward_modeling.py | 20 ++++++------- examples/scripts/rloo/rloo.py | 24 +++++++-------- examples/scripts/rloo/rloo_tldr.py | 22 +++++++------- tests/slow/test_sft_slow.py | 40 ++++++++++++------------- tests/test_alignprop_trainer.py | 8 ++--- tests/test_callbacks.py | 12 ++++---- tests/test_ddpo_trainer.py | 8 ++--- tests/test_dpo_trainer.py | 24 +++++++-------- tests/test_iterative_sft_trainer.py | 4 +-- tests/test_trainers_args.py | 34 ++++++++++++--------- 27 files changed, 192 insertions(+), 188 deletions(-) diff --git a/docs/source/cpo_trainer.mdx b/docs/source/cpo_trainer.mdx index 05c0f40cf9..39c80f3acc 100644 --- a/docs/source/cpo_trainer.mdx +++ b/docs/source/cpo_trainer.mdx @@ -61,13 +61,13 @@ The CPO trainer expects a model of `AutoModelForCausalLM`, compared to PPO that For a detailed example have a look at the `examples/scripts/cpo.py` script. At a high level we need to initialize the `CPOTrainer` with a `model` we wish to train. **Note that CPOTrainer eliminates the need to use the reference model, simplifying the optimization process.** The `beta` refers to the hyperparameter of the implicit reward, and the dataset contains the 3 entries listed above. 
```py -cpo_config = CPOConfig( +training_args = CPOConfig( beta=0.1, ) cpo_trainer = CPOTrainer( model, - args=cpo_config, + args=training_args, train_dataset=train_dataset, tokenizer=tokenizer, ) diff --git a/docs/source/gkd_trainer.md b/docs/source/gkd_trainer.md index 4801c35c32..e6513cb5cc 100644 --- a/docs/source/gkd_trainer.md +++ b/docs/source/gkd_trainer.md @@ -67,11 +67,11 @@ eval_dataset = Dataset.from_dict( } ) -args = GKDConfig(output_dir="gkd-model", per_device_train_batch_size=1) +training_args = GKDConfig(output_dir="gkd-model", per_device_train_batch_size=1) trainer = GKDTrainer( model=model, teacher_model=teacher_model, - args=args, + args=training_args, tokenizer=tokenizer, train_dataset=train_dataset, eval_dataset=eval_dataset, diff --git a/docs/source/nash_md_trainer.md b/docs/source/nash_md_trainer.md index e0d931d187..9d380cb4c0 100644 --- a/docs/source/nash_md_trainer.md +++ b/docs/source/nash_md_trainer.md @@ -34,11 +34,11 @@ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct") reward_model = AutoModelForSequenceClassification.from_pretrained("trl-lib/Qwen2-0.5B-Reward", num_labels=1) train_dataset = load_dataset("trl-lib/ultrafeedback-prompt", split="train") -args = NashMDConfig(output_dir="nash-md-qwen2", logging_steps=10) +training_args = NashMDConfig(output_dir="nash-md-qwen2", logging_steps=10) trainer = NashMDTrainer( model=model, reward_model=reward_model, - args=args, + args=training_args, tokenizer=tokenizer, train_dataset=train_dataset, ) @@ -66,7 +66,7 @@ Make sure that the SFT model and reward model use the _same_ chat template. Othe We can want the model to generate completion within a given length. During the learning, the model will generate completion up to the maximum completion length specified in the `max_new_tokens` argument of [`NashMDConfig`]. I you want to penalize for not generating an EOS token before the maximum completion length, you can use the `missing_eos_penalty` argument of [`NashMDConfig`]: ```python -args = NashMDConfig(..., max_new_tokens=128, missing_eos_penalty=1.0) +training_args = NashMDConfig(..., max_new_tokens=128, missing_eos_penalty=1.0) ``` ### Logging Completions diff --git a/docs/source/online_dpo_trainer.md b/docs/source/online_dpo_trainer.md index 3dfca04053..1e272ae26f 100644 --- a/docs/source/online_dpo_trainer.md +++ b/docs/source/online_dpo_trainer.md @@ -36,11 +36,11 @@ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct") reward_model = AutoModelForSequenceClassification.from_pretrained("trl-lib/Qwen2-0.5B-Reward", num_labels=1) train_dataset = load_dataset("trl-lib/ultrafeedback-prompt", split="train") -args = OnlineDPOConfig(output_dir="online-dpo-qwen2", logging_steps=10) +training_args = OnlineDPOConfig(output_dir="online-dpo-qwen2", logging_steps=10) trainer = OnlineDPOTrainer( model=model, reward_model=reward_model, - args=args, + args=training_args, tokenizer=tokenizer, train_dataset=train_dataset, ) @@ -85,7 +85,7 @@ Make sure that the SFT model and reward model use the _same_ chat template. Othe We can want the model to generate completion within a given length. During the learning, the model will generate completion up to the maximum completion length specified in the `max_new_tokens` argument of [`OnlineDPOConfig`]. 
I you want to penalize for not generating an EOS token before the maximum completion length, you can use the `missing_eos_penalty` argument of [`OnlineDPOConfig`]: ```python -args = OnlineDPOConfig(..., max_new_tokens=128, missing_eos_penalty=1.0) +training_args = OnlineDPOConfig(..., max_new_tokens=128, missing_eos_penalty=1.0) ``` ### Logging Completions diff --git a/docs/source/orpo_trainer.md b/docs/source/orpo_trainer.md index 124bc7891b..12358f940b 100644 --- a/docs/source/orpo_trainer.md +++ b/docs/source/orpo_trainer.md @@ -56,13 +56,13 @@ The ORPO trainer expects a model of `AutoModelForCausalLM`, compared to PPO that For a detailed example have a look at the `examples/scripts/orpo.py` script. At a high level we need to initialize the `ORPOTrainer` with a `model` we wish to train. **Note that ORPOTrainer eliminates the need to use the reference model, simplifying the optimization process.** The `beta` refers to the hyperparameter `lambda` in eq. (6) of the paper and refers to the weighting of the relative odd ratio loss in the standard cross-entropy loss used for SFT. ```py -orpo_config = ORPOConfig( +training_args = ORPOConfig( beta=0.1, # the lambda/alpha hyperparameter in the paper/code ) orpo_trainer = ORPOTrainer( model, - args=orpo_config, + args=training_args, train_dataset=train_dataset, tokenizer=tokenizer, ) diff --git a/docs/source/reward_trainer.mdx b/docs/source/reward_trainer.mdx index 8335ecf56a..5a73217ead 100644 --- a/docs/source/reward_trainer.mdx +++ b/docs/source/reward_trainer.mdx @@ -79,7 +79,7 @@ $$\Big( R(p, r_1) + R(p, r_2) \Big)^2 $$ This auxiliary loss is combined with the main loss function, weighted by the parameter `center_rewards_coefficient` in the `[RewardConfig]`. By default, this feature is deactivated (`center_rewards_coefficient = None`). ```python -reward_config = RewardConfig( +training_args = RewardConfig( center_rewards_coefficient=0.01, ... ) diff --git a/docs/source/sft_trainer.mdx b/docs/source/sft_trainer.mdx index fb14c68dc3..99f10006c4 100644 --- a/docs/source/sft_trainer.mdx +++ b/docs/source/sft_trainer.mdx @@ -16,7 +16,7 @@ from trl import SFTConfig, SFTTrainer dataset = load_dataset("stanfordnlp/imdb", split="train") -sft_config = SFTConfig( +training_args = SFTConfig( dataset_text_field="text", max_seq_length=512, output_dir="/tmp", @@ -24,7 +24,7 @@ sft_config = SFTConfig( trainer = SFTTrainer( "facebook/opt-350m", train_dataset=dataset, - args=sft_config, + args=training_args, ) trainer.train() ``` @@ -41,12 +41,12 @@ dataset = load_dataset("stanfordnlp/imdb", split="train") model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m") -sft_config = SFTConfig(output_dir="/tmp") +training_args = SFTConfig(output_dir="/tmp") trainer = SFTTrainer( model, train_dataset=dataset, - args=sft_config, + args=training_args, ) trainer.train() @@ -220,10 +220,10 @@ dataset = load_dataset("philschmid/dolly-15k-oai-style", split="train") ... -sft_config = SFTConfig(packing=True) +training_args = SFTConfig(packing=True) trainer = SFTTrainer( "facebook/opt-350m", - args=sft_config, + args=training_args, train_dataset=dataset, ) ``` @@ -256,7 +256,7 @@ def formatting_prompts_func(example): trainer = SFTTrainer( model, - args=sft_config, + args=training_args, train_dataset=dataset, formatting_func=formatting_prompts_func, ) @@ -271,12 +271,12 @@ To properly format your input make sure to process all the examples by looping o ```python ... 
-sft_config = SFTConfig(packing=True, dataset_text_field="text",) +training_args = SFTConfig(packing=True, dataset_text_field="text",) trainer = SFTTrainer( "facebook/opt-350m", train_dataset=dataset, - args=sft_config + args=training_args ) trainer.train() @@ -294,11 +294,11 @@ def formatting_func(example): text = f"### Question: {example['question']}\n ### Answer: {example['answer']}" return text -sft_config = SFTConfig(packing=True) +training_args = SFTConfig(packing=True) trainer = SFTTrainer( "facebook/opt-350m", train_dataset=dataset, - args=sft_config, + args=training_args, formatting_func=formatting_func ) @@ -315,7 +315,7 @@ model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", torch_dtype=to ... -sft_config = SFTConfig( +training_args = SFTConfig( model_init_kwargs={ "torch_dtype": "bfloat16", }, @@ -324,7 +324,7 @@ sft_config = SFTConfig( trainer = SFTTrainer( "facebook/opt-350m", train_dataset=dataset, - args=sft_config, + args=training_args, ) trainer.train() @@ -510,13 +510,13 @@ from trl import SFTConfig, SFTTrainer dataset = load_dataset("stanfordnlp/imdb", split="train") -sft_config = SFTConfig( +training_args = SFTConfig( neftune_noise_alpha=5, ) trainer = SFTTrainer( "facebook/opt-350m", train_dataset=dataset, - args=sft_config, + args=training_args, ) trainer.train() ``` @@ -578,7 +578,7 @@ model = FastLanguageModel.get_peft_model( random_state=3407, ) -args = SFTConfig( +training_args = SFTConfig( output_dir="./output", max_seq_length=max_seq_length, dataset_text_field="text", @@ -586,7 +586,7 @@ args = SFTConfig( trainer = SFTTrainer( model=model, - args=args, + args=training_args, train_dataset=dataset, ) trainer.train() @@ -611,10 +611,10 @@ With great memory reduction, you can potentially turn off cpu_offloading or grad pip install liger-kernel ``` -2. Once installed, set `use_liger` in [SFTConfig](https://github.com/huggingface/trl/blob/850ddcf598984013007d384c6b3e311def2a616e/trl/trainer/sft_config.py#L69). No other changes are needed! +2. Once installed, set `use_liger` in [`SFTConfig`]. No other changes are needed! ```python -config = SFTConfig( +training_args = SFTConfig( use_liger=True ) ``` @@ -742,13 +742,13 @@ print(collated_data.keys()) # dict_keys(['input_ids', 'attention_mask', 'pixel_ Now that we have prepared the data and defined the collator, we can proceed with training the model. To ensure that the data is not processed as text-only, we need to set a couple of arguments in the `SFTConfig`, specifically `dataset_text_field` and `remove_unused_columns`. We also need to set `skip_prepare_dataset` to `True` to avoid the default processing of the dataset. Below is an example of how to set up the `SFTTrainer`. 
```python -args.dataset_text_field = "" # needs a dummy field -args.remove_unused_columns = False -args.dataset_kwargs = {"skip_prepare_dataset": True} +training_args.dataset_text_field = "" # needs a dummy field +training_args.remove_unused_columns = False +training_args.dataset_kwargs = {"skip_prepare_dataset": True} trainer = SFTTrainer( model=model, - args=args, + args=training_args, data_collator=collate_fn, train_dataset=train_dataset, tokenizer=processor.tokenizer, diff --git a/docs/source/xpo_trainer.mdx b/docs/source/xpo_trainer.mdx index ef23da1db8..57fa58c21c 100644 --- a/docs/source/xpo_trainer.mdx +++ b/docs/source/xpo_trainer.mdx @@ -34,11 +34,11 @@ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct") reward_model = AutoModelForSequenceClassification.from_pretrained("trl-lib/Qwen2-0.5B-Reward", num_labels=1) train_dataset = load_dataset("trl-lib/ultrafeedback-prompt", split="train") -args = XPOConfig(output_dir="nash-md-qwen2", logging_steps=10) +training_args = XPOConfig(output_dir="nash-md-qwen2", logging_steps=10) trainer = XPOTrainer( model=model, reward_model=reward_model, - args=args, + args=training_args, tokenizer=tokenizer, train_dataset=train_dataset, ) @@ -66,7 +66,7 @@ Make sure that the SFT model and reward model use the _same_ chat template. Othe We can want the model to generate completion within a given length. During the learning, the model will generate completion up to the maximum completion length specified in the `max_new_tokens` argument of [`XPOConfig`]. I you want to penalize for not generating an EOS token before the maximum completion length, you can use the `missing_eos_penalty` argument of [`XPOConfig`]: ```python -args = XPOConfig(..., max_new_tokens=128, missing_eos_penalty=1.0) +training_args = XPOConfig(..., max_new_tokens=128, missing_eos_penalty=1.0) ``` ### Logging Completions diff --git a/examples/scripts/alignprop.py b/examples/scripts/alignprop.py index f44c677634..376f5bee2b 100644 --- a/examples/scripts/alignprop.py +++ b/examples/scripts/alignprop.py @@ -106,8 +106,8 @@ def image_outputs_logger(image_pair_data, global_step, accelerate_logger): if __name__ == "__main__": parser = HfArgumentParser((ScriptArguments, AlignPropConfig)) - args, alignprop_config = parser.parse_args_into_dataclasses() - alignprop_config.project_kwargs = { + args, training_args = parser.parse_args_into_dataclasses() + training_args.project_kwargs = { "logging_dir": "./logs", "automatic_checkpoint_naming": True, "total_limit": 5, @@ -118,7 +118,7 @@ def image_outputs_logger(image_pair_data, global_step, accelerate_logger): args.pretrained_model, pretrained_model_revision=args.pretrained_revision, use_lora=args.use_lora ) trainer = AlignPropTrainer( - alignprop_config, + training_args, aesthetic_scorer(args.hf_hub_aesthetic_model_id, args.hf_hub_aesthetic_model_filename), prompt_fn, pipeline, diff --git a/examples/scripts/bco.py b/examples/scripts/bco.py index 23d171a8a0..5ac9ed4a0d 100644 --- a/examples/scripts/bco.py +++ b/examples/scripts/bco.py @@ -175,9 +175,9 @@ def mean_pooling(model_output, attention_mask): if __name__ == "__main__": parser = HfArgumentParser((ScriptArguments, BCOConfig, ModelConfig)) - script_args, bco_args, model_args = parser.parse_args_into_dataclasses() + script_args, training_args, model_args = parser.parse_args_into_dataclasses() - bco_args.gradient_checkpointing_kwargs = {"use_reentrant": True} + training_args.gradient_checkpointing_kwargs = {"use_reentrant": True} # Load a pretrained model model = 
AutoModelForCausalLM.from_pretrained( @@ -208,8 +208,8 @@ def format_dataset(example): # see: https://github.com/huggingface/trl/pull/1255 with PartialState().local_main_process_first(): # Load the dataset - dataset = build_helpfulness_dataset(script_args.llm_name, num_proc=bco_args.dataset_num_proc) - dataset = dataset.map(format_dataset, batched=False, num_proc=bco_args.dataset_num_proc) + dataset = build_helpfulness_dataset(script_args.llm_name, num_proc=training_args.dataset_num_proc) + dataset = dataset.map(format_dataset, batched=False, num_proc=training_args.dataset_num_proc) accelerator = Accelerator() embedding_model = AutoModel.from_pretrained( @@ -232,7 +232,7 @@ def format_dataset(example): bco_trainer = BCOTrainer( model, ref_model, - args=bco_args, + args=training_args, train_dataset=dataset["train"], eval_dataset=dataset["test"], tokenizer=tokenizer, @@ -243,4 +243,4 @@ def format_dataset(example): # Train and push the model to the Hub bco_trainer.train() - bco_trainer.save_model(bco_args.output_dir) + bco_trainer.save_model(training_args.output_dir) diff --git a/examples/scripts/cpo.py b/examples/scripts/cpo.py index 924cbf162a..66c295bfb7 100644 --- a/examples/scripts/cpo.py +++ b/examples/scripts/cpo.py @@ -72,7 +72,7 @@ class ScriptArguments: if __name__ == "__main__": parser = HfArgumentParser((ScriptArguments, CPOConfig, ModelConfig)) - args, cpo_args, model_config = parser.parse_args_into_dataclasses() + args, training_args, model_config = parser.parse_args_into_dataclasses() ################ # Model & Tokenizer @@ -101,14 +101,14 @@ def process(row): # Compute that only on the main process for faster data processing. # see: https://github.com/huggingface/trl/pull/1255 with PartialState().local_main_process_first(): - dataset = dataset.map(process, num_proc=cpo_args.dataset_num_proc) + dataset = dataset.map(process, num_proc=training_args.dataset_num_proc) ################ # Training ################ trainer = CPOTrainer( model, - args=cpo_args, + args=training_args, train_dataset=dataset["train"], eval_dataset=dataset["test"], tokenizer=tokenizer, @@ -117,4 +117,4 @@ def process(row): # train and save the model trainer.train() - trainer.save_model(cpo_args.output_dir) + trainer.save_model(training_args.output_dir) diff --git a/examples/scripts/ddpo.py b/examples/scripts/ddpo.py index 2b318d0b82..c5baae3c12 100644 --- a/examples/scripts/ddpo.py +++ b/examples/scripts/ddpo.py @@ -185,8 +185,8 @@ def image_outputs_logger(image_data, global_step, accelerate_logger): if __name__ == "__main__": parser = HfArgumentParser((ScriptArguments, DDPOConfig)) - args, ddpo_config = parser.parse_args_into_dataclasses() - ddpo_config.project_kwargs = { + args, training_args = parser.parse_args_into_dataclasses() + training_args.project_kwargs = { "logging_dir": "./logs", "automatic_checkpoint_naming": True, "total_limit": 5, @@ -198,7 +198,7 @@ def image_outputs_logger(image_data, global_step, accelerate_logger): ) trainer = DDPOTrainer( - ddpo_config, + training_args, aesthetic_scorer(args.hf_hub_aesthetic_model_id, args.hf_hub_aesthetic_model_filename), prompt_fn, pipeline, diff --git a/examples/scripts/kto.py b/examples/scripts/kto.py index 09d30d62f6..b9ae7bf987 100644 --- a/examples/scripts/kto.py +++ b/examples/scripts/kto.py @@ -74,7 +74,7 @@ class ScriptArguments: if __name__ == "__main__": parser = HfArgumentParser((ScriptArguments, KTOConfig, ModelConfig)) - script_args, kto_args, model_args = parser.parse_args_into_dataclasses() + script_args, training_args, model_args = 
parser.parse_args_into_dataclasses() # Load a pretrained model model = AutoModelForCausalLM.from_pretrained( @@ -98,7 +98,7 @@ class ScriptArguments: dataset = load_dataset(script_args.dataset_name) # If needed, reformat a DPO-formatted dataset (prompt, chosen, rejected) to a KTO-format (prompt, completion, label) - dataset = maybe_unpair_preference_dataset(dataset, num_proc=kto_args.dataset_num_proc) + dataset = maybe_unpair_preference_dataset(dataset, num_proc=training_args.dataset_num_proc) # Apply chat template def format_dataset(example): @@ -113,13 +113,13 @@ def format_dataset(example): # Compute that only on the main process for faster data processing. # see: https://github.com/huggingface/trl/pull/1255 with PartialState().local_main_process_first(): - dataset = dataset.map(format_dataset, num_proc=kto_args.dataset_num_proc) + dataset = dataset.map(format_dataset, num_proc=training_args.dataset_num_proc) # Initialize the KTO trainer kto_trainer = KTOTrainer( model, ref_model, - args=kto_args, + args=training_args, train_dataset=dataset["train"], eval_dataset=dataset["test"], tokenizer=tokenizer, @@ -128,5 +128,5 @@ def format_dataset(example): # Train and push the model to the Hub kto_trainer.train() - kto_trainer.save_model(kto_args.output_dir) + kto_trainer.save_model(training_args.output_dir) kto_trainer.push_to_hub() diff --git a/examples/scripts/orpo.py b/examples/scripts/orpo.py index 9aa0684c6f..98c66fb578 100644 --- a/examples/scripts/orpo.py +++ b/examples/scripts/orpo.py @@ -72,7 +72,7 @@ class ScriptArguments: if __name__ == "__main__": parser = HfArgumentParser((ScriptArguments, ORPOConfig, ModelConfig)) - args, orpo_args, model_config = parser.parse_args_into_dataclasses() + args, training_args, model_config = parser.parse_args_into_dataclasses() ################ # Model & Tokenizer @@ -102,14 +102,14 @@ def process(row): # Compute that only on the main process for faster data processing. # see: https://github.com/huggingface/trl/pull/1255 with PartialState().local_main_process_first(): - dataset = dataset.map(process, num_prc=orpo_args.dataset_num_proc) + dataset = dataset.map(process, num_prc=training_args.dataset_num_proc) ################ # Training ################ trainer = ORPOTrainer( model, - args=orpo_args, + args=training_args, train_dataset=dataset["train"], eval_dataset=dataset["test"], tokenizer=tokenizer, @@ -118,4 +118,4 @@ def process(row): # train and save the model trainer.train() - trainer.save_model(orpo_args.output_dir) + trainer.save_model(training_args.output_dir) diff --git a/examples/scripts/ppo.py b/examples/scripts/ppo.py index 65f5f4e93e..a8c3140d34 100644 --- a/examples/scripts/ppo.py +++ b/examples/scripts/ppo.py @@ -74,9 +74,9 @@ def build_dataset(query_dataset, dataset_num_proc, input_min_text_length=2, inpu The dataloader for the dataset. 
""" # load imdb with datasets - ds = load_dataset(query_dataset, split="train") - ds = ds.rename_columns({"text": "review"}) - ds = ds.filter(lambda x: len(x["review"]) > 200, num_proc=dataset_num_proc) + dataset = load_dataset(query_dataset, split="train") + dataset = dataset.rename_columns({"text": "review"}) + dataset = dataset.filter(lambda x: len(x["review"]) > 200, num_proc=dataset_num_proc) input_size = LengthSampler(input_min_text_length, input_max_text_length) @@ -85,9 +85,9 @@ def tokenize(sample): sample["query"] = tokenizer.decode(sample["input_ids"]) return sample - ds = ds.map(tokenize, num_proc=dataset_num_proc) - ds.set_format(type="torch") - return ds + dataset = dataset.map(tokenize, num_proc=dataset_num_proc) + dataset.set_format(type="torch") + return dataset # We retrieve the dataloader by calling the `build_dataset` function. diff --git a/examples/scripts/ppo/ppo.py b/examples/scripts/ppo/ppo.py index 5312f32cc7..541af12b6c 100644 --- a/examples/scripts/ppo/ppo.py +++ b/examples/scripts/ppo/ppo.py @@ -57,9 +57,9 @@ if __name__ == "__main__": parser = HfArgumentParser((PPOv2Config, ModelConfig)) - config, model_config = parser.parse_args_into_dataclasses() + training_args, model_config = parser.parse_args_into_dataclasses() # remove output_dir if exists - shutil.rmtree(config.output_dir, ignore_errors=True) + shutil.rmtree(training_args.output_dir, ignore_errors=True) ################ # Model & Tokenizer @@ -73,16 +73,16 @@ if tokenizer.chat_template is None: tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE value_model = AutoModelForSequenceClassification.from_pretrained( - config.reward_model_path, trust_remote_code=model_config.trust_remote_code, num_labels=1 + training_args.reward_model_path, trust_remote_code=model_config.trust_remote_code, num_labels=1 ) reward_model = AutoModelForSequenceClassification.from_pretrained( - config.reward_model_path, trust_remote_code=model_config.trust_remote_code, num_labels=1 + training_args.reward_model_path, trust_remote_code=model_config.trust_remote_code, num_labels=1 ) ref_policy = AutoModelForCausalLM.from_pretrained( - config.sft_model_path, trust_remote_code=model_config.trust_remote_code + training_args.sft_model_path, trust_remote_code=model_config.trust_remote_code ) policy = AutoModelForCausalLM.from_pretrained( - config.sft_model_path, trust_remote_code=model_config.trust_remote_code + training_args.sft_model_path, trust_remote_code=model_config.trust_remote_code ) ################ # Dataset @@ -107,7 +107,7 @@ def tokenize(element): tokenize, batched=True, remove_columns=dataset.column_names, - num_proc=config.dataset_num_proc, + num_proc=training_args.dataset_num_proc, ) # Compute that only on the main process for faster data processing. 
@@ -120,7 +120,7 @@ def tokenize(element): # Training ################ trainer = PPOv2Trainer( - config=config, + config=training_args, tokenizer=tokenizer, policy=policy, ref_policy=ref_policy, @@ -130,7 +130,7 @@ def tokenize(element): eval_dataset=eval_dataset, ) trainer.train() - trainer.save_model(config.output_dir) - if config.push_to_hub: + trainer.save_model(training_args.output_dir) + if training_args.push_to_hub: trainer.push_to_hub() trainer.generate_completions() diff --git a/examples/scripts/ppo/ppo_tldr.py b/examples/scripts/ppo/ppo_tldr.py index 138e0f2570..4ea8d36763 100644 --- a/examples/scripts/ppo/ppo_tldr.py +++ b/examples/scripts/ppo/ppo_tldr.py @@ -59,9 +59,9 @@ if __name__ == "__main__": parser = HfArgumentParser((PPOv2Config, ModelConfig)) - config, model_config = parser.parse_args_into_dataclasses() + training_args, model_config = parser.parse_args_into_dataclasses() # remove output_dir if exists - shutil.rmtree(config.output_dir, ignore_errors=True) + shutil.rmtree(training_args.output_dir, ignore_errors=True) ################ # Model & Tokenizer @@ -75,16 +75,16 @@ if tokenizer.chat_template is None: tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE value_model = AutoModelForSequenceClassification.from_pretrained( - config.reward_model_path, trust_remote_code=model_config.trust_remote_code, num_labels=1 + training_args.reward_model_path, trust_remote_code=model_config.trust_remote_code, num_labels=1 ) reward_model = AutoModelForSequenceClassification.from_pretrained( - config.reward_model_path, trust_remote_code=model_config.trust_remote_code, num_labels=1 + training_args.reward_model_path, trust_remote_code=model_config.trust_remote_code, num_labels=1 ) ref_policy = AutoModelForCausalLM.from_pretrained( - config.sft_model_path, trust_remote_code=model_config.trust_remote_code + training_args.sft_model_path, trust_remote_code=model_config.trust_remote_code ) policy = AutoModelForCausalLM.from_pretrained( - config.sft_model_path, trust_remote_code=model_config.trust_remote_code + training_args.sft_model_path, trust_remote_code=model_config.trust_remote_code ) ################ # Dataset @@ -107,7 +107,7 @@ def tokenize(element): return dataset.map( tokenize, remove_columns=dataset.column_names, - num_proc=config.dataset_num_proc, + num_proc=training_args.dataset_num_proc, ) # Compute that only on the main process for faster data processing. 
@@ -116,15 +116,15 @@ def tokenize(element): train_dataset = prepare_dataset(train_dataset, tokenizer) eval_dataset = prepare_dataset(eval_dataset, tokenizer) # filtering - train_dataset = train_dataset.filter(lambda x: x["lengths"] <= 512, num_proc=config.dataset_num_proc) - eval_dataset = eval_dataset.filter(lambda x: x["lengths"] <= 512, num_proc=config.dataset_num_proc) + train_dataset = train_dataset.filter(lambda x: x["lengths"] <= 512, num_proc=training_args.dataset_num_proc) + eval_dataset = eval_dataset.filter(lambda x: x["lengths"] <= 512, num_proc=training_args.dataset_num_proc) assert train_dataset[0]["input_ids"][-1] != tokenizer.eos_token_id, "The last token should not be an EOS token" ################ # Training ################ trainer = PPOv2Trainer( - config=config, + config=training_args, tokenizer=tokenizer, policy=policy, ref_policy=ref_policy, @@ -134,7 +134,7 @@ def tokenize(element): eval_dataset=eval_dataset, ) trainer.train() - trainer.save_model(config.output_dir) - if config.push_to_hub: + trainer.save_model(training_args.output_dir) + if training_args.push_to_hub: trainer.push_to_hub() trainer.generate_completions() diff --git a/examples/scripts/reward_modeling.py b/examples/scripts/reward_modeling.py index e00e5d8ff8..bbb2e23459 100644 --- a/examples/scripts/reward_modeling.py +++ b/examples/scripts/reward_modeling.py @@ -74,8 +74,8 @@ if __name__ == "__main__": parser = HfArgumentParser((RewardScriptArguments, RewardConfig, ModelConfig)) - args, config, model_config = parser.parse_args_into_dataclasses() - config.gradient_checkpointing_kwargs = dict(use_reentrant=False) + args, training_args, model_config = parser.parse_args_into_dataclasses() + training_args.gradient_checkpointing_kwargs = dict(use_reentrant=False) ################ # Model & Tokenizer @@ -138,19 +138,19 @@ def preprocess_function(examples): chosen_fn = conversations_formatting_function(tokenizer, "chosen") rejected_fn = conversations_formatting_function(tokenizer, "rejected") dataset = dataset.map( - lambda x: {"chosen": chosen_fn(x), "rejected": rejected_fn(x)}, num_proc=config.dataset_num_proc + lambda x: {"chosen": chosen_fn(x), "rejected": rejected_fn(x)}, num_proc=training_args.dataset_num_proc ) # Tokenize inputs dataset = dataset.map( preprocess_function, batched=True, - num_proc=config.dataset_num_proc, + num_proc=training_args.dataset_num_proc, ) # Filter out examples that are too long dataset = dataset.filter( - lambda x: len(x["input_ids_chosen"]) <= config.max_length - and len(x["input_ids_rejected"]) <= config.max_length, - num_proc=config.dataset_num_proc, + lambda x: len(x["input_ids_chosen"]) <= training_args.max_length + and len(x["input_ids_rejected"]) <= training_args.max_length, + num_proc=training_args.dataset_num_proc, ) ########## @@ -159,7 +159,7 @@ def preprocess_function(examples): trainer = RewardTrainer( model=model, tokenizer=tokenizer, - args=config, + args=training_args, train_dataset=dataset[args.dataset_train_split], eval_dataset=dataset[args.dataset_test_split], peft_config=get_peft_config(model_config), @@ -169,9 +169,9 @@ def preprocess_function(examples): ############################ # Save model and push to Hub ############################ - trainer.save_model(config.output_dir) + trainer.save_model(training_args.output_dir) metrics = trainer.evaluate() trainer.log_metrics("eval", metrics) trainer.save_metrics("eval", metrics) - trainer.save_model(config.output_dir) + trainer.save_model(training_args.output_dir) trainer.push_to_hub() diff --git 
a/examples/scripts/rloo/rloo.py b/examples/scripts/rloo/rloo.py index b4e9c2053e..74a52fe69d 100644 --- a/examples/scripts/rloo/rloo.py +++ b/examples/scripts/rloo/rloo.py @@ -61,9 +61,9 @@ if __name__ == "__main__": parser = HfArgumentParser((RLOOConfig, ModelConfig)) - config, model_config = parser.parse_args_into_dataclasses() + training_args, model_config = parser.parse_args_into_dataclasses() # remove output_dir if exists - shutil.rmtree(config.output_dir, ignore_errors=True) + shutil.rmtree(training_args.output_dir, ignore_errors=True) ################ # Model & Tokenizer @@ -77,21 +77,21 @@ if tokenizer.chat_template is None: tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE reward_model = AutoModelForSequenceClassification.from_pretrained( - config.reward_model_path, trust_remote_code=model_config.trust_remote_code, num_labels=1 + training_args.reward_model_path, trust_remote_code=model_config.trust_remote_code, num_labels=1 ) ref_policy = AutoModelForCausalLM.from_pretrained( - config.sft_model_path, trust_remote_code=model_config.trust_remote_code + training_args.sft_model_path, trust_remote_code=model_config.trust_remote_code ) policy = AutoModelForCausalLM.from_pretrained( - config.sft_model_path, trust_remote_code=model_config.trust_remote_code + training_args.sft_model_path, trust_remote_code=model_config.trust_remote_code ) ################ # Dataset ################ - raw_datasets = load_dataset("trl-internal-testing/descriptiveness-sentiment-trl-style", split="descriptiveness") + dataset = load_dataset("trl-internal-testing/descriptiveness-sentiment-trl-style", split="descriptiveness") eval_samples = 20 - train_dataset = raw_datasets.select(range(len(raw_datasets) - eval_samples)) - eval_dataset = raw_datasets.select(range(len(raw_datasets) - eval_samples, len(raw_datasets))) + train_dataset = dataset.select(range(len(dataset) - eval_samples)) + eval_dataset = dataset.select(range(len(dataset) - eval_samples, len(dataset))) dataset_text_field = "prompt" def prepare_dataset(dataset, tokenizer): @@ -108,7 +108,7 @@ def tokenize(element): tokenize, batched=True, remove_columns=dataset.column_names, - num_proc=config.dataset_num_proc, + num_proc=training_args.dataset_num_proc, ) # Compute that only on the main process for faster data processing. 
@@ -121,7 +121,7 @@ def tokenize(element): # Training ################ trainer = RLOOTrainer( - config=config, + config=training_args, tokenizer=tokenizer, policy=policy, ref_policy=ref_policy, @@ -130,7 +130,7 @@ def tokenize(element): eval_dataset=eval_dataset, ) trainer.train() - trainer.save_model(config.output_dir) - if config.push_to_hub: + trainer.save_model(training_args.output_dir) + if training_args.push_to_hub: trainer.push_to_hub() trainer.generate_completions() diff --git a/examples/scripts/rloo/rloo_tldr.py b/examples/scripts/rloo/rloo_tldr.py index ae96d1f0c1..6ebad8d2e9 100644 --- a/examples/scripts/rloo/rloo_tldr.py +++ b/examples/scripts/rloo/rloo_tldr.py @@ -62,9 +62,9 @@ if __name__ == "__main__": parser = HfArgumentParser((RLOOConfig, ModelConfig)) - config, model_config = parser.parse_args_into_dataclasses() + training_args, model_config = parser.parse_args_into_dataclasses() # remove output_dir if exists - shutil.rmtree(config.output_dir, ignore_errors=True) + shutil.rmtree(training_args.output_dir, ignore_errors=True) ################ # Model & Tokenizer @@ -78,13 +78,13 @@ if tokenizer.chat_template is None: tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE reward_model = AutoModelForSequenceClassification.from_pretrained( - config.reward_model_path, trust_remote_code=model_config.trust_remote_code, num_labels=1 + training_args.reward_model_path, trust_remote_code=model_config.trust_remote_code, num_labels=1 ) ref_policy = AutoModelForCausalLM.from_pretrained( - config.sft_model_path, trust_remote_code=model_config.trust_remote_code + training_args.sft_model_path, trust_remote_code=model_config.trust_remote_code ) policy = AutoModelForCausalLM.from_pretrained( - config.sft_model_path, trust_remote_code=model_config.trust_remote_code + training_args.sft_model_path, trust_remote_code=model_config.trust_remote_code ) ################ # Dataset @@ -107,7 +107,7 @@ def tokenize(element): return dataset.map( tokenize, remove_columns=dataset.column_names, - num_proc=config.dataset_num_proc, + num_proc=training_args.dataset_num_proc, ) # Compute that only on the main process for faster data processing. 
@@ -116,15 +116,15 @@ def tokenize(element): train_dataset = prepare_dataset(train_dataset, tokenizer) eval_dataset = prepare_dataset(eval_dataset, tokenizer) # filtering - train_dataset = train_dataset.filter(lambda x: x["lengths"] <= 512, num_proc=config.dataset_num_proc) - eval_dataset = eval_dataset.filter(lambda x: x["lengths"] <= 512, num_proc=config.dataset_num_proc) + train_dataset = train_dataset.filter(lambda x: x["lengths"] <= 512, num_proc=training_args.dataset_num_proc) + eval_dataset = eval_dataset.filter(lambda x: x["lengths"] <= 512, num_proc=training_args.dataset_num_proc) assert train_dataset[0]["input_ids"][-1] != tokenizer.eos_token_id, "The last token should not be an EOS token" ################ # Training ################ trainer = RLOOTrainer( - config=config, + config=training_args, tokenizer=tokenizer, policy=policy, ref_policy=ref_policy, @@ -133,7 +133,7 @@ def tokenize(element): eval_dataset=eval_dataset, ) trainer.train() - trainer.save_model(config.output_dir) - if config.push_to_hub: + trainer.save_model(training_args.output_dir) + if training_args.push_to_hub: trainer.push_to_hub() trainer.generate_completions() diff --git a/tests/slow/test_sft_slow.py b/tests/slow/test_sft_slow.py index a6c73825d4..2d689b1ab4 100644 --- a/tests/slow/test_sft_slow.py +++ b/tests/slow/test_sft_slow.py @@ -67,7 +67,7 @@ def test_sft_trainer_str(self, model_name, packing): as expected. """ with tempfile.TemporaryDirectory() as tmp_dir: - args = SFTConfig( + training_args = SFTConfig( output_dir=tmp_dir, logging_strategy="no", report_to="none", @@ -80,7 +80,7 @@ def test_sft_trainer_str(self, model_name, packing): trainer = SFTTrainer( model_name, - args=args, + args=training_args, train_dataset=self.train_dataset, eval_dataset=self.eval_dataset, ) @@ -94,7 +94,7 @@ def test_sft_trainer_transformers(self, model_name, packing): as expected. """ with tempfile.TemporaryDirectory() as tmp_dir: - args = SFTConfig( + training_args = SFTConfig( output_dir=tmp_dir, logging_strategy="no", report_to="none", @@ -110,7 +110,7 @@ def test_sft_trainer_transformers(self, model_name, packing): trainer = SFTTrainer( model, - args=args, + args=training_args, tokenizer=tokenizer, train_dataset=self.train_dataset, eval_dataset=self.eval_dataset, @@ -128,7 +128,7 @@ def test_sft_trainer_peft(self, model_name, packing): as expected. """ with tempfile.TemporaryDirectory() as tmp_dir: - args = SFTConfig( + training_args = SFTConfig( output_dir=tmp_dir, logging_strategy="no", report_to="none", @@ -145,7 +145,7 @@ def test_sft_trainer_peft(self, model_name, packing): trainer = SFTTrainer( model, - args=args, + args=training_args, tokenizer=tokenizer, train_dataset=self.train_dataset, eval_dataset=self.eval_dataset, @@ -165,7 +165,7 @@ def test_sft_trainer_transformers_mp(self, model_name, packing): as expected in mixed precision. """ with tempfile.TemporaryDirectory() as tmp_dir: - args = SFTConfig( + training_args = SFTConfig( output_dir=tmp_dir, logging_strategy="no", report_to="none", @@ -182,7 +182,7 @@ def test_sft_trainer_transformers_mp(self, model_name, packing): trainer = SFTTrainer( model, - args=args, + args=training_args, tokenizer=tokenizer, train_dataset=self.train_dataset, eval_dataset=self.eval_dataset, @@ -199,7 +199,7 @@ def test_sft_trainer_transformers_mp_gc(self, model_name, packing, gradient_chec as expected in mixed precision + different scenarios of gradient_checkpointing. 
""" with tempfile.TemporaryDirectory() as tmp_dir: - args = SFTConfig( + training_args = SFTConfig( output_dir=tmp_dir, logging_strategy="no", report_to="none", @@ -218,7 +218,7 @@ def test_sft_trainer_transformers_mp_gc(self, model_name, packing, gradient_chec trainer = SFTTrainer( model, - args=args, + args=training_args, tokenizer=tokenizer, train_dataset=self.train_dataset, eval_dataset=self.eval_dataset, @@ -236,7 +236,7 @@ def test_sft_trainer_transformers_mp_gc_peft(self, model_name, packing, gradient as expected in mixed precision + different scenarios of gradient_checkpointing. """ with tempfile.TemporaryDirectory() as tmp_dir: - args = SFTConfig( + training_args = SFTConfig( output_dir=tmp_dir, logging_strategy="no", report_to="none", @@ -255,7 +255,7 @@ def test_sft_trainer_transformers_mp_gc_peft(self, model_name, packing, gradient trainer = SFTTrainer( model, - args=args, + args=training_args, tokenizer=tokenizer, train_dataset=self.train_dataset, eval_dataset=self.eval_dataset, @@ -280,7 +280,7 @@ def test_sft_trainer_transformers_mp_gc_device_map( as expected in mixed precision + different scenarios of gradient_checkpointing (single, multi-gpu, etc). """ with tempfile.TemporaryDirectory() as tmp_dir: - args = SFTConfig( + training_args = SFTConfig( output_dir=tmp_dir, logging_strategy="no", report_to="none", @@ -299,7 +299,7 @@ def test_sft_trainer_transformers_mp_gc_device_map( trainer = SFTTrainer( model, - args=args, + args=training_args, tokenizer=tokenizer, train_dataset=self.train_dataset, eval_dataset=self.eval_dataset, @@ -318,7 +318,7 @@ def test_sft_trainer_transformers_mp_gc_peft_qlora(self, model_name, packing, gr as expected in mixed precision + different scenarios of gradient_checkpointing. """ with tempfile.TemporaryDirectory() as tmp_dir: - args = SFTConfig( + training_args = SFTConfig( output_dir=tmp_dir, logging_strategy="no", report_to="none", @@ -339,7 +339,7 @@ def test_sft_trainer_transformers_mp_gc_peft_qlora(self, model_name, packing, gr trainer = SFTTrainer( model, - args=args, + args=training_args, tokenizer=tokenizer, train_dataset=self.train_dataset, eval_dataset=self.eval_dataset, @@ -363,7 +363,7 @@ def test_sft_trainer_with_chat_format_qlora(self, model_name, packing): with tempfile.TemporaryDirectory() as tmp_dir: train_dataset = load_dataset("trl-internal-testing/dolly-chatml-sft", split="train") - args = SFTConfig( + training_args = SFTConfig( packing=packing, max_seq_length=self.max_seq_length, output_dir=tmp_dir, @@ -383,7 +383,7 @@ def test_sft_trainer_with_chat_format_qlora(self, model_name, packing): trainer = SFTTrainer( model, - args=args, + args=training_args, tokenizer=tokenizer, train_dataset=train_dataset, peft_config=self.peft_config, @@ -403,7 +403,7 @@ def test_sft_trainer_with_liger(self, model_name, packing): with AutoLigerKernelForCausalLM as expected. 
""" with tempfile.TemporaryDirectory() as tmp_dir: - args = SFTConfig( + training_args = SFTConfig( output_dir=tmp_dir, logging_strategy="no", report_to="none", @@ -417,7 +417,7 @@ def test_sft_trainer_with_liger(self, model_name, packing): trainer = SFTTrainer( model_name, - args=args, + args=training_args, train_dataset=self.train_dataset, eval_dataset=self.eval_dataset, ) diff --git a/tests/test_alignprop_trainer.py b/tests/test_alignprop_trainer.py index bb25bb7cd7..995b91a750 100644 --- a/tests/test_alignprop_trainer.py +++ b/tests/test_alignprop_trainer.py @@ -42,7 +42,7 @@ class AlignPropTrainerTester(unittest.TestCase): """ def setUp(self): - alignprop_config = AlignPropConfig( + training_args = AlignPropConfig( num_epochs=2, train_gradient_accumulation_steps=1, train_batch_size=2, @@ -58,11 +58,9 @@ def setUp(self): pipeline_without_lora = DefaultDDPOStableDiffusionPipeline( pretrained_model, pretrained_model_revision=pretrained_revision, use_lora=False ) - self.trainer_with_lora = AlignPropTrainer( - alignprop_config, scorer_function, prompt_function, pipeline_with_lora - ) + self.trainer_with_lora = AlignPropTrainer(training_args, scorer_function, prompt_function, pipeline_with_lora) self.trainer_without_lora = AlignPropTrainer( - alignprop_config, scorer_function, prompt_function, pipeline_without_lora + training_args, scorer_function, prompt_function, pipeline_without_lora ) def tearDown(self) -> None: diff --git a/tests/test_callbacks.py b/tests/test_callbacks.py index a1e6902f93..e19375ff1d 100644 --- a/tests/test_callbacks.py +++ b/tests/test_callbacks.py @@ -70,7 +70,7 @@ def tokenize_function(examples): def test_basic(self): with tempfile.TemporaryDirectory() as tmp_dir: - args = TrainingArguments( + training_args = TrainingArguments( output_dir=tmp_dir, eval_strategy="steps", eval_steps=2, # evaluate every 2 steps @@ -81,7 +81,7 @@ def test_basic(self): trainer = TrainerWithRefModel( model=self.model, ref_model=self.ref_model, - args=args, + args=training_args, train_dataset=self.dataset["train"], eval_dataset=self.dataset["test"], tokenizer=self.tokenizer, @@ -97,7 +97,7 @@ def test_basic(self): def test_without_ref_model(self): # Same as before, but without the ref_model attribute. 
It should use the model attribute instead with tempfile.TemporaryDirectory() as tmp_dir: - args = TrainingArguments( + training_args = TrainingArguments( output_dir=tmp_dir, eval_strategy="steps", eval_steps=2, # evaluate every 2 steps @@ -107,7 +107,7 @@ def test_without_ref_model(self): ) trainer = Trainer( model=self.model, - args=args, + args=training_args, train_dataset=self.dataset["train"], eval_dataset=self.dataset["test"], tokenizer=self.tokenizer, @@ -130,7 +130,7 @@ def test_lora(self): task_type="CAUSAL_LM", ) self.model.add_adapter(peft_config) - args = TrainingArguments( + training_args = TrainingArguments( output_dir=tmp_dir, eval_strategy="steps", eval_steps=2, # evaluate every 2 steps @@ -140,7 +140,7 @@ def test_lora(self): ) trainer = Trainer( model=self.model, - args=args, + args=training_args, train_dataset=self.dataset["train"], eval_dataset=self.dataset["test"], tokenizer=self.tokenizer, diff --git a/tests/test_ddpo_trainer.py b/tests/test_ddpo_trainer.py index 71c94e7502..8db0b0747a 100644 --- a/tests/test_ddpo_trainer.py +++ b/tests/test_ddpo_trainer.py @@ -41,7 +41,7 @@ class DDPOTrainerTester(unittest.TestCase): """ def setUp(self): - self.ddpo_config = DDPOConfig( + self.training_args = DDPOConfig( num_epochs=2, train_gradient_accumulation_steps=1, per_prompt_stat_tracking_buffer_size=32, @@ -57,7 +57,7 @@ def setUp(self): pretrained_model, pretrained_model_revision=pretrained_revision, use_lora=False ) - self.trainer = DDPOTrainer(self.ddpo_config, scorer_function, prompt_function, pipeline) + self.trainer = DDPOTrainer(self.training_args, scorer_function, prompt_function, pipeline) return super().setUp() @@ -107,7 +107,7 @@ class DDPOTrainerWithLoRATester(DDPOTrainerTester): """ def setUp(self): - self.ddpo_config = DDPOConfig( + self.training_args = DDPOConfig( num_epochs=2, train_gradient_accumulation_steps=1, per_prompt_stat_tracking_buffer_size=32, @@ -123,6 +123,6 @@ def setUp(self): pretrained_model, pretrained_model_revision=pretrained_revision, use_lora=True ) - self.trainer = DDPOTrainer(self.ddpo_config, scorer_function, prompt_function, pipeline) + self.trainer = DDPOTrainer(self.training_args, scorer_function, prompt_function, pipeline) return super().setUp() diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py index f6a50c3ee7..7d6e5c4670 100644 --- a/tests/test_dpo_trainer.py +++ b/tests/test_dpo_trainer.py @@ -112,7 +112,7 @@ def mock_vision_processor(text, images=None, add_special_tokens=True): class TestTruncateTokens(unittest.TestCase): def setUp(self): with tempfile.TemporaryDirectory() as tmp_dir: - self.args = DPOConfig( + self.training_args = DPOConfig( max_length=20, max_prompt_length=10, truncation_mode="keep_start", output_dir=tmp_dir ) @@ -135,7 +135,7 @@ def test_truncate_tokens(self): ] prompt_tokens = [{"prompt_input_ids": list(range(15)), "prompt_attention_mask": [1] * 15}] - _truncate_tokens(chosen_tokens, rejected_tokens, prompt_tokens, self.args) + _truncate_tokens(chosen_tokens, rejected_tokens, prompt_tokens, self.training_args) # Check if prompt is truncated correctly self.assertEqual(len(chosen_tokens[0]["prompt_input_ids"]), 10) @@ -152,7 +152,7 @@ def test_truncate_tokens(self): self.assertEqual(len(rejected_tokens[0]["attention_mask"]), 10) def test_truncation_mode_keep_end(self): - self.args.truncation_mode = "keep_end" + self.training_args.truncation_mode = "keep_end" chosen_tokens = [ { "prompt_input_ids": list(range(15)), @@ -171,7 +171,7 @@ def test_truncation_mode_keep_end(self): ] prompt_tokens = 
[{"prompt_input_ids": list(range(15)), "prompt_attention_mask": [1] * 15}] - _truncate_tokens(chosen_tokens, rejected_tokens, prompt_tokens, self.args) + _truncate_tokens(chosen_tokens, rejected_tokens, prompt_tokens, self.training_args) # Check if prompt is truncated correctly from the end self.assertEqual(prompt_tokens[0]["prompt_input_ids"], list(range(5, 15))) @@ -190,9 +190,9 @@ def test_truncation_mode_keep_end(self): self.assertEqual(rejected_tokens[0]["attention_mask"], [1] * 10) def test_invalid_truncation_mode(self): - self.args.truncation_mode = "invalid_mode" + self.training_args.truncation_mode = "invalid_mode" with self.assertRaises(ValueError): - _truncate_tokens([], [], [], self.args) + _truncate_tokens([], [], [], self.training_args) class DPOTrainerTester(unittest.TestCase): @@ -895,7 +895,7 @@ def test_dpo_trainer_torch_dtype(self): # See https://github.com/huggingface/trl/issues/1751 dummy_dataset = load_dataset("trl-internal-testing/zen", "standard_preference") with tempfile.TemporaryDirectory() as tmp_dir: - dpo_config = DPOConfig( + training_args = DPOConfig( output_dir=tmp_dir, per_device_train_batch_size=2, max_steps=1, @@ -908,7 +908,7 @@ def test_dpo_trainer_torch_dtype(self): model=self.model_id, ref_model=self.model_id, tokenizer=self.tokenizer, - args=dpo_config, + args=training_args, train_dataset=dummy_dataset["train"], ) assert trainer.model.config.torch_dtype == torch.float16 @@ -916,7 +916,7 @@ def test_dpo_trainer_torch_dtype(self): # Now test when `torch_dtype` is provided but is wrong to either the model or the ref_model with tempfile.TemporaryDirectory() as tmp_dir: - dpo_config = DPOConfig( + training_args = DPOConfig( output_dir=tmp_dir, per_device_train_batch_size=2, max_steps=1, @@ -931,12 +931,12 @@ def test_dpo_trainer_torch_dtype(self): _ = DPOTrainer( model=self.model_id, tokenizer=self.tokenizer, - args=dpo_config, + args=training_args, train_dataset=dummy_dataset["train"], ) with tempfile.TemporaryDirectory() as tmp_dir: - dpo_config = DPOConfig( + training_args = DPOConfig( output_dir=tmp_dir, per_device_train_batch_size=2, max_steps=1, @@ -952,7 +952,7 @@ def test_dpo_trainer_torch_dtype(self): model=self.model_id, ref_model=self.model_id, tokenizer=self.tokenizer, - args=dpo_config, + args=training_args, train_dataset=dummy_dataset["train"], ) diff --git a/tests/test_iterative_sft_trainer.py b/tests/test_iterative_sft_trainer.py index 9cdda3418f..295d82bf69 100644 --- a/tests/test_iterative_sft_trainer.py +++ b/tests/test_iterative_sft_trainer.py @@ -100,14 +100,14 @@ def test_iterative_step_from_tensor(self, model_name, input_name): model = self.t5_model tokenizer = self.t5_tokenizer - args = TrainingArguments( + training_args = TrainingArguments( output_dir=tmp_dir, per_device_train_batch_size=2, max_steps=2, learning_rate=1e-3, report_to="none", ) - iterative_trainer = IterativeSFTTrainer(model=model, args=args, tokenizer=tokenizer) + iterative_trainer = IterativeSFTTrainer(model=model, args=training_args, tokenizer=tokenizer) iterative_trainer.optimizer.zero_grad = partial(iterative_trainer.optimizer.zero_grad, set_to_none=False) iterative_trainer.step(**inputs) diff --git a/tests/test_trainers_args.py b/tests/test_trainers_args.py index 173369a020..cbd43ccf0c 100644 --- a/tests/test_trainers_args.py +++ b/tests/test_trainers_args.py @@ -41,7 +41,7 @@ def test_bco(self): tokenizer = AutoTokenizer.from_pretrained("gpt2") dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train") with 
tempfile.TemporaryDirectory() as tmp_dir: - args = BCOConfig( + training_args = BCOConfig( tmp_dir, max_length=256, max_prompt_length=64, @@ -60,7 +60,9 @@ def test_bco(self): min_density_ratio=0.2, max_density_ratio=20.0, ) - trainer = BCOTrainer(model="gpt2", ref_model="gpt2", args=args, train_dataset=dataset, tokenizer=tokenizer) + trainer = BCOTrainer( + model="gpt2", ref_model="gpt2", args=training_args, train_dataset=dataset, tokenizer=tokenizer + ) self.assertEqual(trainer.args.max_length, 256) self.assertEqual(trainer.args.max_prompt_length, 64) self.assertEqual(trainer.args.max_completion_length, 64) @@ -82,7 +84,7 @@ def test_cpo(self): tokenizer = AutoTokenizer.from_pretrained("gpt2") dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") with tempfile.TemporaryDirectory() as tmp_dir: - args = CPOConfig( + training_args = CPOConfig( tmp_dir, max_length=256, max_prompt_length=64, @@ -101,7 +103,7 @@ def test_cpo(self): model_init_kwargs={"trust_remote_code": True}, dataset_num_proc=4, ) - trainer = CPOTrainer(model="gpt2", args=args, train_dataset=dataset, tokenizer=tokenizer) + trainer = CPOTrainer(model="gpt2", args=training_args, train_dataset=dataset, tokenizer=tokenizer) self.assertEqual(trainer.args.max_length, 256) self.assertEqual(trainer.args.max_prompt_length, 64) self.assertEqual(trainer.args.max_completion_length, 64) @@ -123,7 +125,7 @@ def test_dpo(self): tokenizer = AutoTokenizer.from_pretrained("gpt2") dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") with tempfile.TemporaryDirectory() as tmp_dir: - args = DPOConfig( + training_args = DPOConfig( tmp_dir, beta=0.5, label_smoothing=0.5, @@ -152,7 +154,9 @@ def test_dpo(self): ref_model_sync_steps=32, rpo_alpha=0.5, ) - trainer = DPOTrainer(model="gpt2", ref_model="gpt2", args=args, train_dataset=dataset, tokenizer=tokenizer) + trainer = DPOTrainer( + model="gpt2", ref_model="gpt2", args=training_args, train_dataset=dataset, tokenizer=tokenizer + ) self.assertEqual(trainer.args.beta, 0.5) self.assertEqual(trainer.args.label_smoothing, 0.5) self.assertEqual(trainer.args.loss_type, "hinge") @@ -184,7 +188,7 @@ def test_kto(self): tokenizer = AutoTokenizer.from_pretrained("gpt2") dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train") with tempfile.TemporaryDirectory() as tmp_dir: - args = KTOConfig( + training_args = KTOConfig( tmp_dir, max_length=256, max_prompt_length=64, @@ -202,7 +206,9 @@ def test_kto(self): ref_model_init_kwargs={"trust_remote_code": True}, dataset_num_proc=4, ) - trainer = KTOTrainer(model="gpt2", ref_model="gpt2", args=args, train_dataset=dataset, tokenizer=tokenizer) + trainer = KTOTrainer( + model="gpt2", ref_model="gpt2", args=training_args, train_dataset=dataset, tokenizer=tokenizer + ) self.assertEqual(trainer.args.max_length, 256) self.assertEqual(trainer.args.max_prompt_length, 64) self.assertEqual(trainer.args.max_completion_length, 64) @@ -223,7 +229,7 @@ def test_online_dpo(self): tokenizer = AutoTokenizer.from_pretrained("gpt2") dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") with tempfile.TemporaryDirectory() as tmp_dir: - args = OnlineDPOConfig( + training_args = OnlineDPOConfig( tmp_dir, max_new_tokens=42, temperature=0.5, @@ -236,7 +242,7 @@ def test_online_dpo(self): ref_model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-14m") reward_model = 
AutoModelForSequenceClassification.from_pretrained("EleutherAI/pythia-14m", num_labels=1) trainer = OnlineDPOTrainer( - args=args, + args=training_args, tokenizer=tokenizer, model=model, ref_model=ref_model, @@ -254,7 +260,7 @@ def test_orpo(self): tokenizer = AutoTokenizer.from_pretrained("gpt2") dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") with tempfile.TemporaryDirectory() as tmp_dir: - args = ORPOConfig( + training_args = ORPOConfig( tmp_dir, max_length=256, max_prompt_length=64, @@ -270,7 +276,7 @@ def test_orpo(self): dataset_num_proc=4, ) - trainer = ORPOTrainer(model="gpt2", args=args, train_dataset=dataset, tokenizer=tokenizer) + trainer = ORPOTrainer(model="gpt2", args=training_args, train_dataset=dataset, tokenizer=tokenizer) self.assertEqual(trainer.args.max_length, 256) self.assertEqual(trainer.args.max_prompt_length, 64) self.assertEqual(trainer.args.max_completion_length, 64) @@ -281,7 +287,7 @@ def test_orpo(self): def test_sft(self): dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") with tempfile.TemporaryDirectory() as tmp_dir: - args = SFTConfig( + training_args = SFTConfig( tmp_dir, dataset_text_field="dummy_text_field", packing=True, @@ -295,7 +301,7 @@ def test_sft(self): num_of_sequences=32, chars_per_token=4.2, ) - trainer = SFTTrainer("gpt2", args=args, train_dataset=dataset) + trainer = SFTTrainer("gpt2", args=training_args, train_dataset=dataset) self.assertEqual(trainer.args.dataset_text_field, "dummy_text_field") self.assertEqual(trainer.args.packing, True) self.assertEqual(trainer.args.max_seq_length, 256)
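
The patch above is purely mechanical: every `*Config` dataclass instance (`CPOConfig`, `SFTConfig`, `RLOOConfig`, ...) is now bound to a variable named `training_args` before being handed to its trainer, matching the `TrainingArguments` naming referenced in the PR title. As a quick reference, below is a minimal sketch of the convention after this change, using the `SFTTrainer` snippet from the docs touched above; it is illustrative only and not part of the patch itself.

```python
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

dataset = load_dataset("stanfordnlp/imdb", split="train")

# After this patch, the config object is consistently named `training_args` ...
training_args = SFTConfig(
    dataset_text_field="text",
    max_seq_length=512,
    output_dir="/tmp",
)

# ... and is passed to the trainer (via `args=` here; PPOv2/RLOO use `config=`,
# and AlignProp/DDPO take the config positionally, as shown in the example scripts above).
trainer = SFTTrainer(
    "facebook/opt-350m",
    train_dataset=dataset,
    args=training_args,
)
trainer.train()
```

The rename touches documentation, example scripts, and tests only; no trainer behavior changes in this commit.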