5 files changed, +5 -5 lines changed

File 1 of 5:

@@ -140,7 +140,7 @@ loss=torchtune.modules.loss.CEWithChunkedOutputLoss \
 enable_activation_checkpointing=True \
 optimizer_in_bwd=False \
 enable_activation_offloading=True \
-optimizer._component_=torch.optim.AdamW \
+optimizer=torch.optim.AdamW \
 tokenizer.max_seq_len=4096 \
 gradient_accumulation_steps=1 \
 epochs=1 \
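This hunk updates a documented fine-tuning command: the explicit optimizer._component_=torch.optim.AdamW override is shortened to optimizer=torch.optim.AdamW, which torchtune's CLI override parsing appears to resolve to the same optimizer component. As a minimal sketch, the equivalent section of the recipe's YAML config would look roughly like the following (the lr value is illustrative and not taken from this change):

optimizer:
  _component_: torch.optim.AdamW
  lr: 2e-5  # illustrative learning rate, not part of this diff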
File 2 of 5:

@@ -61,7 +61,7 @@ lr_scheduler:
   num_warmup_steps: 100

 loss:
-  _component_: torch.nn.CrossEntropyLoss
+  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss

 # Training
 epochs: 1
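This and the remaining config hunks below make the same substitution: the stock torch.nn.CrossEntropyLoss is swapped for torchtune's CEWithChunkedOutputLoss, which computes the cross-entropy over the output logits in chunks to lower peak memory during fine-tuning. A minimal sketch of the resulting loss section, assuming the loss also accepts a num_output_chunks argument (that knob and its value are illustrative, not part of this diff):

loss:
  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
  # num_output_chunks is an assumed optional argument: how many chunks the
  # logits are split into before the loss is computed; more chunks generally
  # means lower peak memory.
  num_output_chunks: 8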
File 3 of 5:

@@ -63,7 +63,7 @@ lr_scheduler:
   num_warmup_steps: 100

 loss:
-  _component_: torch.nn.CrossEntropyLoss
+  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss

 # Training
 epochs: 1
File 4 of 5:

@@ -64,7 +64,7 @@ lr_scheduler:
   num_warmup_steps: 100

 loss:
-  _component_: torch.nn.CrossEntropyLoss
+  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss

 # Training
 epochs: 1
File 5 of 5:

@@ -61,7 +61,7 @@ lr_scheduler:
   num_warmup_steps: 100

 loss:
-  _component_: torch.nn.CrossEntropyLoss
+  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss

 fsdp:
   cpu_offload: False