How to reproduce

Using almost the same code from the tutorials:
from tqdm import tqdm
import os

from torch.optim import Adam
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSeq2SeqLM

import oslo
from oslo.torch.distributed import ParallelContext, ParallelMode
from oslo.torch.nn.parallel import TensorParallel

from datasets import load_dataset
from torch.utils.data import DataLoader, DistributedSampler

BATCH_SIZE = 4
SEQ_LEN = 64
SAVE_INTERVAL = 50
TRAIN_STEP = 100

model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
optimizer = Adam(model.parameters(), lr=3e-5)
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
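# (Editor's note, an observation rather than part of the original repro:
# T5's tokenizer already defines <pad>, so the eos override below looks like
# a leftover from the GPT-2 tutorial and is likely unnecessary here.)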
# Add pad token for batch training because GPT2 tokenizer doesn't have pad token.
tokenizer.pad_token = tokenizer.eos_token

tp_size = 2
tp_depth = 1
dp_size = 2

parallel_context = ParallelContext.from_torch(
    data_parallel_size=dp_size,
    pipeline_parallel_size=1,
    tensor_parallel_size=tp_size,
    tensor_parallel_mode=ParallelMode.TENSOR_1D,
    tensor_parallel_depth=tp_depth,
)
model = TensorParallel(model, parallel_context)
oslo.ready(model, parallel_context)
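# (Note: tp_size * dp_size = 4, so this script needs a world size of 4 --
# e.g. `torchrun --nproc_per_node=4 train.py`; the exact launch command is
# an assumption, any torch.distributed launcher with 4 processes works.)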
datasets = load_dataset("squad").data["train"]["context"]
datasets = [str(_) for _ in datasets[: TRAIN_STEP * BATCH_SIZE]]

rank = parallel_context.get_local_rank(ParallelMode.DATA)
train_sampler = DistributedSampler(
    datasets, num_replicas=dp_size, rank=rank
)
dataloader = DataLoader(datasets, batch_size=BATCH_SIZE, sampler=train_sampler, shuffle=False)
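# (Note: the sampler is keyed on the data-parallel rank, so the two
# tensor-parallel ranks inside each DP group receive identical batches,
# which is what 1D tensor parallelism expects.)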
for step, batch in enumerate(tqdm(dataloader)):
    optimizer.zero_grad()

    # Make batch
    input_batch = tokenizer(
        batch,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=SEQ_LEN,
    ).to("cuda")

    # Forward-Backward-Step
    loss = model(**input_batch, labels=input_batch["input_ids"]).loss
    loss.backward()
    optimizer.step()

# Save the merged model using `save_pretrained`
model.save_pretrained(
    save_directory="./parallel_ckpt",
    merge_checkpoints=True
)
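For context when reading the error below: flan-t5-small's feed-forward block projects d_model = 512 up to d_ff = 1024 and back, and under 1D tensor parallelism with degree 2 the usual Megatron-style layout column-shards the up-projection and row-shards the down-projection wo. A minimal sketch of the expected per-rank shard shapes (the layout is an assumption about how OSLO splits T5, not verified against its source):

# Expected per-rank feed-forward shard shapes for flan-t5-small under
# 1D tensor parallelism with tp_size=2 (a sketch, not OSLO's actual output).
d_model, d_ff, tp = 512, 1024, 2

wi_shard = (d_ff // tp, d_model)   # column-parallel up-projection: (512, 512)
wo_shard = (d_model, d_ff // tp)   # row-parallel down-projection:  (512, 512)
print("expected wi shard:", wi_shard, "expected wo shard:", wo_shard)

Note that the (512, 512) wo shard is exactly the mat2 that appears in the error below.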
Error
loss = model(**input_batch, labels=input_batch["input_ids"]).loss
File "/home/shreyansh/miniconda3/envs/shreyansh-env-py10/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/shreyansh/miniconda3/envs/shreyansh-env-py10/lib/python3.10/site-packages/oslo_core-3.0.0-py3.10.egg/oslo/torch/nn/parallel/tensor_parallel/tensor_parallel.py", line 95, in forward
return self.module_forward(*args, **kwargs)
File "/home/shreyansh/miniconda3/envs/shreyansh-env-py10/lib/python3.10/site-packages/oslo_core-3.0.0-py3.10.egg/oslo/torch/nn/parallel/tensor_parallel/_1d/_wrapper.py", line 61, in forward
return self.module_forward(*args, **kwargs)
File "/home/shreyansh/miniconda3/envs/shreyansh-env-py10/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1709, in forward
encoder_outputs = self.encoder(
File "/home/shreyansh/miniconda3/envs/shreyansh-env-py10/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/shreyansh/miniconda3/envs/shreyansh-env-py10/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1123, in forward
layer_outputs = layer_module(
File "/home/shreyansh/miniconda3/envs/shreyansh-env-py10/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/shreyansh/miniconda3/envs/shreyansh-env-py10/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 755, in forward
hidden_states = self.layer[-1](hidden_states)
File "/home/shreyansh/miniconda3/envs/shreyansh-env-py10/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/shreyansh/miniconda3/envs/shreyansh-env-py10/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 344, in forward
forwarded_states = self.DenseReluDense(forwarded_states)
File "/home/shreyansh/miniconda3/envs/shreyansh-env-py10/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/shreyansh/miniconda3/envs/shreyansh-env-py10/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 327, in forward
hidden_states = self.wo(hidden_states)
File "/home/shreyansh/miniconda3/envs/shreyansh-env-py10/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/shreyansh/miniconda3/envs/shreyansh-env-py10/lib/python3.10/site-packages/oslo_core-3.0.0-py3.10.egg/oslo/torch/nn/modules/linear.py", line 149, in forward
outputs = F.linear(input, self.weight)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (256x1024 and 512x512)
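Reading the shapes back: mat1 is (256, 1024) = (BATCH_SIZE * SEQ_LEN, d_ff), i.e. the activation reaching wo still carries the full, unsplit d_ff = 1024, while mat2 (512, 512) is a wo shard that expects only d_ff / 2 = 512 input features. One plausible reading (an assumption, not verified against OSLO's code): flan-t5 uses the gated feed-forward variant (wi_0/wi_1 instead of a single wi), and the gated up-projections were left unsharded while wo was row-sharded. A quick diagnostic sketch to check which feed-forward weights actually got split after oslo.ready():

# Hypothetical diagnostic: dump the feed-forward weight shapes after
# oslo.ready() to see which projections were sharded on this rank.
for name, param in model.named_parameters():
    if "DenseReluDense" in name and name.endswith("weight"):
        print(name, tuple(param.shape))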
Environment
OS : Ubuntu 22.04
Python version : 3.10.12
Transformers version : 4.34.0
Whether to use Docker: No
Misc.: OSLO 3.0.0 (oslo_core-3.0.0, per the traceback above)