From a2f14c72b4c0990423ce641105eb7e14e8c68c97 Mon Sep 17 00:00:00 2001
From: "Kim, Jin (Jay@SKT)"
Date: Wed, 13 Nov 2024 11:44:21 +0900
Subject: [PATCH 1/4] add omitted prepare command to multimodal README.md

---
 examples/multimodal/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/multimodal/README.md b/examples/multimodal/README.md
index 5ab0c7bf0b..c52c4a40aa 100644
--- a/examples/multimodal/README.md
+++ b/examples/multimodal/README.md
@@ -57,7 +57,7 @@ examples/multimodal/combine_mistral_clip.sh /path/to/mistral/model /path/to/clip
 
 ```
 cd /wds
-energon ./
+energon prepare ./
 ```
 
 select the following values for the presented options:

From 6557e6a70209008de730fc2729b4b9bdbe500643 Mon Sep 17 00:00:00 2001
From: singleheart
Date: Thu, 14 Nov 2024 10:11:41 +0900
Subject: [PATCH 2/4] add omitted image directory

---
 examples/multimodal/convert_llava_pretrain_to_wds.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/multimodal/convert_llava_pretrain_to_wds.py b/examples/multimodal/convert_llava_pretrain_to_wds.py
index 0092aef246..9c27ac8210 100644
--- a/examples/multimodal/convert_llava_pretrain_to_wds.py
+++ b/examples/multimodal/convert_llava_pretrain_to_wds.py
@@ -19,7 +19,7 @@
 
 with wds.ShardWriter(os.path.join(output, 'pretrain-%d.tar'), maxcount=10000) as shard_writer:
     for entry in tqdm(data):
-        with open(os.path.join(llava_pretrain_dir, entry['image']), "rb") as img_file:
+        with open(os.path.join(llava_pretrain_dir, 'image', entry['image']), "rb") as img_file:
             image_data = img_file.read()
         sample = {
             "__key__": entry['id'],

From 418c6d5f809452182acf6325b354a3caf8cfa5a9 Mon Sep 17 00:00:00 2001
From: singleheart
Date: Fri, 15 Nov 2024 15:36:03 +0900
Subject: [PATCH 3/4] Fix: Correct 'sample.answers' access to use the 'value' key

---
 examples/multimodal/dataset_helpers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/multimodal/dataset_helpers.py b/examples/multimodal/dataset_helpers.py
index 757d41ae47..541dfb69fd 100644
--- a/examples/multimodal/dataset_helpers.py
+++ b/examples/multimodal/dataset_helpers.py
@@ -277,7 +277,7 @@ def encode_any_single_turn_vqa(self, sample):
                 answer_idx = np.random.choice(weight_list.shape[0], 1, p=weight_list)[0]
                 cur_answer = answer_list[answer_idx]
             else:
-                cur_answer = sample.answers
+                cur_answer = sample.answers['value']
         else:
             raise NotImplementedError("Unsupported data type provided", sample)
 

From 5391356f3ebe96ab4c51219a0d25b67716b2df90 Mon Sep 17 00:00:00 2001
From: singleheart
Date: Fri, 15 Nov 2024 15:38:26 +0900
Subject: [PATCH 4/4] Fix: Apply 'mistral_custom_template' to llama3 for error resolution

---
 megatron/training/tokenizer/multimodal_tokenizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/megatron/training/tokenizer/multimodal_tokenizer.py b/megatron/training/tokenizer/multimodal_tokenizer.py
index 0c3ec6a906..d6d0084e45 100644
--- a/megatron/training/tokenizer/multimodal_tokenizer.py
+++ b/megatron/training/tokenizer/multimodal_tokenizer.py
@@ -93,7 +93,7 @@ def __init__(
            self._prompt_config = PromptConfig(
                assistant_prefix_len=4,
                pad_token_id=tokenizer.convert_tokens_to_ids("<|end_of_text|>"),
-               custom_chat_template=None,
+               custom_chat_template=mistral_custom_template,
                has_bos=True,
                has_system_role=True,
            )