fix tests with main revision and read token #33560

Merged
8 commits merged on Sep 19, 2024
37 changes: 12 additions & 25 deletions tests/models/mamba2/test_modeling_mamba2.py
@@ -20,7 +20,7 @@
 from parameterized import parameterized
 
 from transformers import AutoTokenizer, Mamba2Config, is_torch_available
-from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
+from transformers.testing_utils import require_read_token, require_torch, require_torch_gpu, slow, torch_device
 
 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
@@ -96,7 +96,7 @@ def __init__(
         self.tie_word_embeddings = tie_word_embeddings
 
     def get_large_model_config(self):
-        return Mamba2Config.from_pretrained("revision='refs/pr/9'")
+        return Mamba2Config.from_pretrained("mistralai/Mamba-Codestral-7B-v0.1")
 
     def prepare_config_and_inputs(
         self, gradient_checkpointing=False, scale_attn_by_inverse_layer_idx=False, reorder_and_upcast_attn=False
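
For context on the two hunks above (an inference from the diff, not something the PR states explicitly): the Mamba-Codestral files now live on the checkpoint's default main revision, so pinning revision="refs/pr/9" is no longer needed, and the repository appears to be gated, which is why require_read_token enters the imports. A minimal loading sketch under those assumptions:

# Illustrative sketch, not part of the PR. Assumes the checkpoint is gated and a token
# is available via `huggingface_hub.login()` or the HF_TOKEN environment variable.
from transformers import AutoTokenizer, Mamba2Config

model_id = "mistralai/Mamba-Codestral-7B-v0.1"

# `revision` defaults to "main", so the old "refs/pr/9" pin can simply be dropped.
config = Mamba2Config.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id, from_slow=True, legacy=False)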
@@ -199,34 +199,26 @@ def test_initialization(self):
     def test_tied_weights_keys(self):
         pass
 
-    @unittest.skip(reason="To fix, Mamba 2 cache slicing is interacting with beam search")
-    def test_beam_search_generate_dict_outputs_use_cache(self):
-        pass
-
-    @unittest.skip(reason="To fix, Mamba 2 cache slicing is interacting with beam search")
-    def test_beam_sample_generate(self):
+    @unittest.skip(reason="To fix, Mamba 2 cache slicing test case is an edge case")
+    def test_generate_without_input_ids(self):
         pass
 
     @unittest.skip(reason="To fix, Mamba 2 cache slicing test case is an edge case")
-    def test_generate_without_input_ids(self):
+    def test_generate_from_inputs_embeds_decoder_only(self):
         pass
 
     @unittest.skip(reason="To fix, Mamba 2 cache slicing test case is an edge case")
     def test_greedy_generate_dict_outputs_use_cache(self):
         pass
 
-    @unittest.skip(reason="Initialization of mamba2 fails this")
-    def test_save_load_fast_init_from_base(self):
+    @unittest.skip(reason="To fix, Mamba 2 cache slicing is interacting with beam search")
+    def test_beam_search_generate_dict_outputs_use_cache(self):
         pass
 
     @unittest.skip(reason="A large mamba2 would be necessary (and costly) for that")
     def test_multi_gpu_data_parallel_forward(self):
         pass
 
-    @unittest.skip(reason="To fix, Mamba 2 cache slicing test case is an edge case")
-    def test_generate_from_inputs_embeds_decoder_only(self):
-        pass
-
     def test_model_outputs_equivalence(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
 
@@ -292,12 +284,11 @@ def test_inputs_embeds_matches_input_ids_with_generate(self):
 
 @require_torch
 @slow
+@require_read_token
 class Mamba2IntegrationTest(unittest.TestCase):
     def setUp(self):
         self.model_id = "mistralai/Mamba-Codestral-7B-v0.1"
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            self.model_id, revision="refs/pr/9", from_slow=True, legacy=False
-        )
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, from_slow=True, legacy=False)
         self.prompt = ("[INST]Write a hello world program in C++.",)
 
     @parameterized.expand(
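
The @require_read_token decorator added above gates the whole integration class on token availability. As a rough mental model (a hypothetical stand-in, not the actual transformers.testing_utils implementation), it behaves like a skip-unless wrapper:

import os
import unittest


def require_read_token_sketch(test_case):
    # Hypothetical stand-in for transformers.testing_utils.require_read_token:
    # skip unless a Hugging Face token is configured (HF_TOKEN is the current
    # environment variable; HUGGING_FACE_HUB_TOKEN is the older name).
    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
    return unittest.skipUnless(token is not None, "requires a Hugging Face read token")(test_case)

Applied to a class, unittest.skipUnless skips every test method in it, which matches how the real decorator is used on Mamba2IntegrationTest here.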
@@ -317,7 +308,7 @@ def test_simple_generate(self, device):
         tokenizer = self.tokenizer
         tokenizer.pad_token_id = tokenizer.eos_token_id
 
-        model = Mamba2ForCausalLM.from_pretrained(self.model_id, revision="refs/pr/9", torch_dtype=torch.bfloat16)
+        model = Mamba2ForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.bfloat16)
         model.to(device)
         input_ids = tokenizer("[INST]Write a hello world program in C++.[/INST]", return_tensors="pt")["input_ids"].to(
             device
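
Putting the pieces of test_simple_generate together, the path it exercises looks roughly like the following; the generation length and the final decode are assumptions for illustration, not values copied from the test.

# Illustrative sketch of the generation path exercised above (settings are assumed).
import torch
from transformers import AutoTokenizer, Mamba2ForCausalLM

model_id = "mistralai/Mamba-Codestral-7B-v0.1"
device = "cuda"

tokenizer = AutoTokenizer.from_pretrained(model_id, from_slow=True, legacy=False)
tokenizer.pad_token_id = tokenizer.eos_token_id
model = Mamba2ForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(device)

input_ids = tokenizer(
    "[INST]Write a hello world program in C++.[/INST]", return_tensors="pt"
)["input_ids"].to(device)

output = model.generate(input_ids, max_new_tokens=30)
print(tokenizer.decode(output[0], skip_special_tokens=True))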
@@ -343,9 +334,7 @@ def test_batched_equivalence_with_cache(self):
             "[INST] Write a simple Fibonacci number computation function in Rust that does memoization, with comments, in safe Rust.[/INST]",
         ]
 
-        model = Mamba2ForCausalLM.from_pretrained(self.model_id, revision="refs/pr/9", torch_dtype=torch.bfloat16).to(
-            torch_device
-        )
+        model = Mamba2ForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.bfloat16).to(torch_device)
         tokenizer.pad_token_id = tokenizer.eos_token_id
         # batched generation
         tokenized_prompts = tokenizer(prompt, return_tensors="pt", padding="longest").to(torch_device)
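
The batched-equivalence tests check that padding a batch does not change what each individual prompt generates. A condensed sketch of that idea (prompts, generation settings, and the comparison are assumed for illustration, not copied from the test):

# Condensed sketch of the batched-vs-individual equivalence idea (assumed settings).
import torch
from transformers import AutoTokenizer, Mamba2ForCausalLM

model_id = "mistralai/Mamba-Codestral-7B-v0.1"
device = "cuda"
prompts = [
    "[INST]Write a hello world program in C++.[/INST]",
    "[INST]Write a simple Fibonacci function in Rust.[/INST]",
]

tokenizer = AutoTokenizer.from_pretrained(model_id, from_slow=True, legacy=False)
tokenizer.pad_token_id = tokenizer.eos_token_id
model = Mamba2ForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(device)

# Batched generation, padded to the longest prompt.
batch = tokenizer(prompts, return_tensors="pt", padding="longest").to(device)
batched_texts = tokenizer.batch_decode(
    model.generate(**batch, max_new_tokens=30, use_cache=True), skip_special_tokens=True
)

# Each prompt generated on its own should yield the same text as its batched row.
for text, expected in zip(prompts, batched_texts):
    single = tokenizer(text, return_tensors="pt").to(device)
    out = model.generate(**single, max_new_tokens=30, use_cache=True)
    assert tokenizer.decode(out[0], skip_special_tokens=True) == expected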
@@ -375,9 +364,7 @@ def test_batched_equivalence_without_cache(self):
             "[INST] Write a simple Fibonacci number computation function in Rust that does memoization, with comments, in safe Rust.[/INST]",
         ]
 
-        model = Mamba2ForCausalLM.from_pretrained(self.model_id, revision="refs/pr/9", torch_dtype=torch.bfloat16).to(
-            torch_device
-        )
+        model = Mamba2ForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.bfloat16).to(torch_device)
         tokenizer.pad_token_id = tokenizer.eos_token_id
         # batched generation
         tokenized_prompts = tokenizer(prompt, return_tensors="pt", padding="longest").to(torch_device)