From c05a7940fe7186dc77bcc5ad4ab996a8a7f426e1 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Fri, 31 Jan 2025 15:20:45 +1100
Subject: [PATCH] [Releases 2.13] Add support for legacy OpenAI CLIP model (#1107)

---
 .../inference/embedding_models/open_clip_model.py |  3 ++-
 tests/core/inference/test_open_clip_model_load.py | 13 ++++++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/marqo/core/inference/embedding_models/open_clip_model.py b/src/marqo/core/inference/embedding_models/open_clip_model.py
index fdc050316..99057a6ec 100644
--- a/src/marqo/core/inference/embedding_models/open_clip_model.py
+++ b/src/marqo/core/inference/embedding_models/open_clip_model.py
@@ -210,7 +210,8 @@ def _load_model_and_image_preprocessor_from_open_clip_repo(self) -> Tuple[torch.
 
     def _load_tokenizer_from_checkpoint(self) -> Callable:
         if not self.model_properties.tokenizer:
-            return open_clip.get_tokenizer(self.model_properties.name)
+            # Replace '/' with '-' to support the legacy OpenAI CLIP model name style (e.g., 'ViT-B/32')
+            return open_clip.get_tokenizer(self.model_properties.name.replace("/", "-"))
         else:
             logger.info(f"Custom HFTokenizer is provided. Loading...")
             return HFTokenizer(self.model_properties.tokenizer)
diff --git a/tests/core/inference/test_open_clip_model_load.py b/tests/core/inference/test_open_clip_model_load.py
index 7ecd6bb80..62e6c5339 100644
--- a/tests/core/inference/test_open_clip_model_load.py
+++ b/tests/core/inference/test_open_clip_model_load.py
@@ -276,4 +276,15 @@ def test_load_OpenCLIPModel_with_auth_hf(self):
             mock_download_model.assert_called_once_with(
                 repo_location=ModelLocation(**model_properties["model_location"]),
                 auth=model_auth,
-            )
\ No newline at end of file
+            )
+
+    def test_load_legacy_openai_clip_model(self):
+        """Ensure a model named in the legacy OpenAI CLIP style (e.g., ViT-B/32) loads correctly."""
+        model_properties = {
+            "name": "ViT-B/32",  # Legacy OpenAI CLIP model name
+            "type": "open_clip",
+            "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e32-46683a32.pt",
+            "dimensions": 512
+        }
+        model = OPEN_CLIP(model_properties=model_properties, device="cpu")
+        model.load()
\ No newline at end of file
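
Note on the change above: open_clip registers architecture names with dashes (e.g., ViT-B-32), while the original OpenAI CLIP release used slashes (e.g., ViT-B/32), so the tokenizer lookup fails for legacy names unless they are normalised first. The following is a minimal standalone sketch of that normalisation outside Marqo's OPEN_CLIP wrapper; the helper name normalise_clip_name is illustrative only and is not part of the codebase.

import open_clip


def normalise_clip_name(name: str) -> str:
    """Map a legacy OpenAI CLIP name such as 'ViT-B/32' to open_clip's 'ViT-B-32'."""
    return name.replace("/", "-")


legacy_name = "ViT-B/32"                              # old OpenAI-style name
tokenizer = open_clip.get_tokenizer(normalise_clip_name(legacy_name))
tokens = tokenizer(["a photo of a dog"])              # tokenise a caption
print(tokens.shape)                                   # typically torch.Size([1, 77])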