From c05a7940fe7186dc77bcc5ad4ab996a8a7f426e1 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Fri, 31 Jan 2025 15:20:45 +1100
Subject: [PATCH] [Releases 2.13] Add support for legacy OpenAI CLIP model
(#1107)
---
.../inference/embedding_models/open_clip_model.py | 3 ++-
tests/core/inference/test_open_clip_model_load.py | 13 ++++++++++++-
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/src/marqo/core/inference/embedding_models/open_clip_model.py b/src/marqo/core/inference/embedding_models/open_clip_model.py
index fdc050316..99057a6ec 100644
--- a/src/marqo/core/inference/embedding_models/open_clip_model.py
+++ b/src/marqo/core/inference/embedding_models/open_clip_model.py
@@ -210,7 +210,8 @@ def _load_model_and_image_preprocessor_from_open_clip_repo(self) -> Tuple[torch.
 
     def _load_tokenizer_from_checkpoint(self) -> Callable:
         if not self.model_properties.tokenizer:
-            return open_clip.get_tokenizer(self.model_properties.name)
+            # Replace '/' with '-' to support the legacy OpenAI CLIP model name style
+            return open_clip.get_tokenizer(self.model_properties.name.replace("/", "-"))
         else:
             logger.info(f"Custom HFTokenizer is provided. Loading...")
             return HFTokenizer(self.model_properties.tokenizer)
diff --git a/tests/core/inference/test_open_clip_model_load.py b/tests/core/inference/test_open_clip_model_load.py
index 7ecd6bb80..62e6c5339 100644
--- a/tests/core/inference/test_open_clip_model_load.py
+++ b/tests/core/inference/test_open_clip_model_load.py
@@ -276,4 +276,15 @@ def test_load_OpenCLIPModel_with_auth_hf(self):
         mock_download_model.assert_called_once_with(
             repo_location=ModelLocation(**model_properties["model_location"]),
             auth=model_auth,
-        )
\ No newline at end of file
+        )
+
+    def test_load_legacy_openai_clip_model(self):
+        """A test to ensure old OpenAI CLIP models (e.g., ViT-B/32) are loaded correctly."""
+        model_properties = {
+            "name": "ViT-B/32",  # Old OpenAI CLIP model name
+            "type": "open_clip",
+            "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e32-46683a32.pt",
+            "dimensions": 512
+        }
+        model = OPEN_CLIP(model_properties=model_properties, device="cpu")
+        model.load()
\ No newline at end of file
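
For reference, a minimal standalone sketch of the name normalization this patch performs before calling open_clip.get_tokenizer (assumes the open_clip_torch package is installed; the prompt string and printed shape are illustrative):

    import open_clip

    legacy_name = "ViT-B/32"                        # legacy OpenAI CLIP naming
    open_clip_name = legacy_name.replace("/", "-")  # -> "ViT-B-32", the open_clip registry form

    # get_tokenizer is the same call used in _load_tokenizer_from_checkpoint;
    # it expects the '-' style name, which is why the patch normalizes first.
    tokenizer = open_clip.get_tokenizer(open_clip_name)
    tokens = tokenizer(["a photo of a dog"])
    print(tokens.shape)  # typically torch.Size([1, 77]) for CLIP tokenizers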