Merge pull request #141 from bespokelabsai/CURATOR-28-add-a-lite-llm-backend-for-curator

Add LiteLLM+instructor (for structured output) backend for curator
CharlieJCJ authored Dec 4, 2024
2 parents 44d5b72 + 2e0ac2c commit 860b6b9
Showing 8 changed files with 1,425 additions and 897 deletions.
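
At a glance, the new capability is exposed through a single backend argument on curator.Prompter (it can also be left unset and auto-detected, as the prompter.py diff below shows). The following is a minimal sketch of the headline usage, not taken from the PR itself; it assumes a LiteLLM-supported model name and the matching provider key (here GEMINI_API_KEY) already set in the environment.

from bespokelabs import curator

# Minimal illustrative prompter; any LiteLLM-supported model name should work,
# provided the corresponding provider API key is set in the environment.
poem_prompter = curator.Prompter(
    model_name="gemini/gemini-1.5-flash",
    prompt_func=lambda: "Write a two-line poem about data curation.",
    backend="litellm",
)
print(poem_prompter().to_pandas())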
52 changes: 52 additions & 0 deletions examples/litellm_recipe_prompting.py
@@ -0,0 +1,52 @@
from typing import List
from pydantic import BaseModel, Field
from bespokelabs import curator
from datasets import Dataset


def main():
    # List of cuisines to generate recipes for
    cuisines = [
        {"cuisine": cuisine}
        for cuisine in [
            "Chinese",
            "Italian",
            "Mexican",
            "French",
            "Japanese",
            "Indian",
            "Thai",
            "Korean",
            "Vietnamese",
            "Brazilian",
        ]
    ]
    cuisines = Dataset.from_list(cuisines)

    # Create prompter using LiteLLM backend
    #############################################
    # To use Gemini models:
    # 1. Go to https://aistudio.google.com/app/apikey
    # 2. Generate an API key
    # 3. Set environment variable: GEMINI_API_KEY
    #############################################

    recipe_prompter = curator.Prompter(
        model_name="gemini/gemini-1.5-flash",
        prompt_func=lambda row: f"Generate a random {row['cuisine']} recipe. Be creative but keep it realistic.",
        parse_func=lambda row, response: {
            "recipe": response,
            "cuisine": row["cuisine"],
        },
        backend="litellm",
    )

    # Generate recipes for all cuisines
    recipes = recipe_prompter(cuisines)

    # Print results
    print(recipes.to_pandas())


if __name__ == "__main__":
    main()
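
For anyone running this example locally, the Gemini key can also be set from Python just before constructing the prompter instead of exporting it in the shell. A small convenience sketch (the placeholder value is hypothetical):

import os

# Assumes a key generated at https://aistudio.google.com/app/apikey;
# the example above expects it in the GEMINI_API_KEY environment variable.
os.environ.setdefault("GEMINI_API_KEY", "<your-gemini-api-key>")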
71 changes: 71 additions & 0 deletions examples/litellm_recipe_structured_output.py
@@ -0,0 +1,71 @@
from typing import List
from pydantic import BaseModel, Field
from bespokelabs import curator
import logging

logger = logging.getLogger(__name__)


# Define response format using Pydantic
class Recipe(BaseModel):
    title: str = Field(description="Title of the recipe")
    ingredients: List[str] = Field(description="List of ingredients needed")
    instructions: List[str] = Field(description="Step by step cooking instructions")
    prep_time: int = Field(description="Preparation time in minutes")
    cook_time: int = Field(description="Cooking time in minutes")
    servings: int = Field(description="Number of servings")


class Cuisines(BaseModel):
    cuisines_list: List[str] = Field(description="A list of cuisines.")


def main():
    # We define a prompter that generates cuisines
    #############################################
    # To use Claude models:
    # 1. Go to https://console.anthropic.com/settings/keys
    # 2. Generate an API key or use an existing API key
    # 3. Set environment variable: ANTHROPIC_API_KEY
    #############################################
    cuisines_generator = curator.Prompter(
        prompt_func=lambda: f"Generate 10 diverse cuisines.",
        model_name="claude-3-5-haiku-20241022",
        response_format=Cuisines,
        parse_func=lambda _, cuisines: [{"cuisine": t} for t in cuisines.cuisines_list],
        backend="litellm",
    )
    cuisines = cuisines_generator()
    print(cuisines.to_pandas())

    #############################################
    # To use Gemini models:
    # 1. Go to https://aistudio.google.com/app/apikey
    # 2. Generate an API key or use an existing API key
    # 3. Set environment variable: GEMINI_API_KEY
    #############################################
    recipe_prompter = curator.Prompter(
        model_name="gemini/gemini-1.5-flash",
        prompt_func=lambda row: f"Generate a random {row['cuisine']} recipe. Be creative but keep it realistic.",
        parse_func=lambda row, response: {
            "title": response.title,
            "ingredients": response.ingredients,
            "instructions": response.instructions,
            "prep_time": response.prep_time,
            "cook_time": response.cook_time,
            "servings": response.servings,
            "cuisine": row["cuisine"],
        },
        response_format=Recipe,
        backend="litellm",
    )

    # Generate recipes for all cuisines
    recipes = recipe_prompter(cuisines)

    # Print results
    print(recipes.to_pandas())


if __name__ == "__main__":
    main()
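
To make the parse_func contract concrete: with response_format=Recipe, parse_func receives the parsed Recipe instance (hence the attribute access above), and the dict it returns becomes one dataset row. Below is a standalone sketch reusing the Recipe model defined in this file, with purely made-up data and no API call involved.

# Hypothetical Recipe instance, shaped like what parse_func would receive
example_response = Recipe(
    title="Quick Veggie Stir-Fry",
    ingredients=["rice", "mixed vegetables", "soy sauce"],
    instructions=["Cook rice.", "Stir-fry the vegetables.", "Season and serve."],
    prep_time=10,
    cook_time=15,
    servings=2,
)
example_row = {"cuisine": "Chinese"}

# Mirrors the parse_func above: flatten the structured response into a flat row
parsed_row = {
    "title": example_response.title,
    "ingredients": example_response.ingredients,
    "instructions": example_response.instructions,
    "prep_time": example_response.prep_time,
    "cook_time": example_response.cook_time,
    "servings": example_response.servings,
    "cuisine": example_row["cuisine"],
}
print(parsed_row)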
536 changes: 272 additions & 264 deletions poetry.lock

Large diffs are not rendered by default.

100 changes: 82 additions & 18 deletions src/bespokelabs/curator/prompter/prompter.py
@@ -21,6 +21,9 @@
from bespokelabs.curator.request_processor.openai_online_request_processor import (
    OpenAIOnlineRequestProcessor,
)
from bespokelabs.curator.request_processor.litellm_online_request_processor import (
    LiteLLMOnlineRequestProcessor,
)

_CURATOR_DEFAULT_CACHE_DIR = "~/.cache/curator"
T = TypeVar("T")
@@ -31,6 +34,40 @@
class Prompter:
    """Interface for prompting LLMs."""

    @staticmethod
    def _determine_backend(
        model_name: str, response_format: Optional[Type[BaseModel]] = None
    ) -> str:
        """Determine which backend to use based on model name and response format.
        Args:
            model_name (str): Name of the model
            response_format (Optional[Type[BaseModel]]): Response format if specified
        Returns:
            str: Backend to use ("openai" or "litellm")
        """
        model_name = model_name.lower()

        # GPT-4o models with response format should use OpenAI
        if (
            response_format
            and OpenAIOnlineRequestProcessor(model_name).check_structured_output_support()
        ):
            logger.info(f"Requesting structured output from {model_name}, using OpenAI backend")
            return "openai"

        # GPT models and O1 models without response format should use OpenAI
        if not response_format and any(x in model_name for x in ["gpt-", "o1-preview", "o1-mini"]):
            logger.info(f"Requesting text output from {model_name}, using OpenAI backend")
            return "openai"

        # Default to LiteLLM for all other cases
        logger.info(
            f"Requesting {f'structured' if response_format else 'text'} output from {model_name}, using LiteLLM backend"
        )
        return "litellm"

    def __init__(
        self,
        model_name: str,
@@ -45,6 +82,7 @@ def __init__(
            ]
        ] = None,
        response_format: Optional[Type[BaseModel]] = None,
        backend: Optional[str] = None,
        batch: bool = False,
        batch_size: Optional[int] = None,
        temperature: Optional[float] = None,
@@ -64,6 +102,7 @@ def __init__(
                response object and returns the parsed output
            response_format (Optional[Type[BaseModel]]): A Pydantic model specifying the
                response format from the LLM.
            backend (Optional[str]): The backend to use ("openai" or "litellm"). If None, will be auto-determined
            batch (bool): Whether to use batch processing
            batch_size (Optional[int]): The size of the batch to use, only used if batch is True
            temperature (Optional[float]): The temperature to use for the LLM, only used if batch is False
@@ -88,15 +127,49 @@ def __init__(
            model_name, prompt_func, parse_func, response_format
        )
        self.batch_mode = batch
        if batch:
            if batch_size is None:
                batch_size = 1_000
                logger.info(
                    f"batch=True but no batch_size provided, using default batch_size of {batch_size:,}"

        # Auto-determine backend if not specified
        # Use provided backend or auto-determine based on model and format
        if backend is not None:
            self.backend = backend
        else:
            self.backend = self._determine_backend(model_name, response_format)

        # Select request processor based on backend
        if self.backend == "openai":
            if batch:
                if batch_size is None:
                    batch_size = 1_000
                    logger.info(
                        f"batch=True but no batch_size provided, using default batch_size of {batch_size:,}"
                    )
                self._request_processor = OpenAIBatchRequestProcessor(
                    model=model_name,
                    batch_size=batch_size,
                    temperature=temperature,
                    top_p=top_p,
                    presence_penalty=presence_penalty,
                    frequency_penalty=frequency_penalty,
                )
            else:
                if batch_size is not None:
                    logger.warning(
                        f"Prompter argument `batch_size` {batch_size} is ignored because `batch` is False"
                    )
                self._request_processor = OpenAIOnlineRequestProcessor(
                    model=model_name,
                    temperature=temperature,
                    top_p=top_p,
                    presence_penalty=presence_penalty,
                    frequency_penalty=frequency_penalty,
                )
            self._request_processor = OpenAIBatchRequestProcessor(
        elif self.backend == "litellm":
            if batch:
                logger.warning(
                    "Batch mode is not supported with LiteLLM backend, ignoring batch=True"
                )
            self._request_processor = LiteLLMOnlineRequestProcessor(
                model=model_name,
                batch_size=batch_size,
                temperature=temperature,
                top_p=top_p,
                presence_penalty=presence_penalty,
@@ -105,17 +178,7 @@ def __init__(
                delete_failed_batch_files=delete_failed_batch_files,
            )
        else:
            if batch_size is not None:
                logger.warning(
                    f"Prompter argument `batch_size` {batch_size} is ignored because `batch` is False"
                )
            self._request_processor = OpenAIOnlineRequestProcessor(
                model=model_name,
                temperature=temperature,
                top_p=top_p,
                presence_penalty=presence_penalty,
                frequency_penalty=frequency_penalty,
            )
            raise ValueError(f"Unknown backend: {self.backend}")

    def __call__(self, dataset: Optional[Iterable] = None, working_dir: str = None) -> Dataset:
        """
@@ -180,6 +243,7 @@ def _completions(
                    else "text"
                ),
                str(self.batch_mode),
                str(self.backend),
            ]
        )
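
The routing in _determine_backend can be sanity-checked in isolation. The sketch below calls the private helper directly, which the public API never requires but is harmless here: with no response_format, the method only inspects the model name (and logs its choice), so no API key or network access is needed.

from bespokelabs import curator

# Per the logic above: GPT-/o1-style model names route to the OpenAI backend,
# everything else falls through to LiteLLM when no response_format is given.
assert curator.Prompter._determine_backend("gpt-4o-mini") == "openai"
assert curator.Prompter._determine_backend("o1-mini") == "openai"
assert curator.Prompter._determine_backend("gemini/gemini-1.5-flash") == "litellm"
assert curator.Prompter._determine_backend("claude-3-5-haiku-20241022") == "litellm"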

