diff --git a/outlines/generate/choice.py b/outlines/generate/choice.py
index 931409a59..595513d52 100644
--- a/outlines/generate/choice.py
+++ b/outlines/generate/choice.py
@@ -1,3 +1,4 @@
+import json as pyjson
 from functools import singledispatch
 from typing import Callable, List

@@ -5,6 +6,7 @@
 from outlines.models import OpenAI
 from outlines.samplers import Sampler, multinomial

+from .json import json
 from .regex import regex


@@ -24,13 +26,22 @@ def choice(
 def choice_openai(
     model: OpenAI, choices: List[str], sampler: Sampler = multinomial()
 ) -> Callable:
-    if not isinstance(sampler, multinomial):
-        raise NotImplementedError(
-            r"The OpenAI API does not support any other sampling algorithm "
-            + "that the multinomial sampler."
-        )
-
-    def generate_choice(prompt: str, max_tokens: int = 1):
-        return model.generate_choice(prompt, choices, max_tokens)
+    """
+    Call the OpenAI API with a `response_format` that constrains the output
+    to a JSON object of the form {"result": <one of the choices>}.
+    """
+
+    choices_schema = pyjson.dumps(
+        {
+            "type": "object",
+            "properties": {"result": {"type": "string", "enum": choices}},
+            "additionalProperties": False,
+            "required": ["result"],
+        }
+    )
+    generator = json(model, choices_schema, sampler)
+
+    def generate_choice(*args, **kwargs):
+        return generator(*args, **kwargs)["result"]

     return generate_choice
diff --git a/outlines/generate/json.py b/outlines/generate/json.py
index 6209840e2..f75878d29 100644
--- a/outlines/generate/json.py
+++ b/outlines/generate/json.py
@@ -70,9 +70,39 @@ def json(

 @json.register(OpenAI)
 def json_openai(
-    model, schema_object: Union[str, object, Callable], sampler: Sampler = multinomial()
+    model, schema_object: Union[str, object], sampler: Sampler = multinomial()
 ):
-    raise NotImplementedError(
-        "Cannot use JSON Schema-structure generation with an OpenAI model "
-        + "due to the limitations of the OpenAI API"
+    if not isinstance(sampler, multinomial):
+        raise NotImplementedError(
+            r"The OpenAI API does not support any other sampling algorithm "
+            + "than the multinomial sampler."
+        )
+
+    if isinstance(schema_object, type(BaseModel)):
+        schema = pyjson.dumps(schema_object.model_json_schema())
+        format_sequence = lambda x: schema_object.parse_raw(x)
+    elif isinstance(schema_object, str):
+        schema = schema_object
+        format_sequence = lambda x: pyjson.loads(x)
+    else:
+        raise ValueError(
+            f"Cannot parse schema {schema_object}. The schema must be either "
+            + "a Pydantic model or a string that contains the JSON "
+            + "Schema specification"
+        )
+
+    # create a copy of the model whose config carries the normalized JSON Schema
+    generator = model.new_with_replacements(
+        response_format={
+            "type": "json_schema",
+            "json_schema": {
+                "name": "default",
+                "strict": True,
+                "schema": pyjson.loads(schema),
+            },
+        }
     )
+
+    generator.format_sequence = format_sequence
+
+    return generator
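Illustrative usage of the two new code paths above; a minimal sketch, not part of the diff. The model construction mirrors the test fixture at the bottom of this patch. The API key, model name, config fields and prompts are placeholders (the `model=` config field is assumed from the library and is not shown in this diff), and the last two calls would hit the live API.

from openai import AsyncOpenAI
from pydantic import BaseModel, ConfigDict

from outlines import generate
from outlines.models.openai import OpenAI, OpenAIConfig

# Placeholder client and config; the model name is assumed, not part of this diff.
client = AsyncOpenAI(api_key="sk-...")
model = OpenAI(client, OpenAIConfig(model="gpt-4o-mini", max_tokens=100))


class Person(BaseModel):
    model_config = ConfigDict(extra="forbid")  # strict json_schema mode rejects extra keys
    first_name: str
    last_name: str
    age: int


# json_openai turns the Pydantic schema into a strict `response_format`
# and parses the completion back into a Person instance.
person_generator = generate.json(model, Person)
person = person_generator("Who is the fastest sprinter alive?")

# choice_openai wraps the same machinery with the {"result": <enum>} schema
# and unwraps the "result" field from the reply.
label_generator = generate.choice(model, ["Positive", "Negative"])
label = label_generator("Review: the food was great!")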
diff --git a/outlines/models/openai.py b/outlines/models/openai.py
index 692e64ac5..89c26f217 100644
--- a/outlines/models/openai.py
+++ b/outlines/models/openai.py
@@ -1,9 +1,8 @@
 """Integration with OpenAI's API."""
+import copy
 import functools
-import warnings
 from dataclasses import asdict, dataclass, field, replace
-from itertools import zip_longest
-from typing import Callable, Dict, List, Optional, Set, Tuple, Union
+from typing import Callable, Dict, List, Optional, Tuple, Union

 import numpy as np
@@ -74,7 +73,6 @@ def __init__(
         self,
         client,
         config,
-        tokenizer=None,
         system_prompt: Optional[str] = None,
     ):
         """Create an `OpenAI` instance.
@@ -89,13 +87,9 @@ def __init__(
         config
             An instance of `OpenAIConfig`. Can be useful to specify some
             parameters that cannot be set by calling this class' methods.
-        tokenizer
-            The tokenizer associated with the model the client connects to.
-
         """
         self.client = client
-        self.tokenizer = tokenizer
         self.config = config

         # We count the total number of prompt and generated tokens as returned
@@ -104,6 +98,8 @@ def __init__(
         self.prompt_tokens = 0
         self.completion_tokens = 0

+        self.format_sequence = lambda seq: seq
+
     def __call__(
         self,
         prompt: Union[str, List[str]],
@@ -152,107 +148,17 @@ def __call__(
         self.prompt_tokens += prompt_tokens
         self.completion_tokens += completion_tokens

-        return response
+        return self.format_sequence(response)

     def stream(self, *args, **kwargs):
         raise NotImplementedError(
             "Streaming is currently not supported for the OpenAI API"
         )

-    def generate_choice(
-        self,
-        prompt: str,
-        choices: List[str],
-        max_tokens: Optional[int] = None,
-        system_prompt: Optional[str] = None,
-    ) -> str:
-        """Call the OpenAI API to generate one of several choices.
-
-        Parameters
-        ----------
-        prompt
-            A string or list of strings that will be used to prompt the model
-        choices
-            The list of strings between which we ask the model to choose
-        max_tokens
-            The maximum number of tokens to generate
-        system_prompt
-            The content of the system message that precedes the user's prompt.
-
-        """
-        if self.tokenizer is None:
-            raise ValueError(
-                "You must initialize the `OpenAI` class with a tokenizer to use `outlines.generate.choice`"
-            )
-
-        config = replace(self.config, max_tokens=max_tokens)
-
-        greedy = False
-        decoded: List[str] = []
-        encoded_choices_left: List[List[int]] = [
-            self.tokenizer.encode(word) for word in choices
-        ]
-
-        while len(encoded_choices_left) > 0:
-            max_tokens_left = max([len(tokens) for tokens in encoded_choices_left])
-            transposed_choices_left: List[Set] = [
-                {item for item in subset if item is not None}
-                for subset in zip_longest(*encoded_choices_left)
-            ]
-
-            if not greedy:
-                mask = build_optimistic_mask(transposed_choices_left)
-            else:
-                mask = {}
-                for token in transposed_choices_left[0]:  # build greedy mask
-                    mask[token] = 100
-
-            if len(mask) == 0:
-                break
-
-            config = replace(config, logit_bias=mask, max_tokens=max_tokens_left)
-
-            response, prompt_tokens, completion_tokens = generate_chat(
-                prompt, system_prompt, self.client, config
-            )
-            self.prompt_tokens += prompt_tokens
-            self.completion_tokens += completion_tokens
-
-            encoded_response = self.tokenizer.encode(response)
-
-            if encoded_response in encoded_choices_left:
-                decoded.append(response)
-                break
-            else:
-                (
-                    encoded_response,
-                    encoded_choices_left,
-                ) = find_response_choices_intersection(
-                    encoded_response, encoded_choices_left
-                )
-
-                if len(encoded_response) == 0:
-                    greedy = True  # next iteration will be "greedy"
-                    continue
-                else:
-                    decoded.append("".join(self.tokenizer.decode(encoded_response)))
-
-                    if len(encoded_choices_left) == 1:  # only one choice left
-                        choice_left = self.tokenizer.decode(encoded_choices_left[0])
-                        decoded.append(choice_left)
-                        break
-
-                    greedy = False  # after each success, stay with (or switch to) "optimistic" approach
-
-            prompt = prompt + "".join(decoded)
-
-        choice = "".join(decoded)
-
-        return choice
-
-    def generate_json(self):
-        """Call the OpenAI API to generate a JSON object."""
-        raise NotImplementedError
+    def new_with_replacements(self, **kwargs):
+        new_instance = copy.copy(self)
+        new_instance.config = replace(new_instance.config, **kwargs)
+        return new_instance

     def __str__(self):
         return self.__class__.__name__ + " API"
@@ -313,81 +219,6 @@ async def call_api(prompt, system_prompt, config):
     return results, usage["prompt_tokens"], usage["completion_tokens"]


-def find_longest_intersection(response: List[int], choice: List[int]) -> List[int]:
-    """Find the longest intersection between the response and the choice."""
-    for i, (token_r, token_c) in enumerate(zip_longest(response, choice)):
-        if token_r != token_c:
-            return response[:i]
-
-    return response
-
-
-def find_response_choices_intersection(
-    response: List[int], choices: List[List[int]]
-) -> Tuple[List[int], List[List[int]]]:
-    """Find the longest intersection between the response and the different
-    choices.
-
-    Say the response is of the form `[1, 2, 3, 4, 5]` and we have the choices
-    `[[1, 2], [1, 2, 3], [6, 7, 8]` then the function will return `[1, 2, 3]` as the
-    intersection, and `[[]]` as the list of choices left.
-
-    Parameters
-    ----------
-    response
-        The model's response
-    choices
-        The remaining possible choices
-
-    Returns
-    -------
-    A tuple that contains the longest intersection between the response and the
-    different choices, and the choices which start with this intersection, with the
-    intersection removed.
-
-    """
-    max_len_prefix = 0
-    choices_left = []
-    longest_prefix = []
-    for i, choice in enumerate(choices):
-        # Find the longest intersection between the response and the choice.
-        prefix = find_longest_intersection(response, choice)
-
-        if len(prefix) > max_len_prefix:
-            max_len_prefix = len(prefix)
-            choices_left = [choice[len(prefix) :]]
-            longest_prefix = prefix
-
-        elif len(prefix) == max_len_prefix:
-            choices_left.append(choice[len(prefix) :])
-
-    return longest_prefix, choices_left
-
-
-def build_optimistic_mask(
-    transposed: List[Set[int]], max_mask_size: int = 300
-) -> Dict[int, int]:
-    """We build the largest mask possible.
-
-    Tokens are added from left to right, so if the encoded choices are e.g.
-    `[[1,2], [3,4]]`, `1` and `3` will be added before `2` and `4`.
-
-    Parameters
-    ----------
-    transposed
-        A list of lists that contain the nth token of each choice.
-
-    """
-    mask: Dict[int, int] = {}
-    for tokens in transposed:
-        for token in tokens:
-            if len(mask) == max_mask_size:
-                return mask
-            mask[token] = 100
-
-    return mask
-
-
 def error_handler(api_call_fn: Callable) -> Callable:
     """Handle OpenAI API errors and missing API key."""

@@ -427,11 +258,10 @@ def openai_model(
     **openai_client_params,
 ):
     try:
-        import tiktoken
         from openai import AsyncOpenAI
     except ImportError:
         raise ImportError(
-            "The `openai` and `tiktoken` libraries needs to be installed in order to use Outlines' OpenAI integration."
+            "The `openai` library needs to be installed in order to use Outlines' OpenAI integration."
         )

     if config is not None:
@@ -441,15 +271,7 @@ def openai_model(

     client = AsyncOpenAI(**openai_client_params)

-    try:
-        tokenizer = tiktoken.encoding_for_model(model_name)
-    except KeyError:
-        warnings.warn(
-            f"Could not find a tokenizer for model {model_name}. Using default cl100k_base."
-        )
-        tokenizer = tiktoken.get_encoding("cl100k_base")
-
-    return OpenAI(client, config, tokenizer)
+    return OpenAI(client, config)


 def azure_openai(
@@ -459,11 +281,10 @@ def azure_openai(
     **azure_openai_client_params,
 ):
     try:
-        import tiktoken
         from openai import AsyncAzureOpenAI
     except ImportError:
         raise ImportError(
-            "The `openai` and `tiktoken` libraries needs to be installed in order to use Outlines' Azure OpenAI integration."
+            "The `openai` library needs to be installed in order to use Outlines' Azure OpenAI integration."
         )

     if config is not None:
@@ -473,12 +294,4 @@ def azure_openai(

     client = AsyncAzureOpenAI(**azure_openai_client_params)

-    try:
-        tokenizer = tiktoken.encoding_for_model(model_name or deployment_name)
-    except KeyError:
-        warnings.warn(
-            f"Could not find a tokenizer for model {model_name or deployment_name}. Using default cl100k_base."
-        )
-        tokenizer = tiktoken.get_encoding("cl100k_base")
-
-    return OpenAI(client, config, tokenizer)
+    return OpenAI(client, config)
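A sketch of how the two hooks added to `OpenAI` above are meant to compose. This is illustrative only: it reuses the `model` object from the earlier sketch, and the schema payload is abbreviated to an empty object.

import json as pyjson

# new_with_replacements returns a shallow copy whose config has the given
# fields replaced, so the original model instance keeps its old config.
generator = model.new_with_replacements(
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "default",
            "strict": True,
            "schema": {"type": "object", "properties": {}, "additionalProperties": False},
        },
    }
)

# __call__ now pipes the raw completion through format_sequence, which defaults
# to the identity function; json_openai overrides it with a JSON/Pydantic parser.
generator.format_sequence = pyjson.loads
structured = generator("Return an empty JSON object.")  # dict parsed from the reply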
diff --git a/pyproject.toml b/pyproject.toml
index 4d87e1fca..937b0c9c7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -122,7 +122,6 @@ module = [
     "pydantic.*",
     "pytest",
     "referencing.*",
-    "tiktoken.*",
     "torch.*",
     "transformers.*",
     "llama_cpp",
diff --git a/tests/models/test_openai.py b/tests/models/test_openai.py
index 8a7870efd..f4b97f36a 100644
--- a/tests/models/test_openai.py
+++ b/tests/models/test_openai.py
@@ -1,17 +1,14 @@
 import importlib
+import json
+from contextlib import contextmanager
 from unittest import mock
 from unittest.mock import MagicMock

 import pytest
 from openai import AsyncOpenAI

-from outlines.models.openai import (
-    OpenAI,
-    OpenAIConfig,
-    build_optimistic_mask,
-    find_longest_intersection,
-    find_response_choices_intersection,
-)
+from outlines import generate
+from outlines.models.openai import OpenAI, OpenAIConfig


 def module_patch(path):
@@ -62,44 +59,67 @@ def test_openai_call():
     assert mocked_generate_chat_arg_config.n == 3


-@pytest.mark.parametrize(
-    "response,choice,expected_intersection,expected_choices_left",
-    (
-        ([1, 2, 3, 4], [[5, 6]], [], [[5, 6]]),
-        ([1, 2, 3, 4], [[5, 6], [7, 8]], [], [[5, 6], [7, 8]]),
-        ([1, 2, 3, 4], [[1, 2], [7, 8]], [1, 2], [[]]),
-        ([1, 2], [[1, 2, 3, 4], [1, 2]], [1, 2], [[3, 4], []]),
-        ([1, 2, 3], [[1, 2, 3, 4], [1, 2]], [1, 2, 3], [[4]]),
-    ),
-)
-def test_find_response_choices_intersection(
-    response, choice, expected_intersection, expected_choices_left
-):
-    intersection, choices_left = find_response_choices_intersection(response, choice)
-    assert intersection == expected_intersection
-    assert choices_left == expected_choices_left
-
-
-@pytest.mark.parametrize(
-    "response,choice,expected_prefix",
-    (
-        ([1, 2, 3], [1, 2, 3, 4], [1, 2, 3]),
-        ([1, 2, 3], [1, 2, 3], [1, 2, 3]),
-        ([4, 5], [1, 2, 3], []),
-    ),
-)
-def test_find_longest_common_prefix(response, choice, expected_prefix):
-    prefix = find_longest_intersection(response, choice)
-    assert prefix == expected_prefix
-
-
-@pytest.mark.parametrize(
-    "transposed,mask_size,expected_mask",
-    (
-        ([{1, 2}, {3, 4}], 3, {1: 100, 2: 100, 3: 100}),
-        ([{1, 2}, {3, 4}], 4, {1: 100, 2: 100, 3: 100, 4: 100}),
-    ),
-)
-def test_build_optimistic_mask(transposed, mask_size, expected_mask):
-    mask = build_optimistic_mask(transposed, mask_size)
-    assert mask == expected_mask
+@contextmanager
+def patched_openai(completion, **oai_config):
+    """Create a patched OpenAI model whose chat completion call always returns `completion`."""
+    with module_patch("outlines.models.openai.generate_chat") as mocked_generate_chat:
+        mocked_generate_chat.return_value = completion, 1, 2
+        async_client = MagicMock(spec=AsyncOpenAI, api_key="key")
+        model = OpenAI(
+            async_client,
+            OpenAIConfig(max_tokens=10, temperature=0.5, n=2, stop=["."]),
+        )
+        yield model
+
+
+def test_openai_choice_call():
+    with patched_openai(completion='{"result": "foo"}') as model:
+        generator = generate.choice(model, ["foo", "bar"])
+        assert generator("hi") == "foo"
+
+
+def test_openai_choice_call_invalid_server_response():
+    with patched_openai(completion="not actual json") as model:
+        generator = generate.choice(model, ["foo", "bar"])
+        with pytest.raises(json.decoder.JSONDecodeError):
+            generator("hi")
+
+
+def test_openai_json_call_pydantic():
+    from pydantic import BaseModel, ConfigDict, ValidationError
+
+    class Person(BaseModel):
+        model_config = ConfigDict(extra="forbid")  # required for openai
+        first_name: str
+        last_name: str
+        age: int
+
+    completion = '{"first_name": "Usain", "last_name": "Bolt", "age": 38}'
+
+    # assert success for valid response
+    with patched_openai(completion=completion) as model:
+        generator = generate.json(model, Person)
+        assert generator("fastest person") == Person.parse_raw(completion)
+
+    # assert fail for non-json response
+    with patched_openai(completion="usain bolt") as model:
+        generator = generate.json(model, Person)
+        with pytest.raises(ValidationError):
+            assert generator("fastest person")
+
+
+def test_openai_json_call_str():
+    person_schema = '{"additionalProperties": false, "properties": {"first_name": {"title": "First Name", "type": "string"}, "last_name": {"title": "Last Name", "type": "string"}, "age": {"title": "Age", "type": "integer"}}, "required": ["first_name", "last_name", "age"], "title": "Person", "type": "object"}'
+
+    output = {"first_name": "Usain", "last_name": "Bolt", "age": 38}
+
+    # assert success for valid response
+    with patched_openai(completion=json.dumps(output)) as model:
+        generator = generate.json(model, person_schema)
+        assert generator("fastest person") == output
+
+    # assert fail for non-json response
+    with patched_openai(completion="usain bolt") as model:
+        generator = generate.json(model, person_schema)
+        with pytest.raises(json.decoder.JSONDecodeError):
+            assert generator("fastest person")
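For reference, the patched `response_format` corresponds to OpenAI's structured-outputs request sketched below with the `openai` SDK directly. This sketch is hand-written, not taken from this diff; the model name, key and prompt are placeholders. Strict mode is also why the hand-built schemas set "additionalProperties": false and why the test's Pydantic model uses extra="forbid".

from openai import OpenAI as OpenAIClient

client = OpenAIClient(api_key="sk-...")  # placeholder key
completion = client.chat.completions.create(
    model="gpt-4o-mini",  # placeholder model name
    messages=[{"role": "user", "content": "Review: the food was great!"}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "default",
            "strict": True,
            "schema": {
                "type": "object",
                "properties": {"result": {"type": "string", "enum": ["Positive", "Negative"]}},
                "additionalProperties": False,
                "required": ["result"],
            },
        },
    },
)
content = completion.choices[0].message.content  # e.g. '{"result": "Positive"}'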