diff --git a/examples/amazon/generate_slugs.py b/examples/amazon/generate_slugs.py
index 95fe76f..f98e602 100644
--- a/examples/amazon/generate_slugs.py
+++ b/examples/amazon/generate_slugs.py
@@ -16,7 +16,7 @@ logging.getLogger("openai._base_client").disabled = True
 logging.getLogger("paramiko.transport").disabled = True
 logging.getLogger("anthropic._base_client").disabled = True
-logging.getLogger("langdspy").disabled = True
+# logging.getLogger("langdspy").disabled = True
 
 import langdspy
 import httpx
@@ -32,7 +32,7 @@ def get_llm():
     FAST_OPENAI_MODEL = os.getenv("FAST_OPENAI_MODEL")
     ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
     OPENAI_TEMPERATURE = os.getenv("OPENAI_TEMPERATURE")
-    FAST_MODEL_PROVIDER = os.getenv("FAST_MODEL_PROVIDER")
+    FAST_MODEL_PROVIDER = os.getenv("FAST_MODEL_PROVIDER", "")
     FAST_ANTHROPIC_MODEL = os.getenv("FAST_ANTHROPIC_MODEL")
     FAST_GROQ_MODEL = os.getenv("FAST_GROQ_MODEL")
     GROQ_API_KEY = os.getenv("GROQ_API_KEY")
@@ -82,14 +82,14 @@ def cosine_similarity_tfidf(true_slugs, predicted_slugs):
     similarity_scores = cosine_similarity(true_vectors, predicted_vectors)
     return similarity_scores.diagonal()
 
-def slug_similarity(true_slugs, predicted_slugs):
+def slug_similarity(X, true_slugs, predicted_slugs):
     similarity_scores = cosine_similarity_tfidf(true_slugs, predicted_slugs)
     average_similarity = sum(similarity_scores) / len(similarity_scores)
     return average_similarity
 
 def evaluate_model(model, X, y):
     predicted_slugs = model.predict(X, llm)
-    accuracy = slug_similarity(y, predicted_slugs)
+    accuracy = slug_similarity(X, y, predicted_slugs)
     return accuracy
 
 llm = get_llm()
@@ -105,24 +105,24 @@ def evaluate_model(model, X, y):
     X_test = dataset['test']['X']
     y_test = dataset['test']['y']
 
-    model = ProductSlugGenerator(n_jobs=4, print_prompt=False)
+    model = ProductSlugGenerator(n_jobs=1, print_prompt=True)
     # model.generate_slug.set_model_kwargs({'print_prompt': True})
 
     before_test_accuracy = None
     if os.path.exists(output_path):
        model.load(output_path)
    else:
-        input("Hit enter to evaluate the untrained model...")
+        # input("Hit enter to evaluate the untrained model...")
        before_test_accuracy = evaluate_model(model, X_test, y_test)
        print(f"Before Training Accuracy: {before_test_accuracy}")

-        input("Hit enter to train the model...")
-        model.fit(X_train, y_train, score_func=slug_similarity, llm=llm, n_examples=3, n_iter=500)
+        # input("Hit enter to train the model...")
+        # model.fit(X_train, y_train, score_func=slug_similarity, llm=llm, n_examples=3, n_iter=500)
 
-    input("Hit enter to evaluate the trained model...")
-    # Evaluate the model on the test set
-    test_accuracy = evaluate_model(model, X_test, y_test)
-    print(f"Before Training Accuracy: {before_test_accuracy}")
-    print(f"After Training Accuracy: {test_accuracy}")
+    # input("Hit enter to evaluate the trained model...")
+    # # Evaluate the model on the test set
+    # test_accuracy = evaluate_model(model, X_test, y_test)
+    # print(f"Before Training Accuracy: {before_test_accuracy}")
+    # print(f"After Training Accuracy: {test_accuracy}")
 
     model.save(output_path)
\ No newline at end of file
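
Note: the example script now passes the inputs `X` through to the scoring callback. A minimal sketch of the new `score_func` contract — the metric below is an invented toy (the example itself uses TF-IDF cosine similarity), and `my_score_func` is a hypothetical name:

```python
from typing import Any, Dict, List

def my_score_func(X: List[Dict[str, Any]], y_true: List[str], y_pred: List[str]) -> float:
    # Toy metric: fraction of exact slug matches. slug_similarity in the example
    # computes TF-IDF cosine similarity instead, but the (X, y_true, y_pred)
    # argument order is what model.fit / evaluate_subset now call it with.
    matches = sum(1 for t, p in zip(y_true, y_pred) if t == p)
    return matches / max(len(y_true), 1)

# Used the same way slug_similarity is used above (model, X_train, y_train and llm
# are assumed to be set up as in the example script):
# model.fit(X_train, y_train, score_func=my_score_func, llm=llm, n_examples=3, n_iter=500)
```
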
name, "Field name cannot contain newline character" assert ":" not in name, "Field name cannot contain colon character" - self.name = name self.desc = desc self.formatter = formatter @@ -19,7 +18,6 @@ def __init__(self, name:str, desc: str, formatter: Optional[Callable[[Any], Any] self.validator = validator self.kwargs = kwargs - def format_value(self, value: Any) -> Any: if self.formatter: return self.formatter(value, self.kwargs) @@ -39,81 +37,105 @@ def validate_value(self, input: Input, value: Any) -> bool: return True class HintField(FieldDescriptor): - HINT_TOKEN = "💡" - + HINT_TOKEN_OPENAI = "💡" + HINT_TOKEN_ANTHROPIC = None def __init__(self, desc: str, formatter: Optional[Callable[[Any], Any]] = None, transformer: Optional[Callable[[Any], Any]] = None, validator: Optional[Callable[[Any], Any]] = None, **kwargs): # Provide a default value for the name parameter, such as an empty string super().__init__("", desc, formatter, transformer, validator, **kwargs) - - def format_prompt_description(self): - return f"{self.HINT_TOKEN} {self.desc}" - - - def format_prompt_description(self): - return f"{self.HINT_TOKEN} {self.desc}" - + def _start_format_openai(self): + return f"{self.HINT_TOKEN_OPENAI}" + def _start_format_anthropic(self): + return f"" + def format_prompt_description(self, llm_type: str): + if llm_type == "openai": + return f"{self._start_format_openai()} {self.desc}" + elif llm_type == "anthropic": + return f"{self._start_format_anthropic()}{self.desc}" class InputField(FieldDescriptor): - START_TOKEN = "✅" - - def _start_format(self): - return f"{self.START_TOKEN}{self.name}" - - def format_prompt_description(self): - return f"{self._start_format()}: {self.desc}" - - def format_prompt_value(self, value): + START_TOKEN_OPENAI = "✅" + START_TOKEN_ANTHROPIC = None + def _start_format_openai(self): + return f"{self.START_TOKEN_OPENAI}{self.name}" + def _start_format_anthropic(self): + return f"<{self.name}>" + def format_prompt_description(self, llm_type: str): + if llm_type == "openai": + return f"{self._start_format_openai()}: {self.desc}" + elif llm_type == "anthropic": + return f"{self._start_format_anthropic()}{self.desc}" + def format_prompt_value(self, value, llm_type: str): value = self.format_value(value) - return f"{self._start_format()}: {value}" + if llm_type == "openai": + return f"{self._start_format_openai()}: {value}" + elif llm_type == "anthropic": + return f"{self._start_format_anthropic()}{value}" class InputFieldList(InputField): - def format_prompt_description(self): - return f"{self._start_format()}: {self.desc}" - - def format_prompt_value(self, value): + def format_prompt_description(self, llm_type: str): + if llm_type == "openai": + return f"{self._start_format_openai()}: {self.desc}" + elif llm_type == "anthropic": + return f"{self._start_format_anthropic()}{self.desc}" + def format_prompt_value(self, value, llm_type: str): res = "" if len(value) >= 1: + if llm_type == "anthropic": + res += f"<{self.name}>\n" for i, value in enumerate(value): if i > 0: res += "\n" value = self.format_value(value) - res += f"{self.START_TOKEN} [{i}]: {value}" + if llm_type == "openai": + res += f"{self.START_TOKEN_OPENAI} [{i}]: {value}" + elif llm_type == "anthropic": + res += f"{value}" + if llm_type == "anthropic": + res += f"\n" else: - res += f"{self._start_format()}: NO VALUES SPECIFIED" - - + if llm_type == "openai": + res += f"{self._start_format_openai()}: NO VALUES SPECIFIED" + elif llm_type == "anthropic": + res += f"{self._start_format_anthropic()}NO VALUES 
SPECIFIED" return res class OutputField(FieldDescriptor): - START_TOKEN = "🔑" - - def _start_format(self): - return f"{self.START_TOKEN}{self.name}" - - def format_prompt_description(self): - return f"{self._start_format()}: {self.desc}" - - def format_prompt_value(self, value): + START_TOKEN_OPENAI = "🔑" + START_TOKEN_ANTHROPIC = None + def _start_format_openai(self): + return f"{self.START_TOKEN_OPENAI}{self.name}" + def _start_format_anthropic(self): + return f"<{self.name}>" + def format_prompt_description(self, llm_type: str): + if llm_type == "openai": + return f"{self._start_format_openai()}: {self.desc}" + elif llm_type == "anthropic": + return f"{self._start_format_anthropic()}{self.desc}" + def format_prompt_value(self, value, llm_type: str): value = self.format_value(value) - return f"{self._start_format()}: {value}" - - def format_prompt(self): - return f"{self._start_format()}:" + if llm_type == "openai": + return f"{self._start_format_openai()}: {value}" + elif llm_type == "anthropic": + return f"{self._start_format_anthropic()}{value}" + def format_prompt(self, llm_type: str): + if llm_type == "openai": + return f"{self._start_format_openai()}:" + elif llm_type == "anthropic": + return f"{self._start_format_anthropic()}" class OutputFieldEnum(OutputField): def __init__(self, name: str, desc: str, enum: Enum, **kwargs): kwargs['enum'] = enum - if not 'transformer' in kwargs: kwargs['transformer'] = transformers.as_enum - if not 'validator' in kwargs: kwargs['validator'] = validators.is_one_of kwargs['choices'] = [e.name for e in enum] - super().__init__(name, desc, **kwargs) - - def format_prompt_description(self): + def format_prompt_description(self, llm_type: str): enum = self.kwargs.get('enum') choices_str = ", ".join([e.name for e in enum]) - return f"{self._start_format()}: One of: {choices_str} - {self.desc}" + if llm_type == "openai": + return f"{self._start_format_openai()}: One of: {choices_str} - {self.desc}" + elif llm_type == "anthropic": + return f"{self._start_format_anthropic()}One of: {choices_str} - {self.desc}" \ No newline at end of file diff --git a/langdspy/lcel_logger.py b/langdspy/lcel_logger.py deleted file mode 100644 index fc1235c..0000000 --- a/langdspy/lcel_logger.py +++ /dev/null @@ -1,81 +0,0 @@ -import logging -from typing import Any, Optional -from uuid import UUID - -from typing import Any, Dict, List -from langchain_core.exceptions import TracerException -from langchain_core.callbacks.base import BaseCallbackHandler -from langchain_core.tracers.stdout import FunctionCallbackHandler -from langchain_core.utils.input import get_bolded_text, get_colored_text - -from langchain_core.outputs import LLMResult - - -class LlmDebugHandler(BaseCallbackHandler): - @property - def always_verbose(self) -> bool: - """Whether to call verbose callbacks even if verbose is False.""" - return True - - def on_llm_start( - self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any - ) -> None: - """Print out the prompts.""" - logger = logging.getLogger(__name__) - try: - logger.debug(f"LLM Start: {serialized} {prompts}") - for i, prompt in enumerate(prompts): - logger.debug(f" Prompt {i}: {prompt}") - except Exception as e: - logger.error(f"An error occurred in on_llm_start: {e}") - - def on_llm_new_token(self, token: str, **kwargs: Any) -> None: - """Print out the token.""" - logger = logging.getLogger(__name__) - try: - logger.debug(f"LLM Token: {token}") - except Exception as e: - logger.error(f"An error occurred in on_llm_new_token: {e}") - - def 
diff --git a/langdspy/lcel_logger.py b/langdspy/lcel_logger.py
deleted file mode 100644
index fc1235c..0000000
--- a/langdspy/lcel_logger.py
+++ /dev/null
@@ -1,81 +0,0 @@
-import logging
-from typing import Any, Optional
-from uuid import UUID
-
-from typing import Any, Dict, List
-from langchain_core.exceptions import TracerException
-from langchain_core.callbacks.base import BaseCallbackHandler
-from langchain_core.tracers.stdout import FunctionCallbackHandler
-from langchain_core.utils.input import get_bolded_text, get_colored_text
-
-from langchain_core.outputs import LLMResult
-
-
-class LlmDebugHandler(BaseCallbackHandler):
-    @property
-    def always_verbose(self) -> bool:
-        """Whether to call verbose callbacks even if verbose is False."""
-        return True
-
-    def on_llm_start(
-        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
-    ) -> None:
-        """Print out the prompts."""
-        logger = logging.getLogger(__name__)
-        try:
-            logger.debug(f"LLM Start: {serialized} {prompts}")
-            for i, prompt in enumerate(prompts):
-                logger.debug(f"  Prompt {i}: {prompt}")
-        except Exception as e:
-            logger.error(f"An error occurred in on_llm_start: {e}")
-
-    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
-        """Print out the token."""
-        logger = logging.getLogger(__name__)
-        try:
-            logger.debug(f"LLM Token: {token}")
-        except Exception as e:
-            logger.error(f"An error occurred in on_llm_new_token: {e}")
-
-    def __copy__(self) -> "LlmDebugHandler":
-        """Return a copy of the callback handler."""
-        logger = logging.getLogger(__name__)
-        try:
-            return self
-        except Exception as e:
-            logger.error(f"An error occurred in __copy__: {e}")
-
-    def __deepcopy__(self, memo: Any) -> "LlmDebugHandler":
-        """Return a deep copy of the callback handler."""
-        logger = logging.getLogger(__name__)
-        try:
-            return self
-        except Exception as e:
-            logger.error(f"An error occurred in __deepcopy__: {e}")
-
-    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
-        """Collect token usage."""
-        logger = logging.getLogger(__name__)
-        try:
-            logger.debug(f"LLM Result: {response}")
-            for f in response.generations:
-                for gen in f:
-                    logger.debug(f"  Generation: {gen.text}")
-        except Exception as e:
-            logger.error(f"An error occurred in on_llm_end: {e}")
-
-    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
-        """Collect token usage."""
-        logger = logging.getLogger(__name__)
-        logger.debug(f"LLM Result: {response}")
-        for f in response.generations:
-            for gen in f:
-                logger.debug(f"  Generation: {gen.text}")
-
-    def __copy__(self) -> "LlmDebugHandler":
-        """Return a copy of the callback handler."""
-        return self
-
-    def __deepcopy__(self, memo: Any) -> "LlmDebugHandler":
-        """Return a deep copy of the callback handler."""
-        return self
\ No newline at end of file
diff --git a/langdspy/model.py b/langdspy/model.py
index c249178..fd5bd1f 100644
--- a/langdspy/model.py
+++ b/langdspy/model.py
@@ -66,6 +66,11 @@ def save(self, filepath):
     def load(self, filepath):
         with open(filepath, 'rb') as file:
             self.trained_state = pickle.load(file)
+            setattr(self, 'trained_state', self.trained_state)
+            self.kwargs = {**self.kwargs, 'trained_state': self.trained_state}
+
+            for runner_name, runner in self.prompt_runners:
+                runner.set_model_kwargs(self.kwargs)
 
     def predict(self, X, llm):
@@ -104,7 +109,7 @@ def evaluate_subset(subset):
                 }) for item in scoring_X
             )
-            score = score_func(scoring_y, predicted_slugs)
+            score = score_func(scoring_X, scoring_y, predicted_slugs)
             logger.debug(f"Training subset scored {score}")
             return score, subset
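
Note: `load()` now also pushes the unpickled `trained_state` into every prompt runner's model kwargs, so few-shot examples survive a save/load round trip. A hedged sketch of that flow, reusing names from the Amazon example earlier in this patch (`ProductSlugGenerator`, `X_train`, `y_train`, `X_test`, `llm` and `slug_similarity` are assumed to be set up as in that script, not defined here):

```python
model = ProductSlugGenerator(n_jobs=1, print_prompt=False)
model.fit(X_train, y_train, score_func=slug_similarity, llm=llm, n_examples=3, n_iter=100)
model.save("slug_model.pkl")

restored = ProductSlugGenerator(n_jobs=1, print_prompt=False)
restored.load("slug_model.pkl")  # also copies trained_state into each PromptRunner's kwargs

# The examples selected during fit() are reused here without retraining:
predicted = restored.predict(X_test, llm)
```
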
diff --git a/langdspy/prompt_runners.py b/langdspy/prompt_runners.py
index c71ad80..ad04bd4 100644
--- a/langdspy/prompt_runners.py
+++ b/langdspy/prompt_runners.py
@@ -4,6 +4,8 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.pydantic_v1 import BaseModel, Field, create_model, root_validator, Extra, PrivateAttr
 from langchain_core.pydantic_v1 import validator
+from langchain_openai import ChatOpenAI
+from langchain_anthropic import ChatAnthropic
 from typing import Any, Dict, List, Type, Optional, Callable
 from abc import ABC, abstractmethod
 from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -58,18 +60,54 @@ def check_template(
     def set_model_kwargs(self, model_kwargs):
         self.model_kwargs.update(model_kwargs)
+
+    def _determine_llm_type(self, llm):
+        if isinstance(llm, ChatOpenAI):  # Assuming OpenAILLM is the class for OpenAI models
+            return 'openai'
+        elif isinstance(llm, ChatAnthropic):  # Assuming AnthropicLLM is the class for Anthropic models
+            return 'anthropic'
+        else:
+            return 'openai'  # Default to OpenAI if model type cannot be determined
+
     def _invoke_with_retries(self, chain, input, max_tries=1, config: Optional[RunnableConfig] = {}):
         total_max_tries = max_tries
         hard_fail = config.get('hard_fail', False)
+        llm_type = config.get('llm_type')  # Get the LLM type from the configuration
+        if llm_type is None:
+            llm_type = self._determine_llm_type(config['llm'])  # Auto-detect the LLM type if not specified
+
+        logger.debug(f"LLM type: {llm_type}")
 
         res = {}
 
         while max_tries >= 1:
             try:
                 kwargs = {**self.model_kwargs, **self.kwargs}
-                res = chain.invoke({**input, 'trained_state': config.get('trained_state', None), 'print_prompt': config.get('print_prompt', False), **kwargs}, config=config)
+                # logger.debug(f"PromptRunner invoke with input {input} and kwargs {kwargs} and config {config}")
+                # logger.debug(f"Prompt runner kwargs: {kwargs}")
+                trained_state = config.get('trained_state', None)
+                # logger.debug(f"1 - Trained state is {trained_state}")
+                if not trained_state or not trained_state.examples:
+                    # logger.debug(f"2 - Trained state is {trained_state}")
+                    trained_state = self.model_kwargs.get('trained_state', None)
+                    # logger.debug(f"3 - Trained state is {trained_state}")
+
+                    if not trained_state or not trained_state.examples:
+                        _trained_state = self.kwargs.get('trained_state', None)
+                        if not trained_state:
+                            trained_state = _trained_state
+                        # logger.debug(f"4 - Trained state is {trained_state}")
+
+                print_prompt = kwargs.get('print_prompt', config.get('print_prompt', False))
+                # logger.debug(f"Print prompt {print_prompt} kwargs print prompt {kwargs.get('print_prompt')} config print prompt {config.get('print_prompt')}")
+
+                # logger.debug(f"PromptRunner invoke with trained_state {trained_state}")
+                invoke_args = {**input, 'print_prompt': print_prompt, **kwargs, 'trained_state': trained_state, 'use_training': config.get('use_training', True), 'llm_type': llm_type}
+
+                # logger.debug(f"Invoke args: {invoke_args}")
+                res = chain.invoke(invoke_args, config=config)
             except Exception as e:
                 import traceback
                 traceback.print_exc()
@@ -78,14 +116,17 @@ def _invoke_with_retries(self, chain, input, max_tries=1, config: Optional[Runna
                 max_tries -= 1
                 continue
 
+            validation = True
             # logger.debug(f"Raw output for prompt runner {self.template.__class__.__name__}: {res}")
+            if print_prompt:
+                print(res)
 
             # Use the parse_output_to_fields method from the PromptStrategy
             parsed_output = {}
             try:
-                parsed_output = self.template.parse_output_to_fields(res)
+                parsed_output = self.template.parse_output_to_fields(res, llm_type)
             except Exception as e:
                 import traceback
                 traceback.print_exc()
@@ -148,6 +189,7 @@
     def invoke(self, input: Input, config: Optional[RunnableConfig] = {}) -> Output:
         # logger.debug(f"Template: {self.template}")
         # logger.debug(f"Config: {config}")
+
         chain = (
             self.template
             | config['llm']
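
Note: the runner now resolves an `llm_type` once per invocation — either taken from `config["llm_type"]` or sniffed from the LLM class. A minimal sketch mirroring the new detection tests; `SlugSignature` is an invented signature, and constructing the chat models assumes the usual API keys are configured:

```python
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langdspy.field_descriptors import InputField, OutputField
from langdspy.prompt_strategies import PromptSignature, DefaultPromptStrategy
from langdspy.prompt_runners import PromptRunner

class SlugSignature(PromptSignature):  # hypothetical minimal signature
    title = InputField(name="title", desc="Product title")
    slug = OutputField(name="slug", desc="URL-friendly slug")

runner = PromptRunner(template_class=SlugSignature, prompt_strategy=DefaultPromptStrategy)

print(runner._determine_llm_type(ChatOpenAI()))                                          # openai
print(runner._determine_llm_type(ChatAnthropic(model_name="claude-3-sonnet-20240229")))  # anthropic

# Passing config={"llm_type": "anthropic"} (or "openai" / "test") skips detection entirely;
# anything unrecognized falls back to "openai".
```
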
with kwargs: {kwargs}") + self.validate_inputs(kwargs) + + logger.debug(f"PromptStrategy format_prompt with kwargs: {kwargs}") + + if llm_type == 'openai': + prompt = self._format_openai_prompt(trained_state, use_training, **kwargs) + elif llm_type == 'anthropic': + prompt = self._format_anthropic_prompt(trained_state, use_training, **kwargs) + + if print_prompt: + print(prompt) + + return prompt + except Exception as e: + logger.error(f"Failed to format prompt with kwargs: {kwargs}") + import traceback + traceback.print_exc() + raise e + + def parse_output_to_fields(self, output: str, llm_type: str) -> dict: + if llm_type == 'openai': + return self._parse_openai_output_to_fields(output) + elif llm_type == 'anthropic': + return self._parse_anthropic_output_to_fields(output) + elif llm_type == 'test': + return self._parse_openai_output_to_fields(output) + else: + raise ValueError(f"Unsupported LLM type: {llm_type}") + + + @abstractmethod + def _format_openai_prompt(self, **kwargs: Any) -> str: + pass + + @abstractmethod + def _format_anthropic_prompt(self, **kwargs: Any) -> str: + pass + def _get_output_field(self, field_name): for output_name, output_field in self.output_variables.items(): if output_field.name == field_name: return output_name + @abstractmethod + def _parse_openai_output_to_fields(self, output: str) -> dict: + pass + + @abstractmethod + def _parse_anthropic_output_to_fields(self, output: str) -> dict: + pass + class DefaultPromptStrategy(PromptStrategy): OUTPUT_TOKEN = "🔑" - def format_prompt(self, **kwargs: Any) -> str: + def _format_openai_prompt(self, trained_state, use_training, **kwargs) -> str: # print(f"Formatting prompt {kwargs}") - trained_state = kwargs.pop('trained_state', None) - print_prompt = kwargs.pop('print_prompt', False) + prompt = "Follow the following format. Attributes that have values should not be changed or repeated. " - try: - # logger.debug(f"Formatting prompt with kwargs: {kwargs}") - self.validate_inputs(kwargs) + if len(self.output_variables) > 1: + #Provide answers for Solution Effectiveness, Rationale and Confidence + # Extract names from output_variables + output_field_names = ', '.join([output_field.name for output_field in self.output_variables.values()]) - prompt = "Follow the following format. Attributes that have values should not be changed or repeated. 
" + # Format the instruction with the extracted names + prompt += f"Provide answers for {output_field_names}\n" - if len(self.output_variables) > 1: - #Provide answers for Solution Effectiveness, Rationale and Confidence - # Extract names from output_variables - output_field_names = ', '.join([output_field.name for output_field in self.output_variables.values()]) - # Format the instruction with the extracted names - prompt += f"Provide answers for {output_field_names}\n" + if self.hint_variables: + prompt += "\n" + for _, hint_field in self.hint_variables.items(): + prompt += hint_field.format_prompt_description("openai") + "\n" - if self.hint_variables: - prompt += "\n" + prompt += "\n\n" - for _, hint_field in self.hint_variables.items(): - prompt += hint_field.format_prompt_description() + "\n" + for input_name, input_field in self.input_variables.items(): + # prompt += f"⏎{input_field.name}: {input_field.desc}\n" + prompt += input_field.format_prompt_description("openai") + "\n" - prompt += "\n\n" + for output_name, output_field in self.output_variables.items(): + prompt += output_field.format_prompt_description("openai") + "\n" + # prompt += f"{self.OUTPUT_TOKEN}{output_field.name}: {output_field.desc}\n" - for input_name, input_field in self.input_variables.items(): - # prompt += f"⏎{input_field.name}: {input_field.desc}\n" - prompt += input_field.format_prompt_description() + "\n" + """ - for output_name, output_field in self.output_variables.items(): - prompt += output_field.format_prompt_description() + "\n" - # prompt += f"{self.OUTPUT_TOKEN}{output_field.name}: {output_field.desc}\n" + EXAMPLES GO HERE + + """ + if trained_state and trained_state.examples and use_training: + for example_X, example_y in trained_state.examples: + prompt += "\n---\n\n" - """ + for input_name, input_field in self.input_variables.items(): + prompt += input_field.format_prompt_value(example_X.get(input_name), "openai") + "\n" - EXAMPLES GO HERE - - """ - if trained_state and trained_state.examples: - for example_X, example_y in trained_state.examples: - prompt += "\n---\n\n" + for output_name, output_field in self.output_variables.items(): + if isinstance(example_y, dict): + prompt += output_field.format_prompt_value(example_y.get(output_name), "openai") + "\n" + else: + prompt += output_field.format_prompt_value(example_y, "openai") + "\n" - for input_name, input_field in self.input_variables.items(): - prompt += input_field.format_prompt_value(example_X.get(input_name)) + "\n" + prompt += "\n---\n\n" - for output_name, output_field in self.output_variables.items(): - if isinstance(example_y, dict): - prompt += output_field.format_prompt_value(example_y.get(output_name)) + "\n" - else: - prompt += output_field.format_prompt_value(example_y) + "\n" - prompt += "\n---\n\n" + for input_name, input_field in self.input_variables.items(): + prompt += input_field.format_prompt_value(kwargs.get(input_name), "openai") + "\n" + for output_name, output_field in self.output_variables.items(): + prompt += output_field.format_prompt("openai") + "\n" - for input_name, input_field in self.input_variables.items(): - prompt += input_field.format_prompt_value(kwargs.get(input_name)) + "\n" + return prompt - for output_name, output_field in self.output_variables.items(): - prompt += output_field.format_prompt() + "\n" + def _format_anthropic_prompt(self, trained_state, use_training, **kwargs) -> str: + # print(f"Formatting prompt {kwargs}") + prompt = "Follow the following format. 
Attributes that have values should not be changed or repeated. " + + output_field_names = ', '.join([output_field.name for output_field in self.output_variables.values()]) + # Format the instruction with the extracted names + prompt += f"Provide answers for {output_field_names}. Follow the XML output format.\n" + + if self.hint_variables: + prompt += "\n\n" + for _, hint_field in self.hint_variables.items(): + prompt += hint_field.format_prompt_description("anthropic") + "\n" + prompt += "\n" + + prompt += "\n\n\n" + for input_name, input_field in self.input_variables.items(): + # prompt += f"⏎{input_field.name}: {input_field.desc}\n" + prompt += input_field.format_prompt_description("anthropic") + "\n" + prompt += "\n" + prompt += "\n\n" + for output_name, output_field in self.output_variables.items(): + prompt += output_field.format_prompt_description("anthropic") + "\n" + # prompt += f"{self.OUTPUT_TOKEN}{output_field.name}: {output_field.desc}\n" + prompt += "\n" + """ + EXAMPLES GO HERE + + """ + if trained_state and trained_state.examples and use_training: + prompt += "\n\n" + for example_X, example_y in trained_state.examples: + prompt += "\n\n" + prompt += "\n" + for input_name, input_field in self.input_variables.items(): + prompt += input_field.format_prompt_value(example_X.get(input_name), "anthropic") + "\n" + prompt += "\n" + prompt += "\n" + for output_name, output_field in self.output_variables.items(): + if isinstance(example_y, dict): + prompt += output_field.format_prompt_value(example_y.get(output_name), "anthropic") + "\n" + else: + prompt += output_field.format_prompt_value(example_y, "anthropic") + "\n" + prompt += "\n" + prompt += "\n" + prompt += "\n" - # logger.debug(f"Formatted prompt: {prompt}") + prompt += "\n\n" + for input_name, input_field in self.input_variables.items(): + prompt += input_field.format_prompt_value(kwargs.get(input_name), "anthropic") + "\n" + prompt += "\n" - if print_prompt: - print(prompt) - return prompt - except: - logger.error(f"Failed to format prompt with kwargs: {kwargs}") - import traceback - traceback.print_exc() + prompt += "\n\n" + for output_name, output_field in self.output_variables.items(): + prompt += output_field.format_prompt("anthropic") + "\n" + prompt += "\n" + return prompt - def parse_output_to_fields(self, output: str) -> dict: + def _parse_openai_output_to_fields(self, output: str) -> dict: try: pattern = r'^([^:]+): (.*)' lines = output.split(self.OUTPUT_TOKEN) parsed_fields = {} - # logger.debug(f"Parsing output to fields with pattern {pattern} and lines {lines}") for line in lines: match = re.match(pattern, line, re.MULTILINE) @@ -158,7 +257,6 @@ def parse_output_to_fields(self, output: str) -> dict: field_name, field_content = match.groups() # logger.debug(f"Matched line {line} - field name {field_name} field content {field_content}") output_field = self._get_output_field(field_name) - if output_field: # logger.debug(f"Matched field {field_name} to output field {output_field}") parsed_fields[output_field] = field_content @@ -166,23 +264,35 @@ def parse_output_to_fields(self, output: str) -> dict: logger.error(f"Field {field_name} not found in output variables") # else: # logger.debug(f"NO MATCH line {line}") - if len(self.output_variables) == 1: first_value = next(iter(parsed_fields.values()), None) if not first_value: # logger.debug(f"NO MATCHES - setting last field to output: {lines[-1]}") parsed_fields[list(self.output_variables.keys())[0]] = lines[-1] - else: - logger.error(f"NO MATCHES - setting last field 
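
Note: `DefaultPromptStrategy` now has two parallel paths — the 🔑-token format for OpenAI-style completions and an XML-ish format for Anthropic. A short sketch of the two parsers, using an invented `SlugSignature`; the Anthropic case assumes the closing-tag pattern restored in `_parse_anthropic_output_to_fields` above:

```python
from langdspy.field_descriptors import InputField, OutputField
from langdspy.prompt_strategies import PromptSignature, DefaultPromptStrategy
from langdspy.prompt_runners import PromptRunner

class SlugSignature(PromptSignature):  # hypothetical minimal signature
    title = InputField(name="title", desc="Product title")
    slug = OutputField(name="slug", desc="URL-friendly slug")

runner = PromptRunner(template_class=SlugSignature, prompt_strategy=DefaultPromptStrategy)

# OpenAI-style completions keep the 🔑 token prefix:
print(runner.template._parse_openai_output_to_fields("🔑slug: mens-running-shoes"))
# {'slug': 'mens-running-shoes'}

# Anthropic-style completions wrap each output field in a tag named after the field:
print(runner.template._parse_anthropic_output_to_fields("<slug>mens-running-shoes</slug>"))
# {'slug': 'mens-running-shoes'}
```
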
to output: {lines[-1]}") - - # logger.debug(f"Parsed fields: {parsed_fields}") - + # else: + # logger.error(f"NO MATCHES - setting last field to output: {lines[-1]}") + logger.debug(f"Parsed fields: {parsed_fields}") return parsed_fields except Exception as e: import traceback traceback.print_exc() - + raise e + def _parse_anthropic_output_to_fields(self, output: str) -> dict: + try: + parsed_fields = {} + for output_name, output_field in self.output_variables.items(): + pattern = fr"<{output_field.name}>(.*?)" + match = re.search(pattern, output, re.DOTALL) + if match: + parsed_fields[output_name] = match.group(1).strip() + + logger.debug(f"Parsed fields: {parsed_fields}") + return parsed_fields + except Exception as e: + import traceback + traceback.print_exc() + raise e diff --git a/langdspy/validators.py b/langdspy/validators.py index 54e77c7..a2d0453 100644 --- a/langdspy/validators.py +++ b/langdspy/validators.py @@ -38,10 +38,10 @@ def is_one_of(input, output_val, kwargs) -> bool: choices = [c.lower() for c in kwargs['choices']] output_val = output_val.lower() - logger.debug(f"Checking if {output_val} is one of {choices}") + # logger.debug(f"Checking if {output_val} is one of {choices}") for choice in choices: if output_val.startswith(choice): - logger.debug(f"Matched {output_val} to {choice}") + # logger.debug(f"Matched {output_val} to {choice}") return True return False diff --git a/tests/test_field_descriptors.py b/tests/test_field_descriptors.py index dfba9b6..d630791 100644 --- a/tests/test_field_descriptors.py +++ b/tests/test_field_descriptors.py @@ -11,11 +11,11 @@ def test_input_field_initialization(): def test_input_field_format_prompt_description(): field = InputField("name", "description") - assert field.format_prompt_description() == "✅name: description" + assert field.format_prompt_description("openai") == "✅name: description" def test_input_field_format_prompt_value(): field = InputField("name", "description") - assert field.format_prompt_value("value") == "✅name: value" + assert field.format_prompt_value("value", "openai") == "✅name: value" def test_input_field_list_initialization(): field = InputFieldList("name", "description") @@ -27,12 +27,12 @@ def test_input_field_list_initialization(): def test_input_field_list_format_prompt_description(): field = InputFieldList("name", "description") - assert field.format_prompt_description() == "✅name: description" + assert field.format_prompt_description("openai") == "✅name: description" def test_input_field_list_format_prompt_value(): field = InputFieldList("name", "description") - assert field.format_prompt_value(["value1", "value2"]) == "✅ [0]: value1\n✅ [1]: value2" + assert field.format_prompt_value(["value1", "value2"], "openai") == "✅ [0]: value1\n✅ [1]: value2" def test_input_field_list_format_prompt_value_empty(): field = InputFieldList("name", "description") - assert field.format_prompt_value([]) == "✅name: NO VALUES SPECIFIED" \ No newline at end of file + assert field.format_prompt_value([], "openai") == "✅name: NO VALUES SPECIFIED" \ No newline at end of file diff --git a/tests/test_output_parsing.py b/tests/test_output_parsing.py new file mode 100644 index 0000000..4303330 --- /dev/null +++ b/tests/test_output_parsing.py @@ -0,0 +1,72 @@ +import pytest +from langdspy.field_descriptors import InputField, OutputField +from langdspy.prompt_strategies import PromptSignature, DefaultPromptStrategy +from langdspy.prompt_runners import PromptRunner + +class TestOutputParsingPromptSignature(PromptSignature): + 
diff --git a/tests/test_output_parsing.py b/tests/test_output_parsing.py
new file mode 100644
index 0000000..4303330
--- /dev/null
+++ b/tests/test_output_parsing.py
@@ -0,0 +1,72 @@
+import pytest
+from langdspy.field_descriptors import InputField, OutputField
+from langdspy.prompt_strategies import PromptSignature, DefaultPromptStrategy
+from langdspy.prompt_runners import PromptRunner
+
+class TestOutputParsingPromptSignature(PromptSignature):
+    ticket_summary = InputField(name="Ticket Summary", desc="Summary of the ticket we're trying to analyze.")
+    buyer_issues_summary = OutputField(name="Buyer Issues Summary", desc="Summary of the issues this buyer is facing.")
+    buyer_issue_category = OutputField(name="Buyer Issue Enum", desc="One of: ACCOUNT_DELETION, BOX_CONTENTS_CUSTOMIZATION, BRAZE_UNSUBSCRIBE, CANCEL_SUBSCRIPTION, CHANGE_ADDRESS, CHARGE_DISCREPANCY, CHECKOUT_ERROR, COUPON_QUESTION, CUSTOM_SHIPPING_REQUEST, MISROUTED_TICKET, DONATION_REQUEST, DUPLICATE_ORDER, FAILED_REFUND, GIFT_MESSAGE, GUEST_CHECKOUT_UNCLAIMED, INTERNATIONAL_CHARGES, MANAGE_PAYMENT_INFO, MULTIPLE_ACCOUNTS, NPS_CX_FEEDBACK, PASSWORD_RESET, PRE_PURCHASE_QUESTION, SELLER_FEEDBACK, SELLER_NUDGE_OUTREACH, SYSTEM_ERROR, WHERE_IS_MY_ORDER - The specific issue that the buyer is facing.")
+
+def test_output_parsing_with_extra_fields():
+    prompt_runner = PromptRunner(template_class=TestOutputParsingPromptSignature, prompt_strategy=DefaultPromptStrategy)
+
+    input_data = {
+        "ticket_summary": """Ticket ID: 338355
+Status: Open
+Subject: Bump box personalization
+Priority: 2
+Group: MP Buyer
+Created At: 2024-03-17 20:42:40
+Last Updated: 2024-03-17 20:42:41
+Minutes spent waiting: 35 minutes
+Requester: REQUESTER
+Responder ID: None - Responder Name: None
+Tags: None
+Description: [Chatbot]: Hi there, how can we help you today? [user]: Trying to order boxes and put in personaliZation and when I hit done nothing changes [Chatbot]: (No Intent Predicted) [Chatbot]: I'm sorry for the inconvenience. It seems like you're trying to personalize your order, but it's not updating. Please ensure that you select your desired variants (such as color or size preferences) before adding the product to cart. If this doesn't resolve the issue, there might be a technical glitch we need to look into further. * Ordering Multiple Boxes From The Marketplace [Chatbot]: Was I able to help you resolve your question?
+(Yes, thank you!)
+(No, I need more help.) [user]: (No, I need more help.) [Chatbot]: I apologize that I'm not able to provide you with a resolution through this chat. In order to further assist you with this question our customer service team is available to help you. Please complete the below form to be and someone from our team will contact you shortly to resolve your concern. [Chatbot]: Thank you for chatting with us. [user]: Name: [Penny keefer] Email: [keefer.penny1960@gmail.com] Subject: [Bump box personalization] Question: [Trying to order boxes and put in personaliZation and when I hit done nothing changes also need to know how long for delivery] [Chatbot]: (Uploading attachments) [Chatbot]: (Uploading attachments)
+Messages:"""
+    }
+
+    output_data = """The buyer is trying to personalize their order by selecting variants like color or size, but after making their selections and hitting "done", the changes are not being reflected. They are also asking how long delivery will take.
+BOX_CONTENTS_CUSTOMIZATION
+Unfortunately, based on the provided input, I do not have enough context to determine how tickets like this have typically been handled in the past or provide relevant agent responses and resolutions. The input only contains marketing emails from a company called Little Poppy Co. promoting their products. Without any actual support ticket details or previous agent responses, I cannot provide a meaningful output for this particular request.
+"""
+
+    config = {"llm_type": "anthropic"}
+    result = prompt_runner.template.parse_output_to_fields(output_data, config["llm_type"])
+
+    assert result["buyer_issues_summary"] == "The buyer is trying to personalize their order by selecting variants like color or size, but after making their selections and hitting \"done\", the changes are not being reflected. They are also asking how long delivery will take."
+    assert result["buyer_issue_category"] == "BOX_CONTENTS_CUSTOMIZATION"
+
+def test_output_parsing_with_missing_fields():
+    prompt_runner = PromptRunner(template_class=TestOutputParsingPromptSignature, prompt_strategy=DefaultPromptStrategy)
+
+    input_data = {
+        "ticket_summary": """Ticket ID: 338355
+Status: Open
+Subject: Bump box personalization
+Priority: 2
+Group: MP Buyer
+Created At: 2024-03-17 20:42:40
+Last Updated: 2024-03-17 20:42:41
+Minutes spent waiting: 35 minutes
+Requester: REQUESTER
+Responder ID: None - Responder Name: None
+Tags: None
+Description: [Chatbot]: Hi there, how can we help you today? [user]: Trying to order boxes and put in personaliZation and when I hit done nothing changes [Chatbot]: (No Intent Predicted) [Chatbot]: I'm sorry for the inconvenience. It seems like you're trying to personalize your order, but it's not updating. Please ensure that you select your desired variants (such as color or size preferences) before adding the product to cart. If this doesn't resolve the issue, there might be a technical glitch we need to look into further. * Ordering Multiple Boxes From The Marketplace [Chatbot]: Was I able to help you resolve your question?
+(Yes, thank you!)
+(No, I need more help.) [user]: (No, I need more help.) [Chatbot]: I apologize that I'm not able to provide you with a resolution through this chat. In order to further assist you with this question our customer service team is available to help you. Please complete the below form to be and someone from our team will contact you shortly to resolve your concern. [Chatbot]: Thank you for chatting with us. [user]: Name: [Penny keefer] Email: [keefer.penny1960@gmail.com] Subject: [Bump box personalization] Question: [Trying to order boxes and put in personaliZation and when I hit done nothing changes also need to know how long for delivery] [Chatbot]: (Uploading attachments) [Chatbot]: (Uploading attachments)
+Messages:"""
+    }
+
+    output_data = """The buyer is trying to personalize their order by selecting variants like color or size, but after making their selections and hitting "done", the changes are not being reflected. They are also asking how long delivery will take.
+Unfortunately, based on the provided input, I do not have enough context to determine how tickets like this have typically been handled in the past or provide relevant agent responses and resolutions. The input only contains marketing emails from a company called Little Poppy Co. promoting their products. Without any actual support ticket details or previous agent responses, I cannot provide a meaningful output for this particular request.
+"""
+
+    config = {"llm_type": "anthropic"}
+    result = prompt_runner.template.parse_output_to_fields(output_data, config["llm_type"])
+
+    assert result["buyer_issues_summary"] == "The buyer is trying to personalize their order by selecting variants like color or size, but after making their selections and hitting \"done\", the changes are not being reflected. They are also asking how long delivery will take."
+ assert result.get("buyer_issue_category") is None \ No newline at end of file diff --git a/tests/test_prompt_formatting.py b/tests/test_prompt_formatting.py new file mode 100644 index 0000000..7dfccbe --- /dev/null +++ b/tests/test_prompt_formatting.py @@ -0,0 +1,67 @@ +# tests/test_prompt_formatting.py +import pytest +from langdspy.field_descriptors import InputField, OutputField, HintField +from langdspy.prompt_strategies import PromptSignature, DefaultPromptStrategy +from langdspy.prompt_runners import PromptRunner +from langchain_openai import ChatOpenAI +from langchain_anthropic import ChatAnthropic + +class TestPromptSignature(PromptSignature): + input = InputField(name="input", desc="Input field") + output = OutputField(name="output", desc="Output field") + hint = HintField(desc="Hint field") + +def test_format_prompt_openai(): + prompt_runner = PromptRunner(template_class=TestPromptSignature, prompt_strategy=DefaultPromptStrategy) + + formatted_prompt = prompt_runner.template._format_openai_prompt(trained_state=None, use_training=True, input="test input") + print(formatted_prompt) + + assert "💡 Hint field" in formatted_prompt + assert "✅input: Input field" in formatted_prompt + assert "🔑output: Output field" in formatted_prompt + assert "✅input: test input" in formatted_prompt + assert "🔑output:" in formatted_prompt + +def test_format_prompt_anthropic(): + prompt_runner = PromptRunner(template_class=TestPromptSignature, prompt_strategy=DefaultPromptStrategy) + + formatted_prompt = prompt_runner.template._format_anthropic_prompt(trained_state=None, use_training=True, input="test input") + + assert "Hint field" in formatted_prompt + assert "Input field" in formatted_prompt + assert "Output field" in formatted_prompt + assert "test input" in formatted_prompt + assert "" in formatted_prompt + +def test_parse_output_openai(): + prompt_runner = PromptRunner(template_class=TestPromptSignature, prompt_strategy=DefaultPromptStrategy) + + output = "🔑output: test output" + parsed_output = prompt_runner.template._parse_openai_output_to_fields(output) + + assert parsed_output["output"] == "test output" + +def test_parse_output_anthropic(): + prompt_runner = PromptRunner(template_class=TestPromptSignature, prompt_strategy=DefaultPromptStrategy) + + output = "test output" + parsed_output = prompt_runner.template._parse_anthropic_output_to_fields(output) + + assert parsed_output["output"] == "test output" + +def test_llm_type_detection_openai(): + prompt_runner = PromptRunner(template_class=TestPromptSignature, prompt_strategy=DefaultPromptStrategy) + + llm = ChatOpenAI() + llm_type = prompt_runner._determine_llm_type(llm) + + assert llm_type == "openai" + +def test_llm_type_detection_anthropic(): + prompt_runner = PromptRunner(template_class=TestPromptSignature, prompt_strategy=DefaultPromptStrategy) + + llm = ChatAnthropic(model_name="claude-3-sonnet-20240229") + llm_type = prompt_runner._determine_llm_type(llm) + + assert llm_type == "anthropic" \ No newline at end of file diff --git a/tests/test_prompt_runner.py b/tests/test_prompt_runner.py index 77cf06f..7cd7f7e 100644 --- a/tests/test_prompt_runner.py +++ b/tests/test_prompt_runner.py @@ -43,7 +43,7 @@ def invoke(self, input_dict, config): from langchain.chat_models.base import BaseChatModel -class FakeLLM(BaseChatModel): +class TestLLM(BaseChatModel): def invoke(self, *args, **kwargs): return "INVOKED" @@ -51,7 +51,7 @@ def _generate(self, *args, **kwargs): return None def _llm_type(self) -> str: - return "fake" + return "test" def 
diff --git a/tests/test_prompt_runner.py b/tests/test_prompt_runner.py
index 77cf06f..7cd7f7e 100644
--- a/tests/test_prompt_runner.py
+++ b/tests/test_prompt_runner.py
@@ -43,7 +43,7 @@ def invoke(self, input_dict, config):
 
 from langchain.chat_models.base import BaseChatModel
 
-class FakeLLM(BaseChatModel):
+class TestLLM(BaseChatModel):
     def invoke(self, *args, **kwargs):
         return "INVOKED"
 
@@ -51,7 +51,7 @@ def _generate(self, *args, **kwargs):
         return None
 
     def _llm_type(self) -> str:
-        return "fake"
+        return "test"
 
 def test_print_prompt_in_inputs():
     model = TestModel(n_jobs=1, print_prompt="TEST")
@@ -59,12 +59,14 @@ def test_print_prompt_in_inputs():
     mock_invoke = MagicMock(return_value="FORMATTED PROMPT")
 
     with patch.object(DefaultPromptStrategy, 'format_prompt', new=mock_invoke):
-        config = {"llm": FakeLLM()}
+        config = {"llm": TestLLM(), "llm_type": "test"}
         result = model.invoke(input_dict, config=config)
 
         print(result)
         print(f"Called with {mock_invoke.call_count} {mock_invoke.call_args_list} {mock_invoke.call_args}")
-        mock_invoke.assert_called_once_with(**{**input_dict, 'print_prompt': "TEST", 'trained_state': model.trained_state})
+        call_args = {**input_dict, 'print_prompt': "TEST", 'trained_state': model.trained_state, 'use_training': True, 'llm_type': "test"}
+        print(f"Expecting call {call_args}")
+        mock_invoke.assert_called_once_with(**call_args)
 
 def test_trained_state_in_inputs():
     model = TestModel(n_jobs=1)
@@ -72,10 +74,28 @@ def test_trained_state_in_inputs():
     mock_invoke = MagicMock(return_value="FORMATTED PROMPT")
 
     with patch.object(DefaultPromptStrategy, 'format_prompt', new=mock_invoke):
-        config = {"llm": FakeLLM()}
+        config = {"llm": TestLLM(), "llm_type": "test"}
         model.trained_state.examples = [("EXAMPLE_X", "EXAMPLE_Y")]
         result = model.invoke(input_dict, config=config)
 
         print(result)
         print(f"Called with {mock_invoke.call_count} {mock_invoke.call_args_list} {mock_invoke.call_args}")
-        mock_invoke.assert_called_once_with(**{**input_dict, 'print_prompt': "TEST", 'trained_state': model.trained_state})
\ No newline at end of file
+        call_args = {**input_dict, 'print_prompt': "TEST", 'trained_state': model.trained_state, 'use_training': True, 'llm_type': "test"}
+        print(f"Expecting call {call_args}")
+        mock_invoke.assert_called_once_with(**call_args)
+
+def test_use_training():
+    model = TestModel(n_jobs=1)
+    input_dict = {"input": "Test input"}
+    mock_invoke = MagicMock(return_value="FORMATTED PROMPT")
+
+    with patch.object(DefaultPromptStrategy, 'format_prompt', new=mock_invoke):
+        config = {"llm": TestLLM(), "use_training": False, "llm_type": "test"}
+        model.trained_state.examples = [("EXAMPLE_X", "EXAMPLE_Y")]
+        result = model.invoke(input_dict, config=config)
+
+        print(result)
+        print(f"Called with {mock_invoke.call_count} {mock_invoke.call_args_list} {mock_invoke.call_args}")
+        call_args = {**input_dict, 'print_prompt': "TEST", 'trained_state': model.trained_state, 'use_training': False, 'llm_type': "test"}
+        print(f"Expecting call {call_args}")
+        mock_invoke.assert_called_once_with(**call_args)
\ No newline at end of file