From f4ba32c5676089b5030d6a98d193d05be4cf544a Mon Sep 17 00:00:00 2001 From: Amir Elaguizy Date: Wed, 20 Mar 2024 15:50:16 -0500 Subject: [PATCH 1/7] Fixed --- langdspy/transformers.py | 26 ++++++++++++++++++-------- langdspy/validators.py | 8 ++++---- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/langdspy/transformers.py b/langdspy/transformers.py index 7eef24c..a4f1774 100644 --- a/langdspy/transformers.py +++ b/langdspy/transformers.py @@ -16,17 +16,27 @@ def as_json_list(val: str, kwargs: Dict[str, Any]) -> List[Dict[str, Any]]: def as_json(val: str, kwargs: Dict[str, Any]) -> Any: return json.loads(val) +def normalize_enum_value(val: str) -> str: + return val.replace(" ", "_").replace("-", "_").upper() + def as_enum(val: str, kwargs: Dict[str, Any]) -> Enum: enum_class = kwargs['enum'] - try: - return enum_class[val.upper()] - except KeyError: - raise ValueError(f"{val} is not a valid member of the {enum_class.__name__} enumeration") + normalized_val = normalize_enum_value(val) + for member in enum_class: + if normalize_enum_value(member.name) == normalized_val: + return member + raise ValueError(f"{val} is not a valid member of the {enum_class.__name__} enumeration") def as_enum_list(val: str, kwargs: Dict[str, Any]) -> List[Enum]: enum_class = kwargs['enum'] values = [v.strip() for v in val.split(",")] - try: - return [enum_class[v.upper()] for v in values] - except KeyError as e: - raise ValueError(f"{e.args[0]} is not a valid member of the {enum_class.__name__} enumeration") \ No newline at end of file + result = [] + for v in values: + normalized_val = normalize_enum_value(v) + for member in enum_class: + if normalize_enum_value(member.name) == normalized_val: + result.append(member) + break + else: + raise ValueError(f"{v} is not a valid member of the {enum_class.__name__} enumeration") + return result diff --git a/langdspy/validators.py b/langdspy/validators.py index a13cb6b..9c325ce 100644 --- a/langdspy/validators.py +++ b/langdspy/validators.py @@ -34,8 +34,8 @@ def is_one_of(input, output_val, kwargs) -> bool: try: if not kwargs.get('case_sensitive', False): - choices = [c.lower() for c in kwargs['choices']] - output_val = output_val.lower() + choices = [c.lower().replace("_", " ") for c in kwargs['choices']] + output_val = output_val.lower().replace("_", " ") # logger.debug(f"Checking if {output_val} is one of {choices}") for choice in choices: @@ -61,8 +61,8 @@ def is_subset_of(input, output_val, kwargs) -> bool: try: values = [v.strip() for v in output_val.split(",")] if not kwargs.get('case_sensitive', False): - choices = [c.lower() for c in kwargs['choices']] - values = [v.lower() for v in values] + choices = [c.lower().replace("_", " ") for c in kwargs['choices']] + values = [v.lower().replace("_", " ") for v in values] for value in values: if value not in choices: return False From fb82b1a7cfee3d1462eacebc2926654fbfc21b4a Mon Sep 17 00:00:00 2001 From: Amir Elaguizy Date: Wed, 20 Mar 2024 16:21:23 -0500 Subject: [PATCH 2/7] Tests in --- langdspy/data_helper.py | 2 ++ langdspy/transformers.py | 4 +--- langdspy/validators.py | 10 +++++----- tests/test_transformers.py | 19 ++++++++++++++----- tests/test_validators.py | 13 ++++++++----- 5 files changed, 30 insertions(+), 18 deletions(-) create mode 100644 langdspy/data_helper.py diff --git a/langdspy/data_helper.py b/langdspy/data_helper.py new file mode 100644 index 0000000..4a5fb31 --- /dev/null +++ b/langdspy/data_helper.py @@ -0,0 +1,2 @@ +def normalize_enum_value(val: str) -> str: + return val.replace(" ", "_").replace("-", "_").upper() diff --git a/langdspy/transformers.py b/langdspy/transformers.py index a4f1774..ff78739 100644 --- a/langdspy/transformers.py +++ b/langdspy/transformers.py @@ -3,6 +3,7 @@ from enum import Enum from langchain_core.documents import Document import re +from .data_helper import normalize_enum_value def as_bool(value: str, kwargs: Dict[str, Any]) -> bool: value = re.sub(r'[^\w\s]', '', value) @@ -16,9 +17,6 @@ def as_json_list(val: str, kwargs: Dict[str, Any]) -> List[Dict[str, Any]]: def as_json(val: str, kwargs: Dict[str, Any]) -> Any: return json.loads(val) -def normalize_enum_value(val: str) -> str: - return val.replace(" ", "_").replace("-", "_").upper() - def as_enum(val: str, kwargs: Dict[str, Any]) -> Enum: enum_class = kwargs['enum'] normalized_val = normalize_enum_value(val) diff --git a/langdspy/validators.py b/langdspy/validators.py index 9c325ce..53006fb 100644 --- a/langdspy/validators.py +++ b/langdspy/validators.py @@ -1,6 +1,6 @@ import json import logging - +from .data_helper import normalize_enum_value logger = logging.getLogger("langdspy") @@ -34,8 +34,8 @@ def is_one_of(input, output_val, kwargs) -> bool: try: if not kwargs.get('case_sensitive', False): - choices = [c.lower().replace("_", " ") for c in kwargs['choices']] - output_val = output_val.lower().replace("_", " ") + choices = [normalize_enum_value(c) for c in kwargs['choices']] + output_val = normalize_enum_value (output_val) # logger.debug(f"Checking if {output_val} is one of {choices}") for choice in choices: @@ -61,8 +61,8 @@ def is_subset_of(input, output_val, kwargs) -> bool: try: values = [v.strip() for v in output_val.split(",")] if not kwargs.get('case_sensitive', False): - choices = [c.lower().replace("_", " ") for c in kwargs['choices']] - values = [v.lower().replace("_", " ") for v in values] + choices = [normalize_enum_value(c) for c in kwargs['choices']] + values = [normalize_enum_value(v) for v in values] for value in values: if value not in choices: return False diff --git a/tests/test_transformers.py b/tests/test_transformers.py index a46cb01..44a97dd 100644 --- a/tests/test_transformers.py +++ b/tests/test_transformers.py @@ -22,22 +22,31 @@ def test_as_enum(): class Fruit(Enum): APPLE = 1 BANANA = 2 + CHERRY_PIE = 3 + DURIAN_FRUIT = 4 assert transformers.as_enum("APPLE", {"enum": Fruit}) == Fruit.APPLE assert transformers.as_enum("BANANA", {"enum": Fruit}) == Fruit.BANANA + assert transformers.as_enum("cherry pie", {"enum": Fruit}) == Fruit.CHERRY_PIE + assert transformers.as_enum("Durian-Fruit", {"enum": Fruit}) == Fruit.DURIAN_FRUIT + assert transformers.as_enum("Durian_Fruit", {"enum": Fruit}) == Fruit.DURIAN_FRUIT + assert transformers.as_enum("Durian Fruit", {"enum": Fruit}) == Fruit.DURIAN_FRUIT with pytest.raises(ValueError): - transformers.as_enum("CHERRY", {"enum": Fruit}) + transformers.as_enum("MANGO", {"enum": Fruit}) def test_as_enum_list(): class Fruit(Enum): APPLE = 1 BANANA = 2 - CHERRY = 3 + CHERRY_PIE = 3 + DURIAN_FRUIT = 4 assert transformers.as_enum_list("APPLE", {"enum": Fruit}) == [Fruit.APPLE] - assert transformers.as_enum_list("BANANA, CHERRY", {"enum": Fruit}) == [Fruit.BANANA, Fruit.CHERRY] - assert transformers.as_enum_list("APPLE,BANANA,CHERRY", {"enum": Fruit}) == [Fruit.APPLE, Fruit.BANANA, Fruit.CHERRY] + assert transformers.as_enum_list("BANANA, CHERRY PIE", {"enum": Fruit}) == [Fruit.BANANA, Fruit.CHERRY_PIE] + assert transformers.as_enum_list("APPLE,BANANA,CHERRY PIE", {"enum": Fruit}) == [Fruit.APPLE, Fruit.BANANA, Fruit.CHERRY_PIE] + assert transformers.as_enum_list("Durian-Fruit, cherry pie", {"enum": Fruit}) == [Fruit.DURIAN_FRUIT, Fruit.CHERRY_PIE] + assert transformers.as_enum_list("Durian Fruit, cherry_pie", {"enum": Fruit}) == [Fruit.DURIAN_FRUIT, Fruit.CHERRY_PIE] with pytest.raises(ValueError): - transformers.as_enum_list("DURIAN", {"enum": Fruit}) \ No newline at end of file + transformers.as_enum_list("MANGO", {"enum": Fruit}) \ No newline at end of file diff --git a/tests/test_validators.py b/tests/test_validators.py index 1ae4030..debf44b 100644 --- a/tests/test_validators.py +++ b/tests/test_validators.py @@ -12,24 +12,27 @@ def test_is_one_of(): assert validators.is_one_of({}, 'cherry', {'choices': ['apple', 'banana']}) == False assert validators.is_one_of({}, 'APPLE', {'choices': ['apple', 'banana'], 'case_sensitive': False}) == True assert validators.is_one_of({}, 'none', {'choices': ['apple', 'banana'], 'none_ok': True}) == True + assert validators.is_one_of({}, 'apple pie', {'choices': ['apple_pie', 'banana split'], 'case_sensitive': False}) == True + assert validators.is_one_of({}, 'Apple-Pie', {'choices': ['apple_pie', 'banana-split'], 'case_sensitive': False}) == True with pytest.raises(ValueError): validators.is_one_of({}, 'apple', {}) def test_is_subset_of(): - choices = ["apple", "banana", "cherry"] + choices = ["apple", "banana", "cherry_pie", "durian-fruit"] assert validators.is_subset_of({}, "apple", {"choices": choices}) == True assert validators.is_subset_of({}, "apple,banana", {"choices": choices}) == True - assert validators.is_subset_of({}, "apple, banana, cherry", {"choices": choices}) == True + assert validators.is_subset_of({}, "apple, banana, cherry_pie", {"choices": choices}) == True assert validators.is_subset_of({}, "APPLE", {"choices": choices, "case_sensitive": False}) == True assert validators.is_subset_of({}, "APPLE,BANANA", {"choices": choices, "case_sensitive": False}) == True + assert validators.is_subset_of({}, "Durian-Fruit, Cherry Pie", {"choices": choices, "case_sensitive": False}) == True - assert validators.is_subset_of({}, "durian", {"choices": choices}) == False - assert validators.is_subset_of({}, "apple,durian", {"choices": choices}) == False + assert validators.is_subset_of({}, "mango", {"choices": choices}) == False + assert validators.is_subset_of({}, "apple,mango", {"choices": choices}) == False assert validators.is_subset_of({}, "none", {"choices": choices, "none_ok": True}) == True assert validators.is_subset_of({}, "apple,none", {"choices": choices, "none_ok": True}) == False with pytest.raises(ValueError): - validators.is_subset_of({}, "apple", {}) + validators.is_subset_of({}, "apple", {}) \ No newline at end of file From ef5f3c87b7caddc3c10060abf2f95897d3a2fa8c Mon Sep 17 00:00:00 2001 From: Amir Elaguizy Date: Wed, 20 Mar 2024 22:01:25 -0500 Subject: [PATCH 3/7] workflow updates --- .github/workflows/test.yml | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dbb3c12..c9211dd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,48 +1,42 @@ name: Run Tests - on: pull_request: branches: [main] - jobs: test: runs-on: ubuntu-latest - steps: - uses: actions/checkout@v2 - - name: Set up Python uses: actions/setup-python@v2 with: python-version: 3.9 - - name: Install Poetry run: | curl -sSL https://install.python-poetry.org | python3 - - - name: Configure Poetry run: | echo "$HOME/.local/bin" >> $GITHUB_PATH poetry config virtualenvs.in-project true - - name: Set up cache uses: actions/cache@v2 with: path: .venv key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }} - - name: Install dependencies run: | poetry install - - name: Run tests with coverage run: | - poetry run test - + poetry run pytest --cov=langdspy --cov-report=html tests/ + - name: Check test results + if: failure() + run: | + echo "Tests failed. Please fix the failing tests." + exit 1 - name: Generate coverage report run: | - poetry run coverage - + poetry run coverage html - name: Upload coverage report uses: actions/upload-artifact@v2 with: From da24857db1fbd23f9cc191eda348b55cf985579b Mon Sep 17 00:00:00 2001 From: Amir Elaguizy Date: Wed, 20 Mar 2024 22:09:40 -0500 Subject: [PATCH 4/7] Diagnostics --- langdspy/prompt_strategies.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/langdspy/prompt_strategies.py b/langdspy/prompt_strategies.py index a388440..a41810b 100644 --- a/langdspy/prompt_strategies.py +++ b/langdspy/prompt_strategies.py @@ -42,6 +42,8 @@ def __init__(self, **kwargs): hints = {} # New dictionary to hold hint fields for name, attribute in self.__class__.__fields__.items(): + print(f"Type of attribute.type_: {type(attribute.type_)}") + print(f"Class of attribute.type_: {attribute.type_.__class__}") if issubclass(attribute.type_, InputField): inputs[name] = attribute.default elif issubclass(attribute.type_, OutputField): From 5053fa34c233dc7c1c843689e1951dbefe9993ea Mon Sep 17 00:00:00 2001 From: Amir Elaguizy Date: Wed, 20 Mar 2024 22:13:03 -0500 Subject: [PATCH 5/7] Speculative workflow fix --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c9211dd..fd00434 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,7 +10,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: 3.9 + python-version: '3.11' - name: Install Poetry run: | curl -sSL https://install.python-poetry.org | python3 - From c23472fe3e6eab60c56ab30549aa914c9a6acc6a Mon Sep 17 00:00:00 2001 From: Amir Elaguizy Date: Wed, 20 Mar 2024 22:16:12 -0500 Subject: [PATCH 6/7] Test fixes --- langdspy/prompt_strategies.py | 2 -- tests/test_model_train.py | 46 ++++++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/langdspy/prompt_strategies.py b/langdspy/prompt_strategies.py index a41810b..a388440 100644 --- a/langdspy/prompt_strategies.py +++ b/langdspy/prompt_strategies.py @@ -42,8 +42,6 @@ def __init__(self, **kwargs): hints = {} # New dictionary to hold hint fields for name, attribute in self.__class__.__fields__.items(): - print(f"Type of attribute.type_: {type(attribute.type_)}") - print(f"Class of attribute.type_: {attribute.type_.__class__}") if issubclass(attribute.type_, InputField): inputs[name] = attribute.default elif issubclass(attribute.type_, OutputField): diff --git a/tests/test_model_train.py b/tests/test_model_train.py index 1a3fae3..94cbe02 100644 --- a/tests/test_model_train.py +++ b/tests/test_model_train.py @@ -7,7 +7,51 @@ dotenv.load_dotenv() import pytest from unittest.mock import MagicMock -from examples.amazon.generate_slugs import ProductSlugGenerator, slug_similarity, get_llm +import langdspy +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.metrics.pairwise import cosine_similarity + + +class GenerateSlug(langdspy.PromptSignature): + hint_slug = langdspy.HintField(desc="Generate a URL-friendly slug based on the provided H1, title, and product copy. The slug should be lowercase, use hyphens to separate words, and not exceed 50 characters.") + + h1 = langdspy.InputField(name="H1", desc="The H1 heading of the product page") + title = langdspy.InputField(name="Title", desc="The title of the product page") + product_copy = langdspy.InputField(name="Product Copy", desc="The product description or copy") + + slug = langdspy.OutputField(name="Slug", desc="The generated URL-friendly slug") + +class ProductSlugGenerator(langdspy.Model): + generate_slug = langdspy.PromptRunner(template_class=GenerateSlug, prompt_strategy=langdspy.DefaultPromptStrategy) + + def invoke(self, input_dict, config): + h1 = input_dict['h1'] + title = input_dict['title'] + product_copy = input_dict['product_copy'] + + slug_res = self.generate_slug.invoke({'h1': h1, 'title': title, 'product_copy': product_copy}, config=config) + + return slug_res.slug + + +def cosine_similarity_tfidf(true_slugs, predicted_slugs): + # Convert slugs to lowercase + true_slugs = [slug.lower() for slug in true_slugs] + predicted_slugs = [slug.lower() for slug in predicted_slugs] + + # for i in range(len(true_slugs)): + # print(f"Actual Slug: {true_slugs[i]} Predicted: {predicted_slugs[i]}") + + vectorizer = TfidfVectorizer() + true_vectors = vectorizer.fit_transform(true_slugs) + predicted_vectors = vectorizer.transform(predicted_slugs) + similarity_scores = cosine_similarity(true_vectors, predicted_vectors) + return similarity_scores.diagonal() + +def slug_similarity(X, true_slugs, predicted_slugs): + similarity_scores = cosine_similarity_tfidf(true_slugs, predicted_slugs) + average_similarity = sum(similarity_scores) / len(similarity_scores) + return average_similarity @pytest.fixture def model(): From 710bbe6f93a3612864e7e6355c5374726e0112b2 Mon Sep 17 00:00:00 2001 From: Amir Elaguizy Date: Wed, 20 Mar 2024 22:18:51 -0500 Subject: [PATCH 7/7] Test fixes --- .github/workflows/test.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index fd00434..4d51fc5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -5,6 +5,9 @@ on: jobs: test: runs-on: ubuntu-latest + env: + OPENAI_API_KEY: "FAKE" + steps: - uses: actions/checkout@v2 - name: Set up Python