From f4ba32c5676089b5030d6a98d193d05be4cf544a Mon Sep 17 00:00:00 2001
From: Amir Elaguizy <aelaguiz@gmail.com>
Date: Wed, 20 Mar 2024 15:50:16 -0500
Subject: [PATCH 1/7] Relax separator matching in enum transformers and choice validators

---
 langdspy/transformers.py | 26 ++++++++++++++++++--------
 langdspy/validators.py   |  8 ++++----
 2 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/langdspy/transformers.py b/langdspy/transformers.py
index 7eef24c..a4f1774 100644
--- a/langdspy/transformers.py
+++ b/langdspy/transformers.py
@@ -16,17 +16,27 @@ def as_json_list(val: str, kwargs: Dict[str, Any]) -> List[Dict[str, Any]]:
 def as_json(val: str, kwargs: Dict[str, Any]) -> Any:
     return json.loads(val)
 
+def normalize_enum_value(val: str) -> str:
+    return val.replace(" ", "_").replace("-", "_").upper()
+
 def as_enum(val: str, kwargs: Dict[str, Any]) -> Enum:
     enum_class = kwargs['enum']
-    try:
-        return enum_class[val.upper()]
-    except KeyError:
-        raise ValueError(f"{val} is not a valid member of the {enum_class.__name__} enumeration")
+    normalized_val = normalize_enum_value(val)
+    for member in enum_class:
+        if normalize_enum_value(member.name) == normalized_val:
+            return member
+    raise ValueError(f"{val} is not a valid member of the {enum_class.__name__} enumeration")
 
 def as_enum_list(val: str, kwargs: Dict[str, Any]) -> List[Enum]:
     enum_class = kwargs['enum']
     values = [v.strip() for v in val.split(",")]
-    try:
-        return [enum_class[v.upper()] for v in values]
-    except KeyError as e:
-        raise ValueError(f"{e.args[0]} is not a valid member of the {enum_class.__name__} enumeration")
\ No newline at end of file
+    result = []
+    for v in values:
+        normalized_val = normalize_enum_value(v)
+        for member in enum_class:
+            if normalize_enum_value(member.name) == normalized_val:
+                result.append(member)
+                break
+        else:
+            raise ValueError(f"{v} is not a valid member of the {enum_class.__name__} enumeration")
+    return result
diff --git a/langdspy/validators.py b/langdspy/validators.py
index a13cb6b..9c325ce 100644
--- a/langdspy/validators.py
+++ b/langdspy/validators.py
@@ -34,8 +34,8 @@ def is_one_of(input, output_val, kwargs) -> bool:
 
     try:
         if not kwargs.get('case_sensitive', False):
-            choices = [c.lower() for c in kwargs['choices']]
-            output_val = output_val.lower()
+            choices = [c.lower().replace("_", " ") for c in kwargs['choices']]
+            output_val = output_val.lower().replace("_", " ")
 
         # logger.debug(f"Checking if {output_val} is one of {choices}")
         for choice in choices:
@@ -61,8 +61,8 @@ def is_subset_of(input, output_val, kwargs) -> bool:
     try:
         values = [v.strip() for v in output_val.split(",")]
         if not kwargs.get('case_sensitive', False):
-            choices = [c.lower() for c in kwargs['choices']]
-            values = [v.lower() for v in values]
+            choices = [c.lower().replace("_", " ") for c in kwargs['choices']]
+            values = [v.lower().replace("_", " ") for v in values]
         for value in values:
             if value not in choices:
                 return False

From fb82b1a7cfee3d1462eacebc2926654fbfc21b4a Mon Sep 17 00:00:00 2001
From: Amir Elaguizy <aelaguiz@gmail.com>
Date: Wed, 20 Mar 2024 16:21:23 -0500
Subject: [PATCH 2/7] Move normalize_enum_value to data_helper; use it in validators; extend tests

---
 langdspy/data_helper.py    |  2 ++
 langdspy/transformers.py   |  4 +---
 langdspy/validators.py     | 10 +++++-----
 tests/test_transformers.py | 19 ++++++++++++++-----
 tests/test_validators.py   | 13 ++++++++-----
 5 files changed, 30 insertions(+), 18 deletions(-)
 create mode 100644 langdspy/data_helper.py

diff --git a/langdspy/data_helper.py b/langdspy/data_helper.py
new file mode 100644
index 0000000..4a5fb31
--- /dev/null
+++ b/langdspy/data_helper.py
@@ -0,0 +1,2 @@
+def normalize_enum_value(val: str) -> str:
+    return val.replace(" ", "_").replace("-", "_").upper()
diff --git a/langdspy/transformers.py b/langdspy/transformers.py
index a4f1774..ff78739 100644
--- a/langdspy/transformers.py
+++ b/langdspy/transformers.py
@@ -3,6 +3,7 @@
 from enum import Enum
 from langchain_core.documents import Document
 import re
+from .data_helper import normalize_enum_value
 
 def as_bool(value: str, kwargs: Dict[str, Any]) -> bool:
     value = re.sub(r'[^\w\s]', '', value)
@@ -16,9 +17,6 @@ def as_json_list(val: str, kwargs: Dict[str, Any]) -> List[Dict[str, Any]]:
 def as_json(val: str, kwargs: Dict[str, Any]) -> Any:
     return json.loads(val)
 
-def normalize_enum_value(val: str) -> str:
-    return val.replace(" ", "_").replace("-", "_").upper()
-
 def as_enum(val: str, kwargs: Dict[str, Any]) -> Enum:
     enum_class = kwargs['enum']
     normalized_val = normalize_enum_value(val)
diff --git a/langdspy/validators.py b/langdspy/validators.py
index 9c325ce..53006fb 100644
--- a/langdspy/validators.py
+++ b/langdspy/validators.py
@@ -1,6 +1,6 @@
 import json
 import logging
-
+from .data_helper import normalize_enum_value
 
 logger = logging.getLogger("langdspy")
 
@@ -34,8 +34,8 @@ def is_one_of(input, output_val, kwargs) -> bool:
 
     try:
         if not kwargs.get('case_sensitive', False):
-            choices = [c.lower().replace("_", " ") for c in kwargs['choices']]
-            output_val = output_val.lower().replace("_", " ")
+            choices = [normalize_enum_value(c) for c in kwargs['choices']]
+            output_val = normalize_enum_value(output_val)
 
         # logger.debug(f"Checking if {output_val} is one of {choices}")
         for choice in choices:
@@ -61,8 +61,8 @@ def is_subset_of(input, output_val, kwargs) -> bool:
     try:
         values = [v.strip() for v in output_val.split(",")]
         if not kwargs.get('case_sensitive', False):
-            choices = [c.lower().replace("_", " ") for c in kwargs['choices']]
-            values = [v.lower().replace("_", " ") for v in values]
+            choices = [normalize_enum_value(c) for c in kwargs['choices']]
+            values = [normalize_enum_value(v) for v in values]
         for value in values:
             if value not in choices:
                 return False
diff --git a/tests/test_transformers.py b/tests/test_transformers.py
index a46cb01..44a97dd 100644
--- a/tests/test_transformers.py
+++ b/tests/test_transformers.py
@@ -22,22 +22,31 @@ def test_as_enum():
     class Fruit(Enum):
         APPLE = 1
         BANANA = 2
+        CHERRY_PIE = 3
+        DURIAN_FRUIT = 4
     
     assert transformers.as_enum("APPLE", {"enum": Fruit}) == Fruit.APPLE
     assert transformers.as_enum("BANANA", {"enum": Fruit}) == Fruit.BANANA
+    assert transformers.as_enum("cherry pie", {"enum": Fruit}) == Fruit.CHERRY_PIE
+    assert transformers.as_enum("Durian-Fruit", {"enum": Fruit}) == Fruit.DURIAN_FRUIT
+    assert transformers.as_enum("Durian_Fruit", {"enum": Fruit}) == Fruit.DURIAN_FRUIT
+    assert transformers.as_enum("Durian Fruit", {"enum": Fruit}) == Fruit.DURIAN_FRUIT
     
     with pytest.raises(ValueError):
-        transformers.as_enum("CHERRY", {"enum": Fruit})
+        transformers.as_enum("MANGO", {"enum": Fruit})
 
 def test_as_enum_list():
     class Fruit(Enum):
         APPLE = 1
         BANANA = 2
-        CHERRY = 3
+        CHERRY_PIE = 3
+        DURIAN_FRUIT = 4
     
     assert transformers.as_enum_list("APPLE", {"enum": Fruit}) == [Fruit.APPLE]
-    assert transformers.as_enum_list("BANANA, CHERRY", {"enum": Fruit}) == [Fruit.BANANA, Fruit.CHERRY]
-    assert transformers.as_enum_list("APPLE,BANANA,CHERRY", {"enum": Fruit}) == [Fruit.APPLE, Fruit.BANANA, Fruit.CHERRY]
+    assert transformers.as_enum_list("BANANA, CHERRY PIE", {"enum": Fruit}) == [Fruit.BANANA, Fruit.CHERRY_PIE]
+    assert transformers.as_enum_list("APPLE,BANANA,CHERRY PIE", {"enum": Fruit}) == [Fruit.APPLE, Fruit.BANANA, Fruit.CHERRY_PIE]
+    assert transformers.as_enum_list("Durian-Fruit, cherry pie", {"enum": Fruit}) == [Fruit.DURIAN_FRUIT, Fruit.CHERRY_PIE]
+    assert transformers.as_enum_list("Durian Fruit, cherry_pie", {"enum": Fruit}) == [Fruit.DURIAN_FRUIT, Fruit.CHERRY_PIE]
     
     with pytest.raises(ValueError):
-        transformers.as_enum_list("DURIAN", {"enum": Fruit})
\ No newline at end of file
+        transformers.as_enum_list("MANGO", {"enum": Fruit})
\ No newline at end of file
diff --git a/tests/test_validators.py b/tests/test_validators.py
index 1ae4030..debf44b 100644
--- a/tests/test_validators.py
+++ b/tests/test_validators.py
@@ -12,24 +12,27 @@ def test_is_one_of():
     assert validators.is_one_of({}, 'cherry', {'choices': ['apple', 'banana']}) == False
     assert validators.is_one_of({}, 'APPLE', {'choices': ['apple', 'banana'], 'case_sensitive': False}) == True
     assert validators.is_one_of({}, 'none', {'choices': ['apple', 'banana'], 'none_ok': True}) == True
+    assert validators.is_one_of({}, 'apple pie', {'choices': ['apple_pie', 'banana split'], 'case_sensitive': False}) == True
+    assert validators.is_one_of({}, 'Apple-Pie', {'choices': ['apple_pie', 'banana-split'], 'case_sensitive': False}) == True
     
     with pytest.raises(ValueError):
         validators.is_one_of({}, 'apple', {})
 
 def test_is_subset_of():
-    choices = ["apple", "banana", "cherry"]
+    choices = ["apple", "banana", "cherry_pie", "durian-fruit"]
     
     assert validators.is_subset_of({}, "apple", {"choices": choices}) == True
     assert validators.is_subset_of({}, "apple,banana", {"choices": choices}) == True
-    assert validators.is_subset_of({}, "apple, banana, cherry", {"choices": choices}) == True
+    assert validators.is_subset_of({}, "apple, banana, cherry_pie", {"choices": choices}) == True
     assert validators.is_subset_of({}, "APPLE", {"choices": choices, "case_sensitive": False}) == True
     assert validators.is_subset_of({}, "APPLE,BANANA", {"choices": choices, "case_sensitive": False}) == True
+    assert validators.is_subset_of({}, "Durian-Fruit, Cherry Pie", {"choices": choices, "case_sensitive": False}) == True
     
-    assert validators.is_subset_of({}, "durian", {"choices": choices}) == False
-    assert validators.is_subset_of({}, "apple,durian", {"choices": choices}) == False
+    assert validators.is_subset_of({}, "mango", {"choices": choices}) == False
+    assert validators.is_subset_of({}, "apple,mango", {"choices": choices}) == False
     
     assert validators.is_subset_of({}, "none", {"choices": choices, "none_ok": True}) == True
     assert validators.is_subset_of({}, "apple,none", {"choices": choices, "none_ok": True}) == False
     
     with pytest.raises(ValueError):
-        validators.is_subset_of({}, "apple", {})
+        validators.is_subset_of({}, "apple", {})
\ No newline at end of file

From ef5f3c87b7caddc3c10060abf2f95897d3a2fa8c Mon Sep 17 00:00:00 2001
From: Amir Elaguizy <aelaguiz@gmail.com>
Date: Wed, 20 Mar 2024 22:01:25 -0500
Subject: [PATCH 3/7] Run pytest with coverage directly in the test workflow

---
 .github/workflows/test.yml | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index dbb3c12..c9211dd 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,48 +1,42 @@
 name: Run Tests
-
 on:
   pull_request:
     branches: [main]
-
 jobs:
   test:
     runs-on: ubuntu-latest
-
     steps:
     - uses: actions/checkout@v2
-
     - name: Set up Python
       uses: actions/setup-python@v2
       with:
         python-version: 3.9
-
     - name: Install Poetry
       run: |
         curl -sSL https://install.python-poetry.org | python3 -
-
     - name: Configure Poetry
       run: |
         echo "$HOME/.local/bin" >> $GITHUB_PATH
         poetry config virtualenvs.in-project true
-
     - name: Set up cache
       uses: actions/cache@v2
       with:
         path: .venv
         key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}
-
     - name: Install dependencies
       run: |
         poetry install
-
     - name: Run tests with coverage
       run: |
-        poetry run test
-
+        poetry run pytest --cov=langdspy --cov-report=html tests/
+    - name: Check test results
+      if: failure()
+      run: |
+        echo "Tests failed. Please fix the failing tests."
+        exit 1
     - name: Generate coverage report
       run: |
-        poetry run coverage
-
+        poetry run coverage html
     - name: Upload coverage report
       uses: actions/upload-artifact@v2
       with:

From da24857db1fbd23f9cc191eda348b55cf985579b Mon Sep 17 00:00:00 2001
From: Amir Elaguizy <aelaguiz@gmail.com>
Date: Wed, 20 Mar 2024 22:09:40 -0500
Subject: [PATCH 4/7] Add temporary debug prints of attribute.type_ in prompt_strategies

---
 langdspy/prompt_strategies.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/langdspy/prompt_strategies.py b/langdspy/prompt_strategies.py
index a388440..a41810b 100644
--- a/langdspy/prompt_strategies.py
+++ b/langdspy/prompt_strategies.py
@@ -42,6 +42,8 @@ def __init__(self, **kwargs):
         hints = {}  # New dictionary to hold hint fields
 
         for name, attribute in self.__class__.__fields__.items():
+            print(f"Type of attribute.type_: {type(attribute.type_)}")
+            print(f"Class of attribute.type_: {attribute.type_.__class__}")
             if issubclass(attribute.type_, InputField):
                 inputs[name] = attribute.default
             elif issubclass(attribute.type_, OutputField):

From 5053fa34c233dc7c1c843689e1951dbefe9993ea Mon Sep 17 00:00:00 2001
From: Amir Elaguizy <aelaguiz@gmail.com>
Date: Wed, 20 Mar 2024 22:13:03 -0500
Subject: [PATCH 5/7] Bump CI Python from 3.9 to 3.11 (speculative workflow fix)

---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c9211dd..fd00434 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -10,7 +10,7 @@ jobs:
     - name: Set up Python
       uses: actions/setup-python@v2
       with:
-        python-version: 3.9
+        python-version: '3.11'
     - name: Install Poetry
       run: |
         curl -sSL https://install.python-poetry.org | python3 -

From c23472fe3e6eab60c56ab30549aa914c9a6acc6a Mon Sep 17 00:00:00 2001
From: Amir Elaguizy <aelaguiz@gmail.com>
Date: Wed, 20 Mar 2024 22:16:12 -0500
Subject: [PATCH 6/7] Remove debug prints; inline slug generator helpers into test_model_train

---
 langdspy/prompt_strategies.py |  2 --
 tests/test_model_train.py     | 46 ++++++++++++++++++++++++++++++++++-
 2 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/langdspy/prompt_strategies.py b/langdspy/prompt_strategies.py
index a41810b..a388440 100644
--- a/langdspy/prompt_strategies.py
+++ b/langdspy/prompt_strategies.py
@@ -42,8 +42,6 @@ def __init__(self, **kwargs):
         hints = {}  # New dictionary to hold hint fields
 
         for name, attribute in self.__class__.__fields__.items():
-            print(f"Type of attribute.type_: {type(attribute.type_)}")
-            print(f"Class of attribute.type_: {attribute.type_.__class__}")
             if issubclass(attribute.type_, InputField):
                 inputs[name] = attribute.default
             elif issubclass(attribute.type_, OutputField):
diff --git a/tests/test_model_train.py b/tests/test_model_train.py
index 1a3fae3..94cbe02 100644
--- a/tests/test_model_train.py
+++ b/tests/test_model_train.py
@@ -7,7 +7,51 @@
 dotenv.load_dotenv()
 import pytest
 from unittest.mock import MagicMock
-from examples.amazon.generate_slugs import ProductSlugGenerator, slug_similarity, get_llm
+import langdspy
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+
+
+class GenerateSlug(langdspy.PromptSignature):
+    hint_slug = langdspy.HintField(desc="Generate a URL-friendly slug based on the provided H1, title, and product copy. The slug should be lowercase, use hyphens to separate words, and not exceed 50 characters.")
+    
+    h1 = langdspy.InputField(name="H1", desc="The H1 heading of the product page")
+    title = langdspy.InputField(name="Title", desc="The title of the product page")
+    product_copy = langdspy.InputField(name="Product Copy", desc="The product description or copy")
+    
+    slug = langdspy.OutputField(name="Slug", desc="The generated URL-friendly slug")
+
+class ProductSlugGenerator(langdspy.Model):
+    generate_slug = langdspy.PromptRunner(template_class=GenerateSlug, prompt_strategy=langdspy.DefaultPromptStrategy)
+
+    def invoke(self, input_dict, config):
+        h1 = input_dict['h1']
+        title = input_dict['title']
+        product_copy = input_dict['product_copy']
+        
+        slug_res = self.generate_slug.invoke({'h1': h1, 'title': title, 'product_copy': product_copy}, config=config)
+        
+        return slug_res.slug
+
+
+def cosine_similarity_tfidf(true_slugs, predicted_slugs):
+    # Convert slugs to lowercase
+    true_slugs = [slug.lower() for slug in true_slugs]
+    predicted_slugs = [slug.lower() for slug in predicted_slugs]
+
+    # for i in range(len(true_slugs)):
+    #     print(f"Actual Slug: {true_slugs[i]} Predicted: {predicted_slugs[i]}")
+
+    vectorizer = TfidfVectorizer()
+    true_vectors = vectorizer.fit_transform(true_slugs)
+    predicted_vectors = vectorizer.transform(predicted_slugs)
+    similarity_scores = cosine_similarity(true_vectors, predicted_vectors)
+    return similarity_scores.diagonal()
+
+def slug_similarity(X, true_slugs, predicted_slugs):
+    similarity_scores = cosine_similarity_tfidf(true_slugs, predicted_slugs)
+    average_similarity = sum(similarity_scores) / len(similarity_scores)
+    return average_similarity
 
 @pytest.fixture
 def model():

From 710bbe6f93a3612864e7e6355c5374726e0112b2 Mon Sep 17 00:00:00 2001
From: Amir Elaguizy <aelaguiz@gmail.com>
Date: Wed, 20 Mar 2024 22:18:51 -0500
Subject: [PATCH 7/7] Set a placeholder OPENAI_API_KEY in the test workflow

---
 .github/workflows/test.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index fd00434..4d51fc5 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -5,6 +5,9 @@ on:
 jobs:
   test:
     runs-on: ubuntu-latest
+    env:
+      OPENAI_API_KEY: "FAKE"
+
     steps:
     - uses: actions/checkout@v2
     - name: Set up Python