Renamed function `validate_entity` to `resolve_entity`.

genomoncology · Mar 25, 2024 · 22ab240 · 22ab240
1 parent 9ef16dc
commit 22ab240
Show file tree

Hide file tree

Showing 8 changed files with 120 additions and 64 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,11 +1,18 @@
+## v0.1.1 (2024-03-25)
+
+#### Changed
+ - Fixes to the README regarding validation utility functions.
+ - Renamed the ill-named `validate_entity` function to `resolve_entity` and added an explicit test.
+
+
 ## v0.1.0 (2023-03-25)
 
 The project's typing system was validated using mypy and refactored to follow
 Annotated types as specified by [PEP 593](https://peps.python.org/pep-0593/).
 
 #### Added
  - FuzzValidator annotation type created to simplify design
- - validate_python, validate_json, and validate_entity functions added
+ - validate_python and validate_json functions added
  - Added Language, LanguageName, and LanguageCode usable types
  - fuzztypes.logger and fuzztypes.utils module for downloading iso codes
 

diff --git a/README.md b/README.md
@@ -146,28 +146,6 @@ assert obj.model_dump() == {
 }
 ```
 
-Types can also be used outside of Pydantic models to validate and normalize data:
-
-```python
-from fuzztypes import Date, Fuzzmoji
-
-# access value via "call" (parenthesis)
-assert Date("1 JAN 2023").isoformat() == "2023-01-01"
-assert Fuzzmoji("tada") == '🎉'
-
-# access entity via "key lookup" (square brackets)
-assert Fuzzmoji["movie cam"].value == "🎥"
-assert Fuzzmoji["movie cam"].aliases == [':movie_camera:', 'movie camera']
-assert Fuzzmoji["movie cam"].model_dump() == {
-    'value': '🎥',
-    'label': None,
-    'meta': None,
-    'priority': None,
-    'aliases': [':movie_camera:', 'movie camera']
-}
-```
-
-
 ## Installation
 
 Available on [PyPI](https://pypi.org/project/FuzzTypes/):
@@ -212,19 +190,22 @@ specific data validation and normalization requirements.
 Usable types are pre-built annotation types in FuzzTypes that can be directly used in Pydantic models. They provide
 convenient and ready-to-use functionality for common data types and scenarios.
 
-| Type        | Description                                                                               |
-|-------------|-------------------------------------------------------------------------------------------|
-| `ASCII`     | Converts Unicode strings to ASCII equivalents using either `anyascii` or `unidecode`.     |
-| `Date`      | Converts date strings to `date` objects using `dateparser`.                               |
-| `Email`     | Extracts email addresses from strings using a regular expression.                         |
-| `Emoji`     | Matches emojis based on Unicode Consortium aliases using the `emoji` library.             |
-| `Fuzzmoji`  | Matches emojis using fuzzy string matching against aliases.                               |
-| `Integer`   | Converts numeric strings or words to integers using `number-parser`.                      |
-| `Person`    | Parses person names into subfields (e.g., first, last, suffix) using `python-nameparser`. |
-| `SSN`       | Extracts U.S. Social Security Numbers from strings using a regular expression.            |
-| `Time`      | Converts datetime strings to `datetime` objects using `dateparser`.                       |
-| `Vibemoji`  | Matches emojis using semantic similarity against aliases.                                 |
-| `Zipcode`   | Extracts U.S. ZIP codes (5 or 9 digits) from strings using a regular expression.          |
+| Type           | Description                                                                               |
+|----------------|-------------------------------------------------------------------------------------------|
+| `ASCII`        | Converts Unicode strings to ASCII equivalents using either `anyascii` or `unidecode`.     |
+| `Date`         | Converts date strings to `date` objects using `dateparser`.                               |
+| `Email`        | Extracts email addresses from strings using a regular expression.                         |
+| `Emoji`        | Matches emojis based on Unicode Consortium aliases using the `emoji` library.             |
+| `Fuzzmoji`     | Matches emojis using fuzzy string matching against aliases.                               |
+| `Integer`      | Converts numeric strings or words to integers using `number-parser`.                      |
+| `LanguageCode` | Resolves language to ISO language codes (e.g., "en").                                     |
+| `LanguageName` | Resolves language to ISO language names (e.g., "English").                                |
+| `Language`     | Resolves language to ISO language object (name, alpha_2, alpha_3, scope, type, etc.).     |
+| `Person`       | Parses person names into subfields (e.g., first, last, suffix) using `python-nameparser`. |
+| `SSN`          | Extracts U.S. Social Security Numbers from strings using a regular expression.            |
+| `Time`         | Converts datetime strings to `datetime` objects using `dateparser`.                       |
+| `Vibemoji`     | Matches emojis using semantic similarity against aliases.                                 |
+| `Zipcode`      | Extracts U.S. ZIP codes (5 or 9 digits) from strings using a regular expression.          |
 
 These usable types provide a wide range of commonly needed data validations and transformations, making it
 easier to work with various data formats and perform tasks like parsing, extraction, and matching.
@@ -298,29 +279,13 @@ data for use in precision oncology clinical decision support systems. Contact me
 offerings.
 
 
-## Roadmap
-
-Additional capabilities will soon be added:
-
-- Complete OnDiskValidator [fuzzy string matching](https://github.com/quickwit-oss/tantivy-py/issues/20).
-- Reranking models
-- Hybrid search (linear and reciprocal rank fusion using fuzzy and semantic)
-- Trie-based autocomplete and aho-corasick search
-- `Humanize` intword and ordinals
-- `Pint` quantities
-- `Country` and `Currency` codes/names
-
-The following usable types are planned for future implementation in FuzzTypes:
-
 | Type           | Description                                                                               |
 |----------------|-------------------------------------------------------------------------------------------|
 | `AirportCode`  | Represents airport codes (e.g., "ORD").                                                   |
 | `Airport`      | Represents airport names (e.g., "O'Hare International Airport").                          |
 | `CountryCode`  | Represents ISO country codes (e.g., "US").                                                |
 | `Country`      | Represents country names (e.g., "United States").                                         |
 | `Currency`     | Represents currency codes (e.g., "USD").                                                  |
-| `LanguageCode` | Represents ISO language codes (e.g., "en").                                               |
-| `Language`     | Represents language names (e.g., "English").                                              |
 | `Quantity`     | Converts strings to `Quantity` objects with value and unit using `pint`.                  |
 | `URL`          | Represents normalized URLs with tracking parameters removed using `url-normalize`.        |
 | `USStateCode`  | Represents U.S. state codes (e.g., "CA").                                                 |
@@ -495,7 +460,6 @@ assert model.name == "JOHN"
 ```
 
 
-
 ### Regex
 
 The `Regex` base type allows matching values using a regular
@@ -568,4 +532,51 @@ assert obj.model_dump(exclude_defaults=True, mode="json") == {
     "language_code": "en",
     "language_name": "English",
 }
+```
+
+### Validate Python and JSON functions
+
+A functional approach to validating Python objects and JSON strings is also available.
+Below are examples of the `validate_python` and `validate_json` functions:
+
+```python
+from pydantic import BaseModel
+from fuzztypes import validate_python, validate_json, Integer, Date
+
+# validate python
+assert validate_python(Integer, "two hundred") == 200
+
+# validate json
+class MyModel(BaseModel):
+    date: Date
+
+json = '{"date": "July 4th 2021"}'
+obj = validate_json(MyModel, json)
+assert obj.date.isoformat() == "2021-07-04"
+```
+
+### Resolve Entities from FuzzValidator or Annotation
+
+Entities can be resolved with the `resolve_entity` function, either from a `FuzzValidator` such as
+`InMemoryValidator` or `OnDiskValidator`, or from an `Annotated` type that wraps one:
+
+```python
+from typing import Annotated
+from fuzztypes import resolve_entity, InMemoryValidator
+
+elements = ["earth", "fire", "water", "air"]
+ElementValidator = InMemoryValidator(elements)
+Element = Annotated[str, ElementValidator]
+
+assert resolve_entity(ElementValidator, "EARTH").model_dump() == {
+    "aliases": [],
+    "label": None,
+    "meta": None,
+    "priority": None,
+    "value": "earth",
+}
+
+assert resolve_entity(Element, "Air").model_dump(
+    exclude_defaults=True
+) == {"value": "air"}
 ```
diff --git a/src/fuzztypes/__init__.py b/src/fuzztypes/__init__.py
@@ -21,7 +21,7 @@
 # Validation
 from .validation import (
     FuzzValidator,
-    validate_entity,
+    resolve_entity,
     validate_python,
     validate_json,
     get_type_adapter,
@@ -89,7 +89,7 @@
     "lazy",
     "logger",
     "utils",
-    "validate_entity",
+    "resolve_entity",
     "validate_json",
     "validate_python",
 )
diff --git a/src/fuzztypes/validation.py b/src/fuzztypes/validation.py
@@ -55,7 +55,7 @@ def validate_python(cls: Any, value: Any) -> Any:
     return ta.validate_python(value)
 
 
-def validate_entity(cls: Any, value: Any) -> Optional[Entity]:
+def resolve_entity(cls: Any, value: Any) -> Optional[Entity]:
     """
     Returns entity from metadata if cls is a FuzzValidator.
 

diff --git a/tests/in_memory/test_in_memory_tags_example.py b/tests/in_memory/test_in_memory_tags_example.py
@@ -19,7 +19,7 @@
     EntitySource,
     InMemoryValidator,
     flags,
-    validate_entity,
+    resolve_entity,
     validate_python,
     Entity,
 )
@@ -52,11 +52,11 @@ def Tag(TagSource):
 
 
 def test_get_entity_from_annotation(Tag):
-    entity = validate_entity(Tag, "2d")
+    entity = resolve_entity(Tag, "2d")
     assert isinstance(entity, Entity)
     assert entity.priority == 3
 
-    entity = validate_entity(Tag, "3d")
+    entity = resolve_entity(Tag, "3d")
     assert isinstance(entity, Entity)
     assert entity.priority == 14
 

diff --git a/tests/on_disk/test_on_disk_alias.py b/tests/on_disk/test_on_disk_alias.py
@@ -75,9 +75,8 @@ def test_duplicate_records():
         assert False, "Didn't raise exception!"
     except KeyError as e:
         assert (
-            str(e)
-            == '\'Key Error: b '
-               '["b" could not be resolved, did you mean "c", "a", or "d"?]\''
+            str(e) == "'Key Error: b "
+            '["b" could not be resolved, did you mean "c", "a", or "d"?]\''
         )
 
     A = OnDiskValidator("DupeRec", source, tiebreaker_mode="lesser")

diff --git a/tests/on_disk/test_on_disk_name.py b/tests/on_disk/test_on_disk_name.py
@@ -2,7 +2,7 @@
 
 from pydantic import BaseModel, ValidationError, Field
 
-from fuzztypes import NamedEntity, OnDiskValidator, flags, validate_entity
+from fuzztypes import NamedEntity, OnDiskValidator, flags, resolve_entity
 
 names = ["George Washington", "John Adams", "Thomas Jefferson"]
 President = OnDiskValidator(
@@ -34,7 +34,7 @@ def test_namestr_getitem():
     entity = NamedEntity(value="Thomas Jefferson")
     assert President["Thomas Jefferson"] == entity
     assert President["THOMAS JEFFERSON"] == entity
-    assert validate_entity(President, "Thomas Jefferson") == entity
+    assert resolve_entity(President, "Thomas Jefferson") == entity
 
     assert CasedPrez["Thomas Jefferson"] == entity
     try:

diff --git a/tests/test_readme.py b/tests/test_readme.py
@@ -279,3 +279,42 @@ class MyModel(BaseModel):
 
     model = MyModel(ip_address="My internet IP address is 192.168.127.12")
     assert model.ip_address == "192.168.127.12"
+
+
+def test_validate_functions():
+    from fuzztypes import validate_python, validate_json, resolve_entity, Date
+
+    # validate python
+    assert validate_python(Integer, "two hundred") == 200
+
+    # validate json
+    class MyModel(BaseModel):
+        date: Date
+
+    json = '{"date": "July 4th 2021"}'
+    obj = validate_json(MyModel, json)
+    assert obj.date.isoformat() == "2021-07-04"
+
+
+def test_resolve_entity():
+    from fuzztypes import resolve_entity, InMemoryValidator
+
+    elements = ["earth", "fire", "water", "air"]
+    ElementValidator = InMemoryValidator(elements)
+    Element = Annotated[str, ElementValidator]
+
+    # resolve using validator
+    entity = resolve_entity(ElementValidator, "EARTH")
+    assert entity is not None
+    assert entity.model_dump() == {
+        "aliases": [],
+        "label": None,
+        "meta": None,
+        "priority": None,
+        "value": "earth",
+    }
+
+    # resolve using annotation type
+    entity = resolve_entity(Element, "Air")
+    assert entity is not None
+    assert entity.model_dump(exclude_defaults=True) == {"value": "air"}