Skip to content

Commit

Permalink
feat: initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
cbrinson-rise8 committed Jan 8, 2025
1 parent a58652c commit e42578f
Show file tree
Hide file tree
Showing 18 changed files with 639 additions and 221 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/check_smoke_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ jobs:
done
# Run smoke tests and print the response
JSON_BODY_1='{"record": {"birth_date": "2053-11-07", "sex": "M", "mrn": "1234567890", "name":[{"family":"Shepard", "given":["John"]}]}}'
JSON_BODY_2='{"algorithm": "dibbs-enhanced", "record": {"birth_date": "2000-12-06", "sex": "M", "mrn": "9876543210", "name":[{"family":"Smith", "given":["William"]}]}}'
JSON_BODY_1='{"record": {"birth_date": "2053-11-07", "sex": "M", "identifiers":[{"value": "123456789", "type": "MR"}], "name":[{"family":"Shepard", "given":["John"]}]}}'
JSON_BODY_2='{"algorithm": "dibbs-enhanced", "record": {"birth_date": "2000-12-06", "sex": "M", "identifiers":[{"value": "9876543210", "type": "MR"}], "name":[{"family":"Smith", "given":["William"]}]}}'
#basic tests
RESPONSE_1=$(curl -s -X POST http://localhost:8080/link \
Expand Down
24 changes: 10 additions & 14 deletions docs/site/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,6 @@ linkage evaluation phase. The following features are supported:

: The patient's birthdate (normalized to `YYYY-MM-DD`).

`MRN`

: The patient's medical record number.

`SSN`

: The patient's social security number.

`SEX`

: The patient's sex (normalized to `M`, `F`, or `U` for unknown).
Expand Down Expand Up @@ -83,9 +75,13 @@ linkage evaluation phase. The following features are supported:

: The patient's email address.

`DRIVERS_LICENSE`
`IDENTIFIER`

: An identifier for the patient. Matching on this will check if any identifier type/authority/value combination matches.

: The patient's driver's license number.
`IDENTIFIER:<type>`

: The patient's identifier of a specific type. For example, `IDENTIFIER:MR` would be the patient's medical record number. Unlike `IDENTIFIER`, this will ONLY compare values of the specified type. Valid type codes are listed in the HL7 v2 Table 0203 value set: http://hl7.org/fhir/R4/v2/0203/index.html.


### Blocking Key Types
Expand All @@ -97,10 +93,6 @@ patient data and used during query retrieval. The following blocking key types a

: The patient's birthdate in the format `YYYY-MM-DD`.

`MRN` (ID: **2**)

: The last 4 characters of a patient's medical record number.

`SEX` (ID: **3**)

: The patient's sex in the format of `M`, `F`, or `U` for unknown.
Expand Down Expand Up @@ -129,6 +121,10 @@ patient data and used during query retrieval. The following blocking key types a

: The first 4 characters of the patient's email address.

`IDENTIFIER` (ID: **10**)

: The identifier triplet containing only the type, authority, and last 4 characters of the value.


### Evaluation Functions

Expand Down
8 changes: 4 additions & 4 deletions src/recordlinker/assets/initial_algorithms.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
{
"blocking_keys": [
"BIRTHDATE",
"MRN",
"IDENTIFIER",
"SEX"
],
"evaluators": [
Expand Down Expand Up @@ -75,7 +75,7 @@
{
"blocking_keys": [
"BIRTHDATE",
"MRN",
"IDENTIFIER",
"SEX"
],
"evaluators": [
Expand Down Expand Up @@ -106,7 +106,7 @@
"CITY": 2.438553006137189,
"FIRST_NAME": 6.849475906891162,
"LAST_NAME": 6.350720397426025,
"MRN": 0.3051262572525359,
"IDENTIFIER:MR": 0.3051262572525359,
"SEX": 0.7510419059643679,
"STATE": 0.022376768992488694,
"ZIP": 4.975031471124867
Expand Down Expand Up @@ -148,7 +148,7 @@
"CITY": 2.438553006137189,
"FIRST_NAME": 6.849475906891162,
"LAST_NAME": 6.350720397426025,
"MRN": 0.3051262572525359,
"IDENTIFIER:MR": 0.3051262572525359,
"SEX": 0.7510419059643679,
"STATE": 0.022376768992488694,
"ZIP": 4.975031471124867
Expand Down
22 changes: 7 additions & 15 deletions src/recordlinker/hl7/fhir.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,26 +33,18 @@ def fhir_record_to_pii_record(fhir_record: dict) -> schemas.PIIRecord:
"birthDate": fhir_record.get("birthDate"),
"sex": fhir_record.get("gender"),
"address": fhir_record.get("address", []),
"mrn": None,
"ssn": None,
"race": None,
"gender": None,
"telecom": fhir_record.get("telecom", []),
"drivers_license": None,
"identifiers": [],
}
for identifier in fhir_record.get("identifier", []):
for coding in identifier.get("type", {}).get("coding", []):
if coding.get("code") == "MR":
val["mrn"] = identifier.get("value")
elif coding.get("code") == "SS":
val["ssn"] = identifier.get("value")
elif coding.get("code") == "DL":
license_number = identifier.get("value")
authority = identifier.get("assigner", {}).get("identifier", {}).get("value", "") # Assuming `issuer` contains authority info
val["drivers_license"] = {
"value": license_number,
"authority": authority
}
for code in identifier.get("type", {}).get("coding", []):
val["identifiers"].append({
"value": identifier.get("value"),
"type": code.get("code"),
"authority": identifier.get("assigner", {}).get("identifier", {}).get("value", ""),
})
for address in val["address"]:
address["county"] = address.get("district", "")
for extension in address.get("extension", []):
Expand Down
2 changes: 1 addition & 1 deletion src/recordlinker/linking/link.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def compare(
details: dict[str, typing.Any] = {"patient.reference_id": str(patient.reference_id)}
for e in evals:
# TODO: can we do this check earlier?
feature = getattr(schemas.Feature, e.feature, None)
feature = schemas.Feature.parse(e.feature)
if feature is None:
raise ValueError(f"Invalid comparison field: {e.feature}")
# Evaluate the comparison function and append the result to the list
Expand Down
6 changes: 3 additions & 3 deletions src/recordlinker/linking/matchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def compare_fuzzy_match(
beyond which to classify the strings as a partial match.
:return: A float indicating whether the features are a fuzzy match.
"""
similarity_measure, threshold = _get_fuzzy_params(str(key), **kwargs)
similarity_measure, threshold = _get_fuzzy_params(str(key.attribute), **kwargs)
comp_func = getattr(rapidfuzz.distance, similarity_measure).normalized_similarity
for x in record.feature_iter(key):
for y in patient.record.feature_iter(key):
Expand All @@ -203,11 +203,11 @@ def compare_probabilistic_fuzzy_match(
beyond which to classify the strings as a partial match.
:return: A float of the score the feature comparison earned.
"""
log_odds = kwargs.get("log_odds", {}).get(str(key))
log_odds = kwargs.get("log_odds", {}).get(str(key.attribute))
if log_odds is None:
raise ValueError(f"Log odds not found for feature {key}")

similarity_measure, threshold = _get_fuzzy_params(str(key), **kwargs)
similarity_measure, threshold = _get_fuzzy_params(str(key.attribute), **kwargs)
comp_func = getattr(rapidfuzz.distance, similarity_measure).normalized_similarity
max_score = 0.0
for x in patient.record.feature_iter(key):
Expand Down
2 changes: 1 addition & 1 deletion src/recordlinker/models/mpi.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,14 +121,14 @@ class BlockingKey(enum.Enum):
"""

BIRTHDATE = ("BIRTHDATE", 1, "Date of birth as YYYY-MM-DD")
MRN = ("MRN", 2, "Last 4 characters of Medical record number")
SEX = ("SEX", 3, "Sex at birth; M, F or U")
ZIP = ("ZIP", 4, "5 digital US Postal Code")
FIRST_NAME = ("FIRST_NAME", 5, "First 4 characters of the first name")
LAST_NAME = ("LAST_NAME", 6, "First 4 characters of the last name")
ADDRESS = ("ADDRESS", 7, "First 4 characters of the address")
PHONE = ("PHONE", 8, "Last 4 characters of the phone number")
EMAIL = ("EMAIL", 9, "First 4 characters of the email address")
IDENTIFIER = ("IDENTIFIER", 10, "Identifier triplet with only last 4 character of the value. Format \"type:authority:value\"")

def __init__(self, value: str, _id: int, description: str):
self._value = value
Expand Down
2 changes: 2 additions & 0 deletions src/recordlinker/schemas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .mpi import PatientRef
from .mpi import PersonRef
from .pii import Feature
from .pii import FeatureAttribute
from .pii import PIIRecord
from .seed import Cluster
from .seed import ClusterGroup
Expand All @@ -22,6 +23,7 @@
"AlgorithmPass",
"AlgorithmSummary",
"Feature",
"FeatureAttribute",
"PIIRecord",
"Prediction",
"LinkInput",
Expand Down
3 changes: 1 addition & 2 deletions src/recordlinker/schemas/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,9 @@ class Evaluator(pydantic.BaseModel):

model_config = pydantic.ConfigDict(from_attributes=True, use_enum_values=True)

feature: Feature
feature: str = pydantic.Field(json_schema_extra={"enum": Feature.all_options()})
func: matchers.FeatureFunc


class AlgorithmPass(pydantic.BaseModel):
"""
The schema for an algorithm pass record.
Expand Down
Loading

0 comments on commit e42578f

Please sign in to comment.