Adds an opt-in `algo-test-runner` compose service plus the tests/algorithm
scaffolding (README, env file, CSV-to-cluster seeding script).
Note: hunk line counts match the contents below; the blob ids on the `index`
lines were not recomputed for the revised contents.

diff --git a/compose.yml b/compose.yml
index 208919df..4bd76a91 100644
--- a/compose.yml
+++ b/compose.yml
@@ -76,3 +76,23 @@ services:
     depends_on:
       api:
         condition: service_healthy
+
+# TODO: fix this
+  # Runs tests/algorithm/scripts/seed_db.py; only started with the `algo-test` profile.
+  algo-test-runner:
+    image: "python:3.11-slim"
+    env_file:
+      - tests/algorithm/algo.env
+    environment:
+      DB_URI: "postgresql+psycopg2://postgres:pw@db:5432/postgres"
+      API_URL: "http://api:8080"
+    command: sh -c "pip install pandas requests && python /scripts/seed_db.py"
+    volumes:
+      - ./tests/algorithm/scripts:/scripts
+    depends_on:
+      db:
+        condition: service_healthy
+      api:
+        condition: service_healthy
+    profiles:
+      - algo-test
diff --git a/tests/algorithm/README.md b/tests/algorithm/README.md
new file mode 100644
index 00000000..728f8d20
--- /dev/null
+++ b/tests/algorithm/README.md
@@ -0,0 +1,20 @@
+# Record Linkage Algorithm Testing
+
+This directory contains tooling to test the effectiveness of the RecordLinker algorithm.
+
+## Prerequisites
+
+Before getting started, ensure you have the following installed:
+
+- [Docker](https://docs.docker.com/engine/install/)
+- [Docker Compose](https://docs.docker.com/compose/install/)
+
+## Setup
+
+1. Place the seed CSV file in `tests/algorithm/scripts/`, which is mounted into the test container at `/scripts`. The CSV must have the columns `ID`, `BIRTHDATE`, `GENDER`, `ADDRESS`, `CITY`, `STATE`, `COUNTY`, `ZIP`, `FIRST`, `LAST`, `SSN` and `RACE`.
+2. Set `SEED_CSV_FILE` in `tests/algorithm/algo.env` to the file's path inside the container (for example `/scripts/seed.csv`).
+3. Start the stack with the `algo-test` profile enabled:
+
+   ```sh
+   docker compose --profile algo-test up
+   ```
diff --git a/tests/algorithm/algo.env b/tests/algorithm/algo.env
new file mode 100644
index 00000000..b1b3a919
--- /dev/null
+++ b/tests/algorithm/algo.env
@@ -0,0 +1,4 @@
+# Path, inside the container, of the CSV file that seed_db.py loads.
+SEED_CSV_FILE=
+DB_URI="postgresql+psycopg2://postgres:pw@db:5432/postgres"
+API_URL="http://api:8080"
diff --git a/tests/algorithm/scripts/seed_db.py b/tests/algorithm/scripts/seed_db.py
new file mode 100644
index 00000000..4ff0aaab
--- /dev/null
+++ b/tests/algorithm/scripts/seed_db.py
@@ -0,0 +1,100 @@
+"""Build RecordLinker seed clusters from a CSV file of patient records."""
+
+import os
+import sys
+
+import pandas as pd
+import requests  # noqa: F401 -- needed once the seeding request below is enabled
+
+# API base URL used when the API_URL environment variable is not set.
+DEFAULT_API_URL = "http://localhost:8080/"
+
+
+def _clean(value):
+    """Return None for a missing CSV cell, otherwise the value unchanged."""
+    return None if pd.isna(value) else value
+
+
+def _row_to_pii_record(record_data):
+    """Map one CSV row (column name -> value) to a pii_record dict."""
+    return {
+        "external_id": record_data["ID"],
+        "birth_date": record_data["BIRTHDATE"],
+        "sex": record_data["GENDER"],
+        "address": [
+            {
+                "line": [record_data["ADDRESS"]],
+                "city": record_data["CITY"],
+                "state": record_data["STATE"],
+                "county": record_data["COUNTY"],
+                "postal_code": record_data["ZIP"],
+            }
+        ],
+        "name": [
+            {
+                "given": [record_data["FIRST"]],
+                "family": record_data["LAST"],
+            }
+        ],
+        "ssn": record_data["SSN"],
+        "race": record_data["RACE"],
+    }
+
+
+def seed_database(api_url, csv_file):
+    """Read ``csv_file`` and build one single-record cluster per row.
+
+    Args:
+        api_url: URL of the API to seed. Not used yet: the request is
+            still disabled (see the TODO at the end of the function).
+        csv_file: Path or file-like object accepted by ``pandas.read_csv``.
+            It must provide the columns ID, BIRTHDATE, GENDER, ADDRESS,
+            CITY, STATE, COUNTY, ZIP, FIRST, LAST, SSN and RACE.
+
+    Returns:
+        A list of ``{"records": [pii_record]}`` dicts in CSV row order.
+
+    Raises:
+        KeyError: If one of the required columns is missing.
+    """
+    # Read every column as text: numeric inference would strip leading
+    # zeros from ZIP codes and turn identifiers into NumPy numbers that
+    # cannot be serialized to JSON.
+    df = pd.read_csv(csv_file, dtype=str)
+
+    cluster_group = []
+    for row in df.to_dict(orient="records"):
+        record_data = {column: _clean(value) for column, value in row.items()}
+        # nesting for the seeding api request
+        cluster_group.append({"records": [_row_to_pii_record(record_data)]})
+
+    # TODO: send cluster_group to the API at api_url with requests.post and
+    # call raise_for_status() on the response. Left disabled, as in the
+    # original draft, until the seeding endpoint and payload are confirmed.
+    return cluster_group
+
+
+def main(argv):
+    """Run the seeding script and return its process exit code.
+
+    The CSV path comes from ``argv[1]`` or, when no argument is given, from
+    the SEED_CSV_FILE environment variable. The API URL comes from API_URL
+    and falls back to ``DEFAULT_API_URL``.
+    """
+    if len(argv) > 2:
+        print("Usage: python seed_db.py <csv_file>")
+        return 1
+
+    csv_file = argv[1] if len(argv) == 2 else os.environ.get("SEED_CSV_FILE", "")
+    if not csv_file:
+        print("Usage: python seed_db.py <csv_file>  (or set SEED_CSV_FILE)")
+        return 1
+
+    api_url = os.environ.get("API_URL", DEFAULT_API_URL)
+    clusters = seed_database(api_url, csv_file)
+    print(f"Prepared {len(clusters)} cluster(s) from {csv_file}")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
diff --git a/tests/algorithm/scripts/test.py b/tests/algorithm/scripts/test.py
new file mode 100644
index 00000000..7d96cf7d
--- /dev/null
+++ b/tests/algorithm/scripts/test.py
@@ -0,0 +1 @@
+print("Hello World")