From 26880ab84b51394dc82e5304476a057f36000016 Mon Sep 17 00:00:00 2001 From: obdulia-losantos Date: Thu, 6 May 2021 14:43:12 +0200 Subject: [PATCH 1/2] dynamic region and district --- .env | 6 ++- .gitignore | 3 +- docker-compose.yml | 12 +++-- src/Dockerfile | 34 +++++++----- src/config/requirements.txt | 6 +-- src/config/setup_and_run.sh | 10 +++- src/generator/README.md | 20 +++---- src/importer/generator/contacts.py | 4 +- src/importer/generator/district.py | 10 ++-- src/importer/generator/event.py | 5 +- src/importer/generator/region.py | 12 +++-- src/importer/generator/user.py | 24 +++++++-- src/importer/main.py | 20 +++---- src/importer/universe/world.py | 85 ++++++++++++++++++++---------- 14 files changed, 160 insertions(+), 91 deletions(-) diff --git a/.env b/.env index 6c86bd0..dbda008 100644 --- a/.env +++ b/.env @@ -1,5 +1,9 @@ CASE_COUNT=4 EVENT_COUNT=3 +REGION=Niedersachsen +DISTRICT=Wolfsburg +DISEASE=CORONAVIRUS + OPENAPI_GENERATOR_VERSION=4.3.1 PERFORMANCE_LOG_DIR=/srv/timings LOGLEVEL=info @@ -97,5 +101,3 @@ SORMAS_ORG_ID=HZI_TEST SORMAS_ORG_NAME="HZI Braunschweig" SORMAS_S2S_CERT_PASS=password SORMAS_S2S_REST_PASSWORD=passwordpassword - - diff --git a/.gitignore b/.gitignore index 8d373f9..bce042c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ docker-data/ .idea -timings/* \ No newline at end of file +timings/* +.vscode diff --git a/docker-compose.yml b/docker-compose.yml index 64a3f69..91ebbd1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -89,17 +89,23 @@ services: context: src/ args: - DOCKERIZED=false - - SORMAS_VERSION=${SORMAS_VERSION} - OPENAPI_GENERATOR_VERSION=${OPENAPI_GENERATOR_VERSION} - PERFORMANCE_LOG_DIR=${PERFORMANCE_LOG_DIR} + - SORMAS_VERSION=${SORMAS_VERSION} environment: + - DOMAIN_NAME=${DOMAIN_NAME} + - SORMAS_REST_USERNAME=${SORMAS_REST_USERNAME} + - SORMAS_REST_PASSWORD=${SORMAS_REST_PASSWORD} - SORMAS_POSTGRES_USER=${SORMAS_POSTGRES_USER} - SORMAS_POSTGRES_PASSWORD=${SORMAS_POSTGRES_PASSWORD} - - 
SORMAS_SERVER_URL=${SORMAS_SERVER_URL} - DB_HOST=${DB_HOST} - - DOMAIN_NAME=${DOMAIN_NAME} + - DB_NAME=${DB_NAME} + - SUPERVISOR_UUID=${SUPERVISOR_UUID} - CASE_COUNT=${CASE_COUNT} - EVENT_COUNT=${EVENT_COUNT} + - REGION=${REGION} + - DISTRICT=${DISTRICT} + - DISEASE=${DISEASE} - PERFORMANCE_LOG_DIR=${PERFORMANCE_LOG_DIR} - ANALYZE_PERFORMANCE=${ANALYZE_PERFORMANCE} - LOGLEVEL=${LOGLEVEL} diff --git a/src/Dockerfile b/src/Dockerfile index 390925a..ab51bb0 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -1,22 +1,28 @@ FROM debian:buster-slim -ARG SORMAS_VERSION -ARG OPENAPI_GENERATOR_VERSION + ARG DOCKERIZED=false +ARG OPENAPI_GENERATOR_VERSION ARG PERFORMANCE_LOG_DIR +ARG SORMAS_VERSION + # see https://github.com/debuerreotype/debuerreotype/issues/10 RUN mkdir -p /usr/share/man/man1 -RUN apt update && apt upgrade -y -RUN apt install --no-install-recommends -y \ - curl wget unzip vim gcc \ - python3 python3-pip python3-venv python3-dev \ - maven jq \ - postgresql libpq-dev +RUN apt update > /dev/null && \ + apt upgrade -y > /dev/null && \ + apt install --no-install-recommends -y \ + curl wget unzip vim gcc \ + python3 python3-pip python3-venv python3-dev \ + maven jq \ + postgresql libpq-dev > /dev/null WORKDIR /srv -RUN mkdir $PERFORMANCE_LOG_DIR -COPY config/setup_and_run.sh setup_and_run.sh -COPY importer/gen-client.sh /srv/importer/ -COPY config/requirements.txt /srv/importer/ -RUN importer/gen-client.sh -COPY . /srv CMD ["./setup_and_run.sh"] + +COPY config/setup_and_run.sh setup_and_run.sh +COPY config/requirements.txt /srv/importer/ +COPY importer/gen-client.sh /srv/importer/ + +RUN mkdir $PERFORMANCE_LOG_DIR && \ + importer/gen-client.sh + +COPY . 
/srv diff --git a/src/config/requirements.txt b/src/config/requirements.txt index 5c654e0..a336e9b 100644 --- a/src/config/requirements.txt +++ b/src/config/requirements.txt @@ -1,6 +1,6 @@ -psycopg2==2.8.6 +click numpy==1.19.4 pandas==1.1.5 -click -zeep +psycopg2-binary==2.8.6 pygrok +zeep diff --git a/src/config/setup_and_run.sh b/src/config/setup_and_run.sh index 0bb29a0..39c4408 100755 --- a/src/config/setup_and_run.sh +++ b/src/config/setup_and_run.sh @@ -1,5 +1,13 @@ #!/bin/bash source venv/bin/activate + echo "Starting import" + cd importer || exit -python3 main.py --case-count $CASE_COUNT --event-count $EVENT_COUNT + +python3 main.py \ + --case-count $CASE_COUNT \ + --event-count $EVENT_COUNT + --region $REGION \ + --district $DISTRICT + --disease $DISEASE diff --git a/src/generator/README.md b/src/generator/README.md index a5a9186..65a902d 100644 --- a/src/generator/README.md +++ b/src/generator/README.md @@ -4,7 +4,7 @@ An overview can be found in the presentation [presentation/sormas-oegd-credible- The scripts consists essentially in three parts: 1. the parameters and functions -2. the reading of input data in [data/in/](data/in/) (the case counts from RKI) and the generating of the data set, wich is stored as RDS, CSV and Excel files in [data/out/](data/out/) +2. the reading of input data in [data/in/](data/in/) (the case counts from RKI) and the generating of the data set, which is stored as RDS, CSV and Excel files in [data/out/](data/out/) 3. the plots, which are stored in [img/](img/) Parts 2 and 3 are largely independent. Part to is within `if (generate_dataset) {...}`, if `generate_dataset` is `FALSE`, the data set is loaded from the RDS files to be used for the plots. @@ -24,7 +24,7 @@ Not simulations or scenarios! 
Rather "play" and demonstration data: also readab ### Adding new fields for a more complete data set Addresses: -- assign a proper address to each geolocation, e.g., through the Google Maps API +- assign a proper address to each geolocation, e.g., through the Google Maps API Tests, quarantine, isolation: - tested persons as fourth person category besides case, contact and participant (one person can be any number of those at the same time) @@ -33,15 +33,15 @@ Tests, quarantine, isolation: - whether a case is in isolation - dates of isolation/quarantine start and end -Occupation/setting of a person, an infection, an event, especially according "household", screening, and to IfSG definitions (), see RKI's [Epidemiologisches Bulletin](https://www.rki.de/DE/Content/Infekt/EpidBull/epid_bull_node.html) 38/20 and RKI's Lagebricht +Occupation/setting of a person, an infection, an event, especially according "household", screening, and to IfSG definitions (), see RKI's [Epidemiologisches Bulletin](https://www.rki.de/DE/Content/Infekt/EpidBull/epid_bull_node.html) 38/20 and RKI's Lagebricht Context: -- place of infection (address, location ,type) -- type of venue/setting for infection, contact, event venue, +- place of infection (address, location ,type) +- type of venue/setting for infection, contact, event venue, - name of event - date of contact -Hospitalization and death: +Hospitalization and death: - whether in ICU - dates of hospitalization start and end - date of death @@ -78,7 +78,7 @@ Transmission and infection dynamics: - age-dependent susceptibility - age-dependent infectiosity - more realistic period of infectiosity -- age-dependent probability of being in a type of place of infection, of contact or event setting +- age-dependent probability of being in a type of place of infection, of contact or event setting - transmission risk dependent on contact and event setting type - contacts between cases follow the same social-contact-matrix based probabilities @@ 
-87,8 +87,8 @@ Age (and sex?) dependent symptoms: - given that one has symptoms, age (and sex?) dependent symptom and age specific probabilities from the literature - non-cases may also have symptoms (?) -Spread dynamics: -- not just one retrospective snapshot, but day-to-day changes in reporting, contacts, etc.: a contact can get sick, contacted, be tested (negtive or positive > case), go into quarantine... +Spread dynamics: +- not just one retrospective snapshot, but day-to-day changes in reporting, contacts, etc.: a contact can get sick, contacted, be tested (negative or positive > case), go into quarantine... Tests: - take positive rate into account @@ -137,4 +137,4 @@ Modelling: ## Authors -## License \ No newline at end of file +## License diff --git a/src/importer/generator/contacts.py b/src/importer/generator/contacts.py index 20ea0a8..df5cd9e 100644 --- a/src/importer/generator/contacts.py +++ b/src/importer/generator/contacts.py @@ -59,11 +59,11 @@ def gen_contact_dto(person_uuid, case_uuid, disease): creation_date=dnow(), # todo required missing change_date=dnow(), # todo required missing reporting_user=surv_sup_user_ref(), - last_contact_date=datetime.date.fromisoformat('2020-02-01'), + last_contact_date=datetime.date.fromisoformat('2021-05-01'), disease=disease, caze=case_ref(case_uuid), # todo validation exception talking about region health_conditions=gen_health_condition_dto(), # todo ContactFacadeRjb:1092 nullpointer if missing - # todo contact classifaction is required in UI + # todo contact classification is required in UI multi_day_contact=False # todo check required ) return contact_dto diff --git a/src/importer/generator/district.py b/src/importer/generator/district.py index 7940939..471ac98 100644 --- a/src/importer/generator/district.py +++ b/src/importer/generator/district.py @@ -10,7 +10,7 @@ def default_district(): with sormas_db_connect() as conn: with conn.cursor() as cur: - cur.execute("SELECT uuid FROM district") + cur.execute("SELECT 
uuid FROM district LIMIT 1") uuid = cur.fetchone()[0] return DistrictReferenceDto(uuid=uuid) @@ -34,8 +34,10 @@ def insert_district(district, region_id): _id = max(all_ids) + 1 date = datetime.date.today() uuid = duuid() - cur.execute("INSERT INTO district (id, changedate, creationdate, name, uuid, region_id, epidcode, archived)" - "VALUES (%s,%s, %s, %s, %s, %s, %s, %s)", - [_id, date, date, district, uuid, region_id, 'DIS', False]) + cur.execute( + "INSERT INTO district" + " (id, changedate, creationdate, name, uuid, region_id, epidcode, archived)" + " VALUES (%s,%s, %s, %s, %s, %s, %s, %s)", + [_id, date, date, district, uuid, region_id, 'DIS', False]) return _id, uuid diff --git a/src/importer/generator/event.py b/src/importer/generator/event.py index d24bb64..ef25e28 100644 --- a/src/importer/generator/event.py +++ b/src/importer/generator/event.py @@ -20,7 +20,7 @@ random.seed(42) -def gen_event_dto(event_desc=None, start_date=None, location=None,disease= None): +def gen_event_dto(event_desc=None, start_date=None, location=None, disease=None): if event_desc is None: raise NotImplementedError @@ -41,8 +41,7 @@ def gen_event_dto(event_desc=None, start_date=None, location=None,disease= None) reporting_user=surv_sup_user_ref(), start_date=start_date, event_location=location, - disease=Disease.CORONAVIRUS - # todo region + district is required in the UI! 
+ disease=disease, ) return event_dto diff --git a/src/importer/generator/region.py b/src/importer/generator/region.py index b242a1f..accdda3 100644 --- a/src/importer/generator/region.py +++ b/src/importer/generator/region.py @@ -10,7 +10,7 @@ def default_region(): with sormas_db_connect() as conn: with conn.cursor() as cur: - cur.execute("SELECT uuid FROM region") + cur.execute("SELECT uuid FROM region LIMIT 1") uuid = cur.fetchone()[0] return RegionReferenceDto(uuid=uuid) @@ -28,13 +28,15 @@ def insert_region(region): if exists: logging.info(f'{region} already exists in the DB, value was {exists}') return exists[0], exists[1] + cur.execute("SELECT id FROM region") all_ids = list(chain.from_iterable(cur.fetchall())) _id = max(all_ids) + 1 date = datetime.date.today() uuid = duuid() - cur.execute("INSERT INTO region (id,changedate, creationdate, name, uuid, epidcode, archived)" - "VALUES (%s,%s, %s, %s, %s, %s, %s)", - [_id, date, date, region, uuid, 'REG', False] - ) + cur.execute( + "INSERT INTO region" + " (id,changedate, creationdate, name, uuid, epidcode, archived)" + " VALUES (%s,%s, %s, %s, %s, %s, %s)", + [_id, date, date, region, uuid, 'REG', False]) return _id, uuid diff --git a/src/importer/generator/user.py b/src/importer/generator/user.py index def51d7..72f71be 100644 --- a/src/importer/generator/user.py +++ b/src/importer/generator/user.py @@ -1,3 +1,5 @@ +import os + from sormas import UserReferenceDto from generator.utils import sormas_db_connect @@ -5,8 +7,20 @@ # todo make this deterministic def surv_sup_user_ref(): - with sormas_db_connect() as conn: - with conn.cursor() as cur: - cur.execute("SELECT uuid FROM users WHERE firstname = 'Surveillance' AND lastname = 'Supervisor'") - uuid = cur.fetchone()[0] - return UserReferenceDto(uuid=uuid) + sup_uuid = os.getenv('SUPERVISOR_UUID') + if sup_uuid: + return UserReferenceDto(uuid=sup_uuid) + else: + sup_first_name = os.getenv('SUPERVISOR_FIRSTNAME', 'Surveillance') + sup_last_name = 
os.getenv('SUPERVISOR_LASTNAME', 'Supervisor') + + with sormas_db_connect() as conn: + with conn.cursor() as cur: + cur.execute(""" + SELECT uuid + FROM users + WHERE firstname = %s + AND lastname = %s + """, [sup_first_name, sup_last_name]) + uuid = cur.fetchone()[0] + return UserReferenceDto(uuid=uuid) diff --git a/src/importer/main.py b/src/importer/main.py index 88e5115..7324867 100644 --- a/src/importer/main.py +++ b/src/importer/main.py @@ -5,6 +5,8 @@ from performance.evaluate import analyze_performance from universe.world import World +from sormas import Disease + # noinspection PyArgumentList logging.basicConfig( @@ -16,22 +18,20 @@ @click.command() @click.option('--case-count', default=1, help='Number of cases you want to import.') @click.option('--event-count', default=1, help='Number of events you want to import.') -def main(case_count, event_count): +@click.option('--region', default='Niedersachsen', help='Region location.') +@click.option('--district', default='Wolfsburg', help='District location.') +@click.option('--disease', default=Disease.CORONAVIRUS, help='Type of disease.') +def main(case_count, event_count, region, district, disease): logging.info(f'Importing {case_count} cases') logging.info(f'Importing {event_count} events') # Set everything up # Create our world where we simulate a pandemic. This is our playground. 
# Set a beginning for our world - world = World() + world = World(disease=disease) - # Populate default entities in our world - # Counties of interest - lower_saxony = 'Niedersachsen' - world.add_region(lower_saxony) - world.add_district('Braunschweig', lower_saxony) - world.add_district('Salzgitter', lower_saxony) - world.add_district('Wolfsburg', lower_saxony) + # # Populate default entities in our world + world.add_district(district, region) world.pre_populate_cases_and_contacts(n=case_count) @@ -46,6 +46,8 @@ def main(case_count, event_count): if os.environ.get('ANALYZE_PERFORMANCE', 'False').upper() == 'TRUE': analyze_performance() + logging.info('Done') + if __name__ == '__main__': main() diff --git a/src/importer/universe/world.py b/src/importer/universe/world.py index 9f98291..145fa46 100644 --- a/src/importer/universe/world.py +++ b/src/importer/universe/world.py @@ -26,43 +26,64 @@ class World: - def __init__(self): + def __init__(self, disease=Disease.CORONAVIRUS): self.today = Tick() self.model = self._load_model() + sormas_domain = os.environ.get("DOMAIN_NAME") host = sormas_domain if sormas_domain else 'localhost' - configuration = sormas_api.Configuration( host=f'http://{host}:6080/sormas-rest', - username='SurvOff', - password='SurvOff' + username=os.getenv('SORMAS_REST_USERNAME', 'SurvOff'), + password=os.getenv('SORMAS_REST_PASSWORD', 'SurvOff'), ) configuration.verify_ssl = False configuration.debug = True self.sormas_api_config = configuration + self.current_disease = disease self.regions = {} + self.current_region = {} self.districts = {} + self.current_district = {} with sormas_db_connect() as conn: with conn.cursor() as cur: - cur.execute("SELECT name, id, uuid FROM region") - res = cur.fetchone() - logging.info(f'Region: {res}') - self.regions[res[0]] = {} - self.regions[res[0]]['id'] = res[1] - self.regions[res[0]]['uuid'] = res[2] - cur.execute("SELECT name, id, uuid FROM district") - res = cur.fetchone() - self.districts[res[0]] = {} - 
self.districts[res[0]]['id'] = res[1] - self.districts[res[0]]['uuid'] = res[2] - logging.info(f'District: {res}') - - # disable other diseases - cur.execute("UPDATE diseaseconfiguration SET active = true WHERE disease = 'CORONAVIRUS'") - cur.execute("UPDATE diseaseconfiguration SET active = false WHERE disease != 'CORONAVIRUS'") + cur.execute("SELECT name, id, uuid FROM region ORDER BY name") + regions = cur.fetchall() + for res in regions: + logging.info(f'Region: {res}') + self.regions[res[0]] = {} + self.regions[res[0]]['id'] = res[1] + self.regions[res[0]]['uuid'] = res[2] + + cur.execute(""" + SELECT d.name, d.id, d.uuid , r.uuid + FROM district d + LEFT OUTER JOIN region r + ON d.region_id = r.id + ORDER BY r.name, d.name + """) + districts = cur.fetchall() + for res in districts: + logging.info(f'District: {res}') + self.districts[res[0]] = {} + self.districts[res[0]]['id'] = res[1] + self.districts[res[0]]['uuid'] = res[2] + self.districts[res[0]]['region'] = res[3] + + cur.execute("UPDATE diseaseconfiguration SET active = true WHERE disease = %s", [str(self.current_disease)]) + # # disable other diseases + # cur.execute(f"UPDATE diseaseconfiguration SET active = false WHERE disease != '{self.current_disease}'") + + # default region and district + if self.regions: + first_reg = list(self.regions.keys())[0] + self.current_region = self.regions[first_reg] + if self.districts: + first_dis = list(self.districts.keys())[0] + self.current_district = self.districts[first_dis] @staticmethod def _load_model(): @@ -103,8 +124,8 @@ def _create_person(self, pers): lon = 0 if np.isnan(pers.longitude) else pers.longitude # Fixme 0 condition location_dto = gen_location_dto( lat, lon, - region_ref(self.regions['Voreingestellte Bundesländer']['uuid']), - self.districts['Voreingestellter Landkreis']['uuid'] + self.current_district.get('region', self.current_region.get('uuid')), + self.current_district.get('uuid') ) person = gen_person_dto( first_name=pers.first_name, @@ -122,13 +143,20 
@@ def add_region(self, region): region_id, region_uuid = insert_region(region) self.regions[region] = {} self.regions[region]['id'] = region_id + self.regions[region]['uuid'] = region_uuid + self.current_region = self.regions[region] + self.current_district = {} def add_district(self, district, region): - region_id = self.regions[region]['id'] + self.add_region(region) + + region_id = self.current_region['id'] district_id, district_uuid = insert_district(district, region_id) self.districts[district] = {} self.districts[district]['id'] = district_id self.districts[district]['uuid'] = district_uuid + self.districts[district]['region'] = self.current_region['uuid'] + self.current_district = self.districts[district] # todo Create individual cases that reproduce the # aggregated data from covid dashboard and SurvStat @@ -148,14 +176,13 @@ def map_symptom(s): res[tmp] = SymptomState.YES return res - disease = Disease.CORONAVIRUS # todo model_cases = self.model['cases'] model_contacts = self.model['contacts'] for i in range(n): m_case = model_cases.iloc[i] person_dto = self._create_person(m_case) - symptoms_dto = gen_symptom_dto(Disease.CORONAVIRUS, map_symptom(m_case.symptom)) + symptoms_dto = gen_symptom_dto(self.current_disease, map_symptom(m_case.symptom)) case_outcome = CaseOutcome.DECEASED if m_case.died else CaseOutcome.NO_OUTCOME outcome_date = m_case.reporting_date # FIXME @@ -163,7 +190,7 @@ def map_symptom(s): case_dto = gen_case_dto( date=m_case.reporting_date, p_uuid=person_dto.uuid, - disease=disease, + disease=self.current_disease, symptoms=symptoms_dto, case_outcome=case_outcome, outcome_date=outcome_date @@ -190,11 +217,11 @@ def pre_populate_events_and_participants(self, n=2): location_dto = gen_location_dto( m_event.latitude, m_event.longitude, - region_ref(self.regions['Voreingestellte Bundesländer']['uuid']), - self.districts['Voreingestellter Landkreis']['uuid'] # m_event.address] + self.current_district.get('region', 
self.current_region.get('uuid')), + self.current_district.get('uuid') ) # FIXME use the real place - event_dto = gen_event_dto(event_desc, start_date, location_dto, Disease.CORONAVIRUS) + event_dto = gen_event_dto(event_desc, start_date, location_dto, self.current_disease) # add participants m_participants = model_participants.query(f'id_event == {m_event.id}') From 116b94000059271c11eea73c16dc83258dd1d6d3 Mon Sep 17 00:00:00 2001 From: obdulia-losantos Date: Sat, 15 May 2021 11:20:37 +0200 Subject: [PATCH 2/2] fix: setup_and_run script --- README.md | 1 - src/config/setup_and_run.sh | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 48da927..8720c53 100644 --- a/README.md +++ b/README.md @@ -11,4 +11,3 @@ Goal of this project is to generate and import credible test data into SORMAS. T # Run it `docker-compose up -d`: This will start a minimal stack of [SORMAS-Docker](https://github.com/hzi-braunschweig/SORMAS-Docker) which receives the generated data. For more options see [here](src/importer/README.md). - diff --git a/src/config/setup_and_run.sh b/src/config/setup_and_run.sh index 39c4408..ca6bcde 100755 --- a/src/config/setup_and_run.sh +++ b/src/config/setup_and_run.sh @@ -7,7 +7,7 @@ cd importer || exit python3 main.py \ --case-count $CASE_COUNT \ - --event-count $EVENT_COUNT + --event-count $EVENT_COUNT \ --region $REGION \ - --district $DISTRICT + --district $DISTRICT \ --disease $DISEASE