From 1819f05dd3fc0b4bdc598da5b086f99ed52edf9d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Dec 2024 21:17:22 +0300 Subject: [PATCH 01/16] Update auth service production image tag to prod-1ea1da62-1733249735 --- k8s/auth-service/values-prod.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/auth-service/values-prod.yaml b/k8s/auth-service/values-prod.yaml index fb2687a864..c2981d9f1b 100644 --- a/k8s/auth-service/values-prod.yaml +++ b/k8s/auth-service/values-prod.yaml @@ -6,7 +6,7 @@ app: replicaCount: 3 image: repository: eu.gcr.io/airqo-250220/airqo-auth-api - tag: prod-da0db8f0-1733248410 + tag: prod-1ea1da62-1733249735 nameOverride: '' fullnameOverride: '' podAnnotations: {} From ff9d78484cbd3f7a3537b78c19a7015ec1275de9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Dec 2024 21:17:46 +0300 Subject: [PATCH 02/16] Update device registry production image tag to prod-1ea1da62-1733249735 --- k8s/device-registry/values-prod.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/device-registry/values-prod.yaml b/k8s/device-registry/values-prod.yaml index 06459d97a8..5affe94637 100644 --- a/k8s/device-registry/values-prod.yaml +++ b/k8s/device-registry/values-prod.yaml @@ -6,7 +6,7 @@ app: replicaCount: 3 image: repository: eu.gcr.io/airqo-250220/airqo-device-registry-api - tag: prod-da0db8f0-1733248410 + tag: prod-1ea1da62-1733249735 nameOverride: '' fullnameOverride: '' podAnnotations: {} From 1557b8bb99d7b11cac249b9a9c2b0c683598a8ef Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Dec 2024 21:18:29 +0300 Subject: [PATCH 03/16] Update workflows prod image tag to prod-1ea1da62-1733249735 --- k8s/workflows/values-prod.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/workflows/values-prod.yaml b/k8s/workflows/values-prod.yaml index cf358a829c..63698a7af0 100644 --- a/k8s/workflows/values-prod.yaml +++ b/k8s/workflows/values-prod.yaml @@ -10,7 +10,7 @@ images: initContainer: eu.gcr.io/airqo-250220/airqo-workflows-xcom redisContainer: eu.gcr.io/airqo-250220/airqo-redis containers: eu.gcr.io/airqo-250220/airqo-workflows - tag: prod-da0db8f0-1733248410 + tag: prod-1ea1da62-1733249735 nameOverride: '' fullnameOverride: '' podAnnotations: {} From ad8bfb6d6cbc216eb7582ef76162751acf6cf963 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Dec 2024 21:19:23 +0300 Subject: [PATCH 04/16] Update predict production image tag to prod-1ea1da62-1733249735 --- k8s/predict/values-prod.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/predict/values-prod.yaml b/k8s/predict/values-prod.yaml index cfd5147c61..19886efe9a 100644 --- a/k8s/predict/values-prod.yaml +++ b/k8s/predict/values-prod.yaml @@ -7,7 +7,7 @@ images: predictJob: eu.gcr.io/airqo-250220/airqo-predict-job trainJob: eu.gcr.io/airqo-250220/airqo-train-job predictPlaces: eu.gcr.io/airqo-250220/airqo-predict-places-air-quality - tag: prod-da0db8f0-1733248410 + tag: prod-1ea1da62-1733249735 api: name: airqo-prediction-api label: prediction-api From 74c0ef3b6e3bcd37324ec17b942e7c076c1e7ab9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Dec 2024 21:19:35 +0300 Subject: [PATCH 05/16] Update workflows staging image tag to stage-e06dd73e-1733249810 --- k8s/workflows/values-stage.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/workflows/values-stage.yaml b/k8s/workflows/values-stage.yaml index 05cf20e2dc..1d7703f899 100644 --- a/k8s/workflows/values-stage.yaml +++ b/k8s/workflows/values-stage.yaml @@ -10,7 +10,7 @@ images: initContainer: eu.gcr.io/airqo-250220/airqo-stage-workflows-xcom redisContainer: eu.gcr.io/airqo-250220/airqo-stage-redis containers: eu.gcr.io/airqo-250220/airqo-stage-workflows - tag: stage-7f60b036-1733224934 + tag: stage-e06dd73e-1733249810 nameOverride: '' fullnameOverride: '' podAnnotations: {} From c4158373bfb92aed4e9019f545a98054fb29923b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Dec 2024 21:21:19 +0300 Subject: [PATCH 06/16] Update AirQo exceedance production image tag to prod-03a25cf1-1733249950 --- k8s/exceedance/values-prod-airqo.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/exceedance/values-prod-airqo.yaml b/k8s/exceedance/values-prod-airqo.yaml index 014bba668a..552e86acde 100644 --- a/k8s/exceedance/values-prod-airqo.yaml +++ b/k8s/exceedance/values-prod-airqo.yaml @@ -4,6 +4,6 @@ app: configmap: env-exceedance-production image: repository: eu.gcr.io/airqo-250220/airqo-exceedance-job - tag: prod-1ea1da62-1733249735 + tag: prod-03a25cf1-1733249950 nameOverride: '' fullnameOverride: '' From 209fbca559c32122a543e3ffbc6a892eb8d385e1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Dec 2024 21:21:29 +0300 Subject: [PATCH 07/16] Update KCCA exceedance production image tag to prod-03a25cf1-1733249950 --- k8s/exceedance/values-prod-kcca.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/exceedance/values-prod-kcca.yaml b/k8s/exceedance/values-prod-kcca.yaml index e46e12b97a..5c95fd83ad 100644 --- a/k8s/exceedance/values-prod-kcca.yaml +++ b/k8s/exceedance/values-prod-kcca.yaml @@ -4,6 +4,6 @@ app: configmap: env-exceedance-production image: repository: eu.gcr.io/airqo-250220/kcca-exceedance-job - tag: prod-1ea1da62-1733249735 + tag: prod-03a25cf1-1733249950 nameOverride: '' fullnameOverride: '' From dc7eea68b3327bbedd34c60fd134c71463ee223c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Dec 2024 21:21:55 +0300 Subject: [PATCH 08/16] Update device registry production image tag to prod-03a25cf1-1733249950 --- k8s/device-registry/values-prod.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/device-registry/values-prod.yaml b/k8s/device-registry/values-prod.yaml index 5affe94637..0377f748bf 100644 --- a/k8s/device-registry/values-prod.yaml +++ b/k8s/device-registry/values-prod.yaml @@ -6,7 +6,7 @@ app: replicaCount: 3 image: repository: eu.gcr.io/airqo-250220/airqo-device-registry-api - tag: prod-1ea1da62-1733249735 + tag: prod-03a25cf1-1733249950 nameOverride: '' fullnameOverride: '' podAnnotations: {} From 34c38f936d661c778dc33d16943512c70632a73b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Dec 2024 21:22:47 +0300 Subject: [PATCH 09/16] Update workflows prod image tag to prod-03a25cf1-1733249950 --- k8s/workflows/values-prod.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/workflows/values-prod.yaml b/k8s/workflows/values-prod.yaml index 63698a7af0..8c1b6e7ef9 100644 --- a/k8s/workflows/values-prod.yaml +++ b/k8s/workflows/values-prod.yaml @@ -10,7 +10,7 @@ images: initContainer: eu.gcr.io/airqo-250220/airqo-workflows-xcom redisContainer: eu.gcr.io/airqo-250220/airqo-redis containers: eu.gcr.io/airqo-250220/airqo-workflows - tag: prod-1ea1da62-1733249735 + tag: prod-03a25cf1-1733249950 nameOverride: '' fullnameOverride: '' podAnnotations: {} From 47188077cca2e19f11b51b39f2c29a57709719d6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 3 Dec 2024 21:24:58 +0300 Subject: [PATCH 10/16] Update spatial production image tag to prod-1ea1da62-1733249735 --- k8s/spatial/values-prod.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/spatial/values-prod.yaml b/k8s/spatial/values-prod.yaml index 0bcb1a762a..7f8b853ac4 100644 --- a/k8s/spatial/values-prod.yaml +++ b/k8s/spatial/values-prod.yaml @@ -6,7 +6,7 @@ app: replicaCount: 3 image: repository: eu.gcr.io/airqo-250220/airqo-spatial-api - tag: prod-da0db8f0-1733248410 + tag: prod-1ea1da62-1733249735 nameOverride: '' fullnameOverride: '' podAnnotations: {} From 9a4b4a7b08bf87fdfbab2c6bad0181e7651892f1 Mon Sep 17 00:00:00 2001 From: NicholasTurner23 Date: Wed, 4 Dec 2024 11:59:47 +0300 Subject: [PATCH 11/16] Update bigquery builder to dynamically include networks --- src/workflows/airqo_etl_utils/bigquery_api.py | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/workflows/airqo_etl_utils/bigquery_api.py b/src/workflows/airqo_etl_utils/bigquery_api.py index 1c814fd3b4..f692bb49d1 100644 --- a/src/workflows/airqo_etl_utils/bigquery_api.py +++ b/src/workflows/airqo_etl_utils/bigquery_api.py @@ -509,7 +509,7 @@ def compose_query( table: str, start_date_time: str, end_date_time: str, - network: str, + network: str = "all", where_fields: dict = None, null_cols: list = None, columns: list = None, @@ -536,17 +536,15 @@ def compose_query( Exception: If an invalid column is provided in `where_fields` or `null_cols`, or if the `query_type` is not supported. """ - tenant = "airqo" null_cols = [] if null_cols is None else null_cols where_fields = {} if where_fields is None else where_fields columns = ", ".join(map(str, columns)) if columns else " * " - where_clause = ( - f" timestamp >= '{start_date_time}' and timestamp <= '{end_date_time}' " - ) - if tenant != Tenant.ALL: - where_clause = f" {where_clause} and tenant = '{str(tenant)}' or network = '{str(network)}' " + where_clause = f" timestamp between '{start_date_time}' and '{end_date_time}' " + + if network: + where_clause += f"AND network = '{network}' " valid_cols = self.get_columns(table=table) @@ -613,7 +611,7 @@ def query_data( start_date_time: str, end_date_time: str, table: str, - network: str, + network: str = None, dynamic_query: bool = False, columns: list = None, where_fields: dict = None, @@ -649,7 +647,11 @@ def query_data( ) else: query = self.dynamic_averaging_query( - table, start_date_time, end_date_time, time_granularity=time_granularity + table, + start_date_time, + end_date_time, + network=network, + time_granularity=time_granularity, ) dataframe = self.client.query(query=query).result().to_dataframe() @@ -669,6 +671,7 @@ def dynamic_averaging_query( end_date_time: str, exclude_columns: list = None, group_by: list = None, + network: str = "all", time_granularity: str = "HOUR", ) -> str: """ @@ -728,11 +731,18 @@ def dynamic_averaging_query( ] ) + where_clause: str = ( + f"timestamp BETWEEN '{start_date_time}' AND '{end_date_time}' " + ) + + if network: + where_clause += f"AND network = '{network}' " + # Include time granularity in both SELECT and GROUP BY timestamp_trunc = f"TIMESTAMP_TRUNC(timestamp, {time_granularity.upper()}) AS {time_granularity.lower()}" group_by_clause = ", ".join(group_by + [time_granularity.lower()]) - query = f"""SELECT {", ".join(group_by)}, {timestamp_trunc}, {avg_columns} FROM `{table}` WHERE timestamp BETWEEN '{start_date_time}' AND '{end_date_time}' GROUP BY {group_by_clause} ORDER BY {time_granularity.lower()};""" + query = f"""SELECT {", ".join(group_by)}, {timestamp_trunc}, {avg_columns} FROM `{table}` WHERE {where_clause} GROUP BY {group_by_clause} ORDER BY {time_granularity.lower()};""" return query From aa2dfeb344b2e4b33917c1477beccf56dd0f1d33 Mon Sep 17 00:00:00 2001 From: NicholasTurner23 Date: Wed, 4 Dec 2024 12:01:28 +0300 Subject: [PATCH 12/16] Update tenants/networks --- src/workflows/airqo_etl_utils/airqo_api.py | 3 +-- src/workflows/airqo_etl_utils/airqo_utils.py | 13 +++++++------ src/workflows/airqo_etl_utils/daily_data_utils.py | 2 -- .../airqo_etl_utils/data_warehouse_utils.py | 3 --- .../airqo_etl_utils/schema/raw_measurements.json | 5 +++++ src/workflows/airqo_etl_utils/weather_data_utils.py | 2 -- src/workflows/dags/airqo_measurements.py | 1 + 7 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/workflows/airqo_etl_utils/airqo_api.py b/src/workflows/airqo_etl_utils/airqo_api.py index 4dc91e2587..b0866fc26b 100644 --- a/src/workflows/airqo_etl_utils/airqo_api.py +++ b/src/workflows/airqo_etl_utils/airqo_api.py @@ -135,7 +135,7 @@ def get_devices( device_category: DeviceCategory = DeviceCategory.NONE, ) -> List[Dict[str, Any]]: """ - Retrieve devices given a tenant and device category. + Retrieve devices given a network and device category. Args: - network (str): An Enum that represents site ownership. @@ -198,7 +198,6 @@ def get_devices( "device_category": str( DeviceCategory.from_str(device.pop("category", None)) ), - "network": device.get("network"), "device_manufacturer": device.get("network", "airqo"), **device, } diff --git a/src/workflows/airqo_etl_utils/airqo_utils.py b/src/workflows/airqo_etl_utils/airqo_utils.py index 8524317968..71e519ec80 100644 --- a/src/workflows/airqo_etl_utils/airqo_utils.py +++ b/src/workflows/airqo_etl_utils/airqo_utils.py @@ -59,7 +59,7 @@ def extract_uncalibrated_data(start_date_time, end_date_time) -> pd.DataFrame: null_cols=["pm2_5_calibrated_value"], start_date_time=start_date_time, end_date_time=end_date_time, - tenant=Tenant.AIRQO, + network=str(Tenant.AIRQO), ) return DataValidationUtils.remove_outliers(hourly_uncalibrated_data) @@ -79,7 +79,7 @@ def extract_data_from_bigquery( table=table, start_date_time=start_date_time, end_date_time=end_date_time, - tenant=Tenant.AIRQO, + network=str(Tenant.AIRQO), ) return DataValidationUtils.remove_outliers(raw_data) @@ -117,7 +117,10 @@ def remove_duplicates(data: pd.DataFrame) -> pd.DataFrame: @staticmethod def extract_aggregated_raw_data( - start_date_time: str, end_date_time: str, dynamic_query: bool = False + start_date_time: str, + end_date_time: str, + network: str = None, + dynamic_query: bool = False, ) -> pd.DataFrame: """ Retrieves raw pm2.5 sensor data from bigquery and computes averages for the numeric columns grouped by device_number, device_id and site_id @@ -128,9 +131,7 @@ def extract_aggregated_raw_data( start_date_time=start_date_time, end_date_time=end_date_time, table=bigquery_api.raw_measurements_table, - network=str( - Tenant.AIRQO - ), # TODO Replace tenant implementation with network implementation + network=network, dynamic_query=dynamic_query, ) diff --git a/src/workflows/airqo_etl_utils/daily_data_utils.py b/src/workflows/airqo_etl_utils/daily_data_utils.py index 63c3f0455a..804382c8e2 100644 --- a/src/workflows/airqo_etl_utils/daily_data_utils.py +++ b/src/workflows/airqo_etl_utils/daily_data_utils.py @@ -44,7 +44,6 @@ def query_hourly_data(start_date_time, end_date_time) -> pd.DataFrame: table=bigquery_api.hourly_measurements_table, start_date_time=start_date_time, end_date_time=end_date_time, - tenant=Tenant.ALL, ) return DataValidationUtils.remove_outliers(raw_data) @@ -57,7 +56,6 @@ def query_daily_data(start_date_time, end_date_time) -> pd.DataFrame: table=bigquery_api.daily_measurements_table, start_date_time=start_date_time, end_date_time=end_date_time, - tenant=Tenant.ALL, ) return DataValidationUtils.remove_outliers(raw_data) diff --git a/src/workflows/airqo_etl_utils/data_warehouse_utils.py b/src/workflows/airqo_etl_utils/data_warehouse_utils.py index 7e3da8b2d3..3c2ed5491e 100644 --- a/src/workflows/airqo_etl_utils/data_warehouse_utils.py +++ b/src/workflows/airqo_etl_utils/data_warehouse_utils.py @@ -33,7 +33,6 @@ def extract_hourly_bam_data( start_date_time=start_date_time, end_date_time=end_date_time, table=biq_query_api.bam_measurements_table, - tenant=Tenant.ALL, ) if data.empty: @@ -59,7 +58,6 @@ def extract_data_from_big_query( start_date_time=start_date_time, end_date_time=end_date_time, table=biq_query_api.consolidated_data_table, - tenant=Tenant.ALL, ) @staticmethod @@ -83,7 +81,6 @@ def extract_hourly_low_cost_data( start_date_time=start_date_time, end_date_time=end_date_time, table=biq_query_api.hourly_measurements_table, - tenant=Tenant.ALL, ) if data.empty: diff --git a/src/workflows/airqo_etl_utils/schema/raw_measurements.json b/src/workflows/airqo_etl_utils/schema/raw_measurements.json index bd492d4209..b75ecb5b88 100644 --- a/src/workflows/airqo_etl_utils/schema/raw_measurements.json +++ b/src/workflows/airqo_etl_utils/schema/raw_measurements.json @@ -4,6 +4,11 @@ "type": "STRING", "mode": "NULLABLE" }, + { + "name": "network", + "type": "STRING", + "mode": "NULLABLE" + }, { "name": "timestamp", "type": "TIMESTAMP", diff --git a/src/workflows/airqo_etl_utils/weather_data_utils.py b/src/workflows/airqo_etl_utils/weather_data_utils.py index 725b633e83..d8c24b7e26 100644 --- a/src/workflows/airqo_etl_utils/weather_data_utils.py +++ b/src/workflows/airqo_etl_utils/weather_data_utils.py @@ -24,7 +24,6 @@ def extract_hourly_weather_data(start_date_time, end_date_time) -> pd.DataFrame: start_date_time=start_date_time, end_date_time=end_date_time, table=bigquery_api.hourly_weather_table, - tenant=Tenant.ALL, ) cols = bigquery_api.get_columns(table=bigquery_api.hourly_weather_table) return pd.DataFrame([], cols) if measurements.empty else measurements @@ -79,7 +78,6 @@ def extract_raw_data_from_bigquery(start_date_time, end_date_time) -> pd.DataFra start_date_time=start_date_time, end_date_time=end_date_time, table=bigquery_api.raw_weather_table, - tenant=Tenant.ALL, ) return measurements diff --git a/src/workflows/dags/airqo_measurements.py b/src/workflows/dags/airqo_measurements.py index 339d41823a..4804193fea 100644 --- a/src/workflows/dags/airqo_measurements.py +++ b/src/workflows/dags/airqo_measurements.py @@ -42,6 +42,7 @@ def extract_device_measurements(**kwargs) -> pd.DataFrame: return AirQoDataUtils.extract_aggregated_raw_data( start_date_time=start_date_time, end_date_time=end_date_time, + network="airqo", dynamic_query=True, ) From f8ee1cf51c44dd3adceff7020d8e2f2e938330ae Mon Sep 17 00:00:00 2001 From: NicholasTurner23 Date: Wed, 4 Dec 2024 12:44:18 +0300 Subject: [PATCH 13/16] Add networks to schemas --- .../schema/airqo_mobile_measurements.json | 319 +++++---- .../schema/bam_measurements.json | 7 +- .../schema/data_warehouse.json | 669 +++++++++--------- .../airqo_etl_utils/schema/devices.json | 7 +- .../schema/latest_measurements.json | 479 ++++++------- .../schema/mobile_measurements.json | 7 +- 6 files changed, 759 insertions(+), 729 deletions(-) diff --git a/src/workflows/airqo_etl_utils/schema/airqo_mobile_measurements.json b/src/workflows/airqo_etl_utils/schema/airqo_mobile_measurements.json index d0fad4bfb2..b3a4ea0a30 100644 --- a/src/workflows/airqo_etl_utils/schema/airqo_mobile_measurements.json +++ b/src/workflows/airqo_etl_utils/schema/airqo_mobile_measurements.json @@ -1,158 +1,163 @@ [ - { - "name": "tenant", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "timestamp", - "type": "TIMESTAMP", - "mode": "NULLABLE" - }, - { - "name": "device_number", - "type": "INTEGER", - "mode": "NULLABLE" - }, - { - "name": "device_id", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "latitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "longitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "s1_pm2_5", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3." - }, - { - "name": "s2_pm2_5", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3." - }, - { - "name": "s1_pm10", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3." - }, - { - "name": "s2_pm10", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3." - }, - { - "name": "altitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "battery", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "satellites", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "hdop", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_temperature", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_humidity", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "pm2_5_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3." - }, - { - "name": "pm10_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3." - }, - { - "name": "pm2_5_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3." - }, - { - "name": "pm10_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3." - }, - { - "name": "temperature", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "°C." - }, - { - "name": "humidity", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "%." - }, - { - "name": "wind_speed", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "m/s." - }, - { - "name": "atmospheric_pressure", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "kPa." - }, - { - "name": "radiation", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "W/m2." - }, - { - "name": "wind_gusts", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "m/s." - }, - { - "name": "precipitation", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "mm." - }, - { - "name": "wind_direction", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "degrees" - } -] \ No newline at end of file + { + "name": "tenant", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "network", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "timestamp", + "type": "TIMESTAMP", + "mode": "NULLABLE" + }, + { + "name": "device_number", + "type": "INTEGER", + "mode": "NULLABLE" + }, + { + "name": "device_id", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "latitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "longitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "s1_pm2_5", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3." + }, + { + "name": "s2_pm2_5", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3." + }, + { + "name": "s1_pm10", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3." + }, + { + "name": "s2_pm10", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3." + }, + { + "name": "altitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "battery", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "satellites", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "hdop", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_temperature", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_humidity", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "pm2_5_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3." + }, + { + "name": "pm10_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3." + }, + { + "name": "pm2_5_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3." + }, + { + "name": "pm10_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3." + }, + { + "name": "temperature", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "°C." + }, + { + "name": "humidity", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "%." + }, + { + "name": "wind_speed", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "m/s." + }, + { + "name": "atmospheric_pressure", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "kPa." + }, + { + "name": "radiation", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "W/m2." + }, + { + "name": "wind_gusts", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "m/s." + }, + { + "name": "precipitation", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "mm." + }, + { + "name": "wind_direction", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "degrees" + } +] diff --git a/src/workflows/airqo_etl_utils/schema/bam_measurements.json b/src/workflows/airqo_etl_utils/schema/bam_measurements.json index dbc9bb129e..9edb357713 100644 --- a/src/workflows/airqo_etl_utils/schema/bam_measurements.json +++ b/src/workflows/airqo_etl_utils/schema/bam_measurements.json @@ -4,6 +4,11 @@ "type": "STRING", "mode": "NULLABLE" }, + { + "name": "network", + "type": "STRING", + "mode": "NULLABLE" + }, { "name": "site_id", "type": "STRING", @@ -58,4 +63,4 @@ "mode": "NULLABLE", "description": "μg/m3." } -] \ No newline at end of file +] diff --git a/src/workflows/airqo_etl_utils/schema/data_warehouse.json b/src/workflows/airqo_etl_utils/schema/data_warehouse.json index cdbcf031f8..48290d3fc7 100644 --- a/src/workflows/airqo_etl_utils/schema/data_warehouse.json +++ b/src/workflows/airqo_etl_utils/schema/data_warehouse.json @@ -1,333 +1,338 @@ [ - { - "name": "tenant", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "timestamp", - "type": "TIMESTAMP", - "mode": "NULLABLE" - }, - { - "name": "site_id", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "site_name", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "site_description", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "site_latitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_longitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_altitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_number", - "type": "INTEGER", - "mode": "NULLABLE" - }, - { - "name": "device_id", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "device_category", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "device_latitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_longitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_battery", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_altitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_temperature", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_humidity", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "pm2_5", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "s1_pm2_5", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 2.5 of sensor one" - }, - { - "name": "s2_pm2_5", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 2.5 of sensor two" - }, - { - "name": "pm2_5_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\nEquals the pm 2.5 average value of sensor one and sensor two" - }, - { - "name": "pm2_5_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\nEquals the calibrated pm 2.5 value." - }, - { - "name": "pm10", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "s1_pm10", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 10 of sensor one" - }, - { - "name": "s2_pm10", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 10 of sensor two" - }, - { - "name": "pm10_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\nEquals the pm 10 average value of sensor one and sensor two." - }, - { - "name": "pm10_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\nEquals the calibrated pm 10 value. " - }, - { - "name": "no2", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "no2_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "ppb.\nNO2 Concentration raw value" - }, - { - "name": "no2_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "ppb.\nEquals the NO2 Concentration value if available else equals the raw value." - }, - { - "name": "pm1", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "pm1_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 1 raw value. " - }, - { - "name": "pm1_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 1 calibrated value. " - }, - { - "name": "temperature", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "°C." - }, - { - "name": "humidity", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "%." - }, - { - "name": "wind_speed", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "m/s." - }, - { - "name": "atmospheric_pressure", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "kPa." - }, - { - "name": "radiation", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "W/m2." - }, - { - "name": "vapor_pressure", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "kPa." - }, - { - "name": "wind_gusts", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "m/s." - }, - { - "name": "precipitation", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "mm." - }, - { - "name": "wind_direction", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "degrees" - }, - { - "name": "satellites", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "hdop", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_bearing_to_kampala_center", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_distance_to_kampala_center", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_landform_90", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_landform_270", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_aspect", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_distance_to_nearest_tertiary_road", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_distance_to_nearest_primary_road", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_distance_to_nearest_road", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_distance_to_nearest_residential_road", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_distance_to_nearest_secondary_road", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_distance_to_nearest_unclassified_road", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "country", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "region", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "district", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "city", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "county", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "sub_county", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "parish", - "type": "STRING", - "mode": "NULLABLE" - } -] \ No newline at end of file + { + "name": "tenant", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "network", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "timestamp", + "type": "TIMESTAMP", + "mode": "NULLABLE" + }, + { + "name": "site_id", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "site_name", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "site_description", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "site_latitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_longitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_altitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_number", + "type": "INTEGER", + "mode": "NULLABLE" + }, + { + "name": "device_id", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "device_category", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "device_latitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_longitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_battery", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_altitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_temperature", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_humidity", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "pm2_5", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "s1_pm2_5", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 2.5 of sensor one" + }, + { + "name": "s2_pm2_5", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 2.5 of sensor two" + }, + { + "name": "pm2_5_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\nEquals the pm 2.5 average value of sensor one and sensor two" + }, + { + "name": "pm2_5_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\nEquals the calibrated pm 2.5 value." + }, + { + "name": "pm10", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "s1_pm10", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 10 of sensor one" + }, + { + "name": "s2_pm10", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 10 of sensor two" + }, + { + "name": "pm10_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\nEquals the pm 10 average value of sensor one and sensor two." + }, + { + "name": "pm10_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\nEquals the calibrated pm 10 value. " + }, + { + "name": "no2", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "no2_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "ppb.\nNO2 Concentration raw value" + }, + { + "name": "no2_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "ppb.\nEquals the NO2 Concentration value if available else equals the raw value." + }, + { + "name": "pm1", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "pm1_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 1 raw value. " + }, + { + "name": "pm1_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 1 calibrated value. " + }, + { + "name": "temperature", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "°C." + }, + { + "name": "humidity", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "%." + }, + { + "name": "wind_speed", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "m/s." + }, + { + "name": "atmospheric_pressure", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "kPa." + }, + { + "name": "radiation", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "W/m2." + }, + { + "name": "vapor_pressure", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "kPa." + }, + { + "name": "wind_gusts", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "m/s." + }, + { + "name": "precipitation", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "mm." + }, + { + "name": "wind_direction", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "degrees" + }, + { + "name": "satellites", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "hdop", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_bearing_to_kampala_center", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_distance_to_kampala_center", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_landform_90", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_landform_270", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_aspect", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_distance_to_nearest_tertiary_road", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_distance_to_nearest_primary_road", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_distance_to_nearest_road", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_distance_to_nearest_residential_road", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_distance_to_nearest_secondary_road", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_distance_to_nearest_unclassified_road", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "country", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "region", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "district", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "city", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "county", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "sub_county", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "parish", + "type": "STRING", + "mode": "NULLABLE" + } +] diff --git a/src/workflows/airqo_etl_utils/schema/devices.json b/src/workflows/airqo_etl_utils/schema/devices.json index d27384cb32..966b69122e 100644 --- a/src/workflows/airqo_etl_utils/schema/devices.json +++ b/src/workflows/airqo_etl_utils/schema/devices.json @@ -4,6 +4,11 @@ "type": "STRING", "mode": "REQUIRED" }, + { + "name": "network", + "type": "STRING", + "mode": "REQUIRED" + }, { "name": "latitude", "type": "FLOAT", @@ -59,4 +64,4 @@ "type": "STRING", "mode": "NULLABLE" } -] \ No newline at end of file +] diff --git a/src/workflows/airqo_etl_utils/schema/latest_measurements.json b/src/workflows/airqo_etl_utils/schema/latest_measurements.json index 72f4cffc4f..34c50539e0 100644 --- a/src/workflows/airqo_etl_utils/schema/latest_measurements.json +++ b/src/workflows/airqo_etl_utils/schema/latest_measurements.json @@ -1,238 +1,243 @@ [ - { - "name": "tenant", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "timestamp", - "type": "TIMESTAMP", - "mode": "NULLABLE" - }, - { - "name": "site_id", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "site_name", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "site_location", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "site_display_name", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "site_display_location", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "site_latitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_longitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_approximate_latitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_approximate_longitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_number", - "type": "INTEGER", - "mode": "NULLABLE" - }, - { - "name": "device_id", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "device_category", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "device_latitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_longitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "pm2_5", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "s1_pm2_5", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 2.5 of sensor one" - }, - { - "name": "s2_pm2_5", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 2.5 of sensor two" - }, - { - "name": "pm2_5_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\nEquals the pm 2.5 average value of sensor one and sensor two" - }, - { - "name": "pm2_5_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\nEquals the calibrated pm 2.5 value." - }, - { - "name": "pm10", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "s1_pm10", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 10 of sensor one" - }, - { - "name": "s2_pm10", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 10 of sensor two" - }, - { - "name": "pm10_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\nEquals the pm 10 average value of sensor one and sensor two." - }, - { - "name": "pm10_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\nEquals the calibrated pm 10 value." - }, - { - "name": "no2", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "no2_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "ppb.\nNO2 Concentration raw value" - }, - { - "name": "no2_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "ppb.\nEquals the NO2 Concentration value if available else equals the raw value." - }, - { - "name": "pm1", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "pm1_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 1 raw value." - }, - { - "name": "pm1_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 1 calibrated value." - }, - { - "name": "temperature", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "°C." - }, - { - "name": "humidity", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "%." - }, - { - "name": "wind_speed", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "m/s." - }, - { - "name": "atmospheric_pressure", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "kPa." - }, - { - "name": "radiation", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "W/m2." - }, - { - "name": "vapor_pressure", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "kPa." - }, - { - "name": "wind_gusts", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "m/s." - }, - { - "name": "precipitation", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "mm." - }, - { - "name": "wind_direction", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "degrees" - }, - { - "name": "satellites", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "hdop", - "type": "FLOAT", - "mode": "NULLABLE" - } -] \ No newline at end of file + { + "name": "tenant", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "network", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "timestamp", + "type": "TIMESTAMP", + "mode": "NULLABLE" + }, + { + "name": "site_id", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "site_name", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "site_location", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "site_display_name", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "site_display_location", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "site_latitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_longitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_approximate_latitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_approximate_longitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_number", + "type": "INTEGER", + "mode": "NULLABLE" + }, + { + "name": "device_id", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "device_category", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "device_latitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_longitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "pm2_5", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "s1_pm2_5", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 2.5 of sensor one" + }, + { + "name": "s2_pm2_5", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 2.5 of sensor two" + }, + { + "name": "pm2_5_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\nEquals the pm 2.5 average value of sensor one and sensor two" + }, + { + "name": "pm2_5_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\nEquals the calibrated pm 2.5 value." + }, + { + "name": "pm10", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "s1_pm10", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 10 of sensor one" + }, + { + "name": "s2_pm10", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 10 of sensor two" + }, + { + "name": "pm10_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\nEquals the pm 10 average value of sensor one and sensor two." + }, + { + "name": "pm10_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\nEquals the calibrated pm 10 value." + }, + { + "name": "no2", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "no2_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "ppb.\nNO2 Concentration raw value" + }, + { + "name": "no2_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "ppb.\nEquals the NO2 Concentration value if available else equals the raw value." + }, + { + "name": "pm1", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "pm1_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 1 raw value." + }, + { + "name": "pm1_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 1 calibrated value." + }, + { + "name": "temperature", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "°C." + }, + { + "name": "humidity", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "%." + }, + { + "name": "wind_speed", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "m/s." + }, + { + "name": "atmospheric_pressure", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "kPa." + }, + { + "name": "radiation", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "W/m2." + }, + { + "name": "vapor_pressure", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "kPa." + }, + { + "name": "wind_gusts", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "m/s." + }, + { + "name": "precipitation", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "mm." + }, + { + "name": "wind_direction", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "degrees" + }, + { + "name": "satellites", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "hdop", + "type": "FLOAT", + "mode": "NULLABLE" + } +] diff --git a/src/workflows/airqo_etl_utils/schema/mobile_measurements.json b/src/workflows/airqo_etl_utils/schema/mobile_measurements.json index 3934262811..1f7b1b5a11 100644 --- a/src/workflows/airqo_etl_utils/schema/mobile_measurements.json +++ b/src/workflows/airqo_etl_utils/schema/mobile_measurements.json @@ -4,6 +4,11 @@ "type": "STRING", "mode": "NULLABLE" }, + { + "name": "network", + "type": "STRING", + "mode": "NULLABLE" + }, { "name": "timestamp", "type": "TIMESTAMP", @@ -129,4 +134,4 @@ "type": "FLOAT", "mode": "NULLABLE" } -] \ No newline at end of file +] From 50c8a62db0d265a68c3c1e750c9b42a5e7ddce51 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 4 Dec 2024 12:45:56 +0300 Subject: [PATCH 14/16] Update workflows staging image tag to stage-f43cad79-1733305404 --- k8s/workflows/values-stage.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/workflows/values-stage.yaml b/k8s/workflows/values-stage.yaml index 1d7703f899..667f099cdd 100644 --- a/k8s/workflows/values-stage.yaml +++ b/k8s/workflows/values-stage.yaml @@ -10,7 +10,7 @@ images: initContainer: eu.gcr.io/airqo-250220/airqo-stage-workflows-xcom redisContainer: eu.gcr.io/airqo-250220/airqo-stage-redis containers: eu.gcr.io/airqo-250220/airqo-stage-workflows - tag: stage-e06dd73e-1733249810 + tag: stage-f43cad79-1733305404 nameOverride: '' fullnameOverride: '' podAnnotations: {} From 647c05a94167276a2f215bc6e35bc44ea59b72fc Mon Sep 17 00:00:00 2001 From: NicholasTurner23 Date: Wed, 4 Dec 2024 14:19:51 +0300 Subject: [PATCH 15/16] Clean up to remove repeated operations --- src/workflows/airqo_etl_utils/airqo_utils.py | 21 +++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/workflows/airqo_etl_utils/airqo_utils.py b/src/workflows/airqo_etl_utils/airqo_utils.py index 71e519ec80..51d14f2721 100644 --- a/src/workflows/airqo_etl_utils/airqo_utils.py +++ b/src/workflows/airqo_etl_utils/airqo_utils.py @@ -758,19 +758,26 @@ def clean_low_cost_sensor_data( AirQoGxExpectations.from_pandas().pm2_5_low_cost_sensor_raw_data( data ) - + else: + data["timestamp"] = pd.to_datetime(data["timestamp"]) data.dropna(subset=["timestamp"], inplace=True) - data["timestamp"] = pd.to_datetime(data["timestamp"]) + data.drop_duplicates( subset=["timestamp", "device_id"], keep="first", inplace=True ) # TODO Find an appropriate place to put this if device_category == DeviceCategory.LOW_COST: - data["pm2_5_raw_value"] = data[["s1_pm2_5", "s2_pm2_5"]].mean(axis=1) - data["pm2_5"] = data[["s1_pm2_5", "s2_pm2_5"]].mean(axis=1) - data["pm10_raw_value"] = data[["s1_pm10", "s2_pm10"]].mean(axis=1) - data["pm10"] = data[["s1_pm10", "s2_pm10"]].mean(axis=1) + is_airqo_network = data["network"] == "airqo" + + pm2_5_mean = data.loc[is_airqo_network, ["s1_pm2_5", "s2_pm2_5"]].mean( + axis=1 + ) + pm10_mean = data.loc[is_airqo_network, ["s1_pm10", "s2_pm10"]].mean(axis=1) + data.loc[is_airqo_network, "pm2_5_raw_value"] = pm2_5_mean + data.loc[is_airqo_network, "pm2_5"] = pm2_5_mean + data.loc[is_airqo_network, "pm10_raw_value"] = pm10_mean + data.loc[is_airqo_network, "pm10"] = pm10_mean return data @staticmethod @@ -1033,7 +1040,7 @@ def merge_aggregated_weather_data( @staticmethod def extract_devices_deployment_logs() -> pd.DataFrame: airqo_api = AirQoApi() - devices = airqo_api.get_devices(tenant=Tenant.AIRQO) + devices = airqo_api.get_devices(network=str(Tenant.AIRQO)) devices_history = pd.DataFrame() for device in devices: try: From ba74197e89e521c39b92bc88919212062336eab7 Mon Sep 17 00:00:00 2001 From: NicholasTurner23 Date: Wed, 4 Dec 2024 14:20:46 +0300 Subject: [PATCH 16/16] Cleanup datetime conversion --- src/workflows/airqo_etl_utils/data_validator.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/workflows/airqo_etl_utils/data_validator.py b/src/workflows/airqo_etl_utils/data_validator.py index 34a25ba59f..13527673be 100644 --- a/src/workflows/airqo_etl_utils/data_validator.py +++ b/src/workflows/airqo_etl_utils/data_validator.py @@ -72,10 +72,10 @@ def format_data_types( data[col] = ( data[col] .astype(str) - .str.replace(r"[^\w\s\.\-:]", "", regex=True) + .str.replace(r"[^\w\s\.\-+:]", "", regex=True) .str.replace(r"(? pd.DataFrame: dtype: list(set(columns) & set(data.columns)) for dtype, columns in column_types.items() } - data = DataValidationUtils.format_data_types( data=data, floats=filtered_columns[ColumnDataType.FLOAT], @@ -151,7 +150,6 @@ def remove_outliers(data: pd.DataFrame) -> pd.DataFrame: ) validated_columns = list(chain.from_iterable(filtered_columns.values())) - for col in validated_columns: is_airqo_network = data["network"] == "airqo" mapped_name = configuration.AIRQO_DATA_COLUMN_NAME_MAPPING.get(col, None)