diff --git a/.gitignore b/.gitignore index 28d6b8a4..4fa76bf0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +__pycache__/ # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. release.zip diff --git a/src/framework/processing/py/poetry.lock b/src/framework/processing/py/poetry.lock index c36c2cd5..e98eb84c 100644 --- a/src/framework/processing/py/poetry.lock +++ b/src/framework/processing/py/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "colorama" @@ -24,36 +24,47 @@ files = [ [[package]] name = "numpy" -version = "1.25.2" +version = "1.26.2" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.9" files = [ - {file = "numpy-1.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3"}, - {file = "numpy-1.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f"}, - {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187"}, - {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08f2e037bba04e707eebf4bc934f1972a315c883a9e0ebfa8a7756eabf9e357"}, - {file = "numpy-1.25.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bec1e7213c7cb00d67093247f8c4db156fd03075f49876957dca4711306d39c9"}, - {file = "numpy-1.25.2-cp310-cp310-win32.whl", hash = "sha256:7dc869c0c75988e1c693d0e2d5b26034644399dd929bc049db55395b1379e044"}, - {file = "numpy-1.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:834b386f2b8210dca38c71a6e0f4fd6922f7d3fcff935dbe3a570945acb1b545"}, - {file = "numpy-1.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash 
= "sha256:c5462d19336db4560041517dbb7759c21d181a67cb01b36ca109b2ae37d32418"}, - {file = "numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5652ea24d33585ea39eb6a6a15dac87a1206a692719ff45d53c5282e66d4a8f"}, - {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2"}, - {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e7f0f7f6d0eee8364b9a6304c2845b9c491ac706048c7e8cf47b83123b8dbf"}, - {file = "numpy-1.25.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bb33d5a1cf360304754913a350edda36d5b8c5331a8237268c48f91253c3a364"}, - {file = "numpy-1.25.2-cp311-cp311-win32.whl", hash = "sha256:5883c06bb92f2e6c8181df7b39971a5fb436288db58b5a1c3967702d4278691d"}, - {file = "numpy-1.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c97325a0ba6f9d041feb9390924614b60b99209a71a69c876f71052521d42a4"}, - {file = "numpy-1.25.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b79e513d7aac42ae918db3ad1341a015488530d0bb2a6abcbdd10a3a829ccfd3"}, - {file = "numpy-1.25.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb942bfb6f84df5ce05dbf4b46673ffed0d3da59f13635ea9b926af3deb76926"}, - {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e0746410e73384e70d286f93abf2520035250aad8c5714240b0492a7302fdca"}, - {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295"}, - {file = "numpy-1.25.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b77775f4b7df768967a7c8b3567e309f617dd5e99aeb886fa14dc1a0791141f"}, - {file = "numpy-1.25.2-cp39-cp39-win32.whl", hash = "sha256:2792d23d62ec51e50ce4d4b7d73de8f67a2fd3ea710dcbc8563a51a03fb07b01"}, - {file = "numpy-1.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:76b4115d42a7dfc5d485d358728cdd8719be33cc5ec6ec08632a5d6fca2ed380"}, - 
{file = "numpy-1.25.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a1329e26f46230bf77b02cc19e900db9b52f398d6722ca853349a782d4cff55"}, - {file = "numpy-1.25.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c3abc71e8b6edba80a01a52e66d83c5d14433cbcd26a40c329ec7ed09f37901"}, - {file = "numpy-1.25.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf"}, - {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"}, + {file = "numpy-1.26.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3703fc9258a4a122d17043e57b35e5ef1c5a5837c3db8be396c82e04c1cf9b0f"}, + {file = "numpy-1.26.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc392fdcbd21d4be6ae1bb4475a03ce3b025cd49a9be5345d76d7585aea69440"}, + {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36340109af8da8805d8851ef1d74761b3b88e81a9bd80b290bbfed61bd2b4f75"}, + {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcc008217145b3d77abd3e4d5ef586e3bdfba8fe17940769f8aa09b99e856c00"}, + {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3ced40d4e9e18242f70dd02d739e44698df3dcb010d31f495ff00a31ef6014fe"}, + {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b272d4cecc32c9e19911891446b72e986157e6a1809b7b56518b4f3755267523"}, + {file = "numpy-1.26.2-cp310-cp310-win32.whl", hash = "sha256:22f8fc02fdbc829e7a8c578dd8d2e15a9074b630d4da29cda483337e300e3ee9"}, + {file = "numpy-1.26.2-cp310-cp310-win_amd64.whl", hash = "sha256:26c9d33f8e8b846d5a65dd068c14e04018d05533b348d9eaeef6c1bd787f9919"}, + {file = "numpy-1.26.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b96e7b9c624ef3ae2ae0e04fa9b460f6b9f17ad8b4bec6d7756510f1f6c0c841"}, + {file = "numpy-1.26.2-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:aa18428111fb9a591d7a9cc1b48150097ba6a7e8299fb56bdf574df650e7d1f1"}, + {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06fa1ed84aa60ea6ef9f91ba57b5ed963c3729534e6e54055fc151fad0423f0a"}, + {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96ca5482c3dbdd051bcd1fce8034603d6ebfc125a7bd59f55b40d8f5d246832b"}, + {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:854ab91a2906ef29dc3925a064fcd365c7b4da743f84b123002f6139bcb3f8a7"}, + {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f43740ab089277d403aa07567be138fc2a89d4d9892d113b76153e0e412409f8"}, + {file = "numpy-1.26.2-cp311-cp311-win32.whl", hash = "sha256:a2bbc29fcb1771cd7b7425f98b05307776a6baf43035d3b80c4b0f29e9545186"}, + {file = "numpy-1.26.2-cp311-cp311-win_amd64.whl", hash = "sha256:2b3fca8a5b00184828d12b073af4d0fc5fdd94b1632c2477526f6bd7842d700d"}, + {file = "numpy-1.26.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a4cd6ed4a339c21f1d1b0fdf13426cb3b284555c27ac2f156dfdaaa7e16bfab0"}, + {file = "numpy-1.26.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5d5244aabd6ed7f312268b9247be47343a654ebea52a60f002dc70c769048e75"}, + {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a3cdb4d9c70e6b8c0814239ead47da00934666f668426fc6e94cce869e13fd7"}, + {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa317b2325f7aa0a9471663e6093c210cb2ae9c0ad824732b307d2c51983d5b6"}, + {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:174a8880739c16c925799c018f3f55b8130c1f7c8e75ab0a6fa9d41cab092fd6"}, + {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f79b231bf5c16b1f39c7f4875e1ded36abee1591e98742b05d8a0fb55d8a3eec"}, + {file = "numpy-1.26.2-cp312-cp312-win32.whl", hash = 
"sha256:4a06263321dfd3598cacb252f51e521a8cb4b6df471bb12a7ee5cbab20ea9167"}, + {file = "numpy-1.26.2-cp312-cp312-win_amd64.whl", hash = "sha256:b04f5dc6b3efdaab541f7857351aac359e6ae3c126e2edb376929bd3b7f92d7e"}, + {file = "numpy-1.26.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4eb8df4bf8d3d90d091e0146f6c28492b0be84da3e409ebef54349f71ed271ef"}, + {file = "numpy-1.26.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1a13860fdcd95de7cf58bd6f8bc5a5ef81c0b0625eb2c9a783948847abbef2c2"}, + {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64308ebc366a8ed63fd0bf426b6a9468060962f1a4339ab1074c228fa6ade8e3"}, + {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baf8aab04a2c0e859da118f0b38617e5ee65d75b83795055fb66c0d5e9e9b818"}, + {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d73a3abcac238250091b11caef9ad12413dab01669511779bc9b29261dd50210"}, + {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b361d369fc7e5e1714cf827b731ca32bff8d411212fccd29ad98ad622449cc36"}, + {file = "numpy-1.26.2-cp39-cp39-win32.whl", hash = "sha256:bd3f0091e845164a20bd5a326860c840fe2af79fa12e0469a12768a3ec578d80"}, + {file = "numpy-1.26.2-cp39-cp39-win_amd64.whl", hash = "sha256:2beef57fb031dcc0dc8fa4fe297a742027b954949cabb52a2a376c144e5e6060"}, + {file = "numpy-1.26.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1cc3d5029a30fb5f06704ad6b23b35e11309491c999838c31f124fee32107c79"}, + {file = "numpy-1.26.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94cc3c222bb9fb5a12e334d0479b97bb2df446fbe622b470928f5284ffca3f8d"}, + {file = "numpy-1.26.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fe6b44fb8fcdf7eda4ef4461b97b3f63c466b27ab151bec2366db8b197387841"}, + {file = "numpy-1.26.2.tar.gz", hash = "sha256:f65738447676ab5777f11e6bbbdb8ce11b785e105f690bc45966574816b6d3ea"}, ] [[package]] @@ -185,4 +196,4 @@ files = 
"""Donation flow for a Google location-history export.

The flow asks the participant for a zip file, extracts the distance walked,
cycled and run per day from the "Semantic Location History" JSON files inside
it, shows the result in a consent form and donates whatever the participant
approves.
"""
import fnmatch
import json
import os.path
import zipfile
from collections import namedtuple
from datetime import datetime, timezone

import numpy as np
import pandas as pd

import port.api.props as props
from port.api.commands import CommandSystemDonate, CommandSystemExit, CommandUIRender

# One table shown on the consent form: a stable id, a translatable title and
# the pandas DataFrame with the rows to display.
ExtractionResult = namedtuple("ExtractionResult", ["id", "title", "data_frame"])

# Activity segments that started before this (naive) moment are ignored.
filter_start_date = datetime(2017, 1, 1)

# Activity types that are reported, in display order, with their table titles.
_ACTIVITY_TITLES = (
    ("WALKING", {"en": "Walking", "nl": "Gewandeld"}),
    ("CYCLING", {"en": "Cycling", "nl": "Gefietst"}),
    ("RUNNING", {"en": "Running", "nl": "Hardgelopen"}),
)
_ACTIVITY_TYPES = frozenset(activity for activity, _ in _ACTIVITY_TITLES)

_EXTRACTION_ERROR = "Unable to extract data from package"


def get_in(dct, *key_path):
    """Follow ``key_path`` through nested mappings.

    Returns the value found at the end of the path, or ``None`` as soon as a
    key is missing, a value is ``None``, or an intermediate value has no
    ``.get`` (for example a list or a string).
    """
    for key in key_path:
        try:
            dct = dct.get(key)
        except AttributeError:
            # Intermediate value is not a mapping: the path does not exist.
            return None
        if dct is None:
            return None
    return dct


def _parse_start_timestamp(raw):
    """Parse an ISO-8601 timestamp into a naive datetime.

    A trailing ``Z`` is removed only when it is actually present.  Timestamps
    that carry an explicit offset are converted to UTC and made naive so they
    stay comparable with ``filter_start_date``.
    """
    parsed = datetime.fromisoformat(raw.removesuffix("Z"))
    if parsed.tzinfo is not None:
        parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
    return parsed


def parse_json_to_dataframe(parsed_dict):
    """Turn one parsed location-history JSON document into a DataFrame.

    Only ``activitySegment`` entries that started on or after
    ``filter_start_date``, have one of the reported activity types and carry a
    non-zero distance are kept.  Segments without a start timestamp or
    activity type are skipped instead of raising ``KeyError``.

    Returns a DataFrame with the columns ``startTimestamp``, ``activityType``
    and ``distanceMeters`` (possibly empty).
    """
    rows = []
    for obj in parsed_dict.get("timelineObjects") or []:
        segment = obj.get("activitySegment")
        if not segment:
            continue

        raw_start = get_in(segment, "duration", "startTimestamp")
        if not raw_start:
            continue
        start_timestamp = _parse_start_timestamp(raw_start)
        if start_timestamp < filter_start_date:
            continue

        activity_type = segment.get("activityType")
        if activity_type not in _ACTIVITY_TYPES:
            continue

        # First non-zero distance wins; a segment without any is dropped.
        distance_meters = (
            get_in(segment, "waypointPath", "distanceMeters")
            or get_in(segment, "simplifiedRawPath", "distanceMeters")
            or segment.get("distance")
        )
        if not distance_meters:
            continue

        rows.append([start_timestamp, activity_type, distance_meters])

    return pd.DataFrame(
        rows, columns=["startTimestamp", "activityType", "distanceMeters"]
    )


def aggregate_distance_by_day_activity(df):
    """Sum ``distanceMeters`` per calendar day and activity type.

    The input frame is left untouched.  In the result ``startTimestamp`` holds
    the day formatted as ``YYYY-MM-DD``.
    """
    # pd.to_datetime makes this work for empty / object-dtype columns as well,
    # where the ``.dt`` accessor would otherwise raise AttributeError.
    day = pd.to_datetime(df["startTimestamp"]).dt.strftime("%Y-%m-%d")
    return (
        df.assign(startTimestamp=day)
        .groupby(["startTimestamp", "activityType"])["distanceMeters"]
        .sum()
        .reset_index()
    )


def extract(df):
    """Build one ``ExtractionResult`` per activity type that has data.

    Each table has the columns ``Datum`` (day) and ``Afstand in m`` (distance
    in metres, rounded up).  Returns an empty list for an empty frame.
    """
    if df.empty:
        return []

    aggregated = aggregate_distance_by_day_activity(df)
    aggregated["Afstand in m"] = aggregated["distanceMeters"].apply(np.ceil)

    results = []
    for activity_type, title in _ACTIVITY_TITLES:
        rows = aggregated.loc[aggregated["activityType"] == activity_type]
        if rows.empty:
            continue

        # rename() and the column selection both return new frames, so nothing
        # is written into a slice of ``aggregated``.
        table = rows.rename(columns={"startTimestamp": "Datum"})[
            ["Datum", "Afstand in m"]
        ].reset_index(drop=True)
        results.append(
            ExtractionResult(
                id=activity_type.lower(),
                title=props.Translatable(dict(title)),
                data_frame=table,
            )
        )
    return results


def process(sessionId):
    """Generator that drives the donation flow.

    Yields UI / system commands and receives the participant's responses via
    ``send``.  Step 1 asks for a file until extraction succeeds or the
    participant gives up; step 2 asks for consent and donates the outcome.
    """
    meta_data = [("debug", "start")]

    # STEP 1: select the file
    data = None
    while True:
        file_result = yield render_donation_page(prompt_file())
        if file_result.__type__ != "PayloadString":
            meta_data.append(("debug", "skip to next step"))
            break

        meta_data.append(("debug", "extracting file"))
        extraction_result = extract_data_from_zip(file_result.value)

        if extraction_result == "invalid":
            meta_data.append(("debug", "prompt confirmation to retry file selection"))
            retry_result = yield render_donation_page(retry_confirmation())
            if retry_result.__type__ == "PayloadTrue":
                meta_data.append(("debug", "retry prompt file"))
                continue
            meta_data.append(("debug", "skip due to invalid file"))
            # A tuple marks the "report a problem" path in step 2.
            data = ("aborted", file_result.value)
            break

        if extraction_result == "no-data":
            retry_result = yield render_donation_page(retry_no_data_confirmation())
            if retry_result.__type__ == "PayloadTrue":
                continue
            data = ("aborted", file_result.value)
            break

        meta_data.append(("debug", "extraction successful, go to consent form"))
        data = extraction_result
        break

    # STEP 2: ask for consent
    meta_data.append(("debug", "prompt consent"))
    error_detected = isinstance(data, tuple)
    if error_detected:
        prompt = prompt_report_consent(os.path.basename(data[1]), meta_data)
    else:
        prompt = prompt_consent(data, meta_data)
    consent_result = yield render_donation_page(prompt)

    if consent_result.__type__ == "PayloadJSON":
        meta_data.append(("debug", "donate consent data"))
        donation_data = json.loads(consent_result.value)
    else:
        # "PayloadFalse" and any unexpected payload type: nothing was approved,
        # so record a decline instead of failing on an unbound variable.
        donation_data = {"status": "donation declined"}

    if error_detected:
        if isinstance(donation_data, dict):
            donation_data["error"] = _EXTRACTION_ERROR
        else:
            # NOTE(review): the consent form may return a JSON array, which
            # cannot take an "error" key; it is wrapped here so the report is
            # still donated. Confirm the wrapper shape with the data consumers.
            donation_data = {"error": _EXTRACTION_ERROR, "data": donation_data}
    yield donate(f"{sessionId}", json.dumps(donation_data))


def render_donation_page(body):
    """Wrap ``body`` in the donation page and return the render command."""
    header = props.PropsUIHeader(
        props.Translatable({"en": "Google location", "nl": "Google locatie"})
    )
    page = props.PropsUIPageDonation("google-location", header, body)
    return CommandUIRender(page)


def _retry_prompt(text):
    """Build a confirm prompt with the shared "Try again" / "Continue" buttons."""
    ok = props.Translatable({"en": "Try again", "nl": "Probeer opnieuw"})
    cancel = props.Translatable({"en": "Continue", "nl": "Verder"})
    return props.PropsUIPromptConfirm(text, ok, cancel)


def retry_confirmation():
    """Prompt shown when the selected file could not be processed."""
    return _retry_prompt(
        props.Translatable(
            {
                "en": "Unfortunately, we cannot process your file. Continue, if you are sure that you selected the right file. Try again to select a different file.",
                "nl": "Helaas, kunnen we uw bestand niet verwerken. Weet u zeker dat u het juiste bestand heeft gekozen? Ga dan verder. Probeer opnieuw als u een ander bestand wilt kiezen.",
            }
        )
    )


def retry_no_data_confirmation():
    """Prompt shown when the file contains no location-history files."""
    return _retry_prompt(
        props.Translatable(
            {
                "en": "Unfortunately we could not detect any location information in your file. Continue, if you are sure that you selected the right file. Try again to select a different file.",
                "nl": "We hebben helaas geen locatie informatie in uw bestand gevonden. Weet u zeker dat u het juiste bestand heeft gekozen? Ga dan verder. Probeer opnieuw als u een ander bestand wilt kiezen.",
            }
        )
    )


def prompt_file():
    """Build the file-input prompt; only zip files are accepted."""
    description = props.Translatable(
        {
            "en": "Click 'Choose file' to choose the file that you received from Google. If you click 'Continue', the data that is required for research is extracted from your file.",
            "nl": "Klik op ‘Kies bestand’ om het bestand dat u ontvangen hebt van Google te kiezen. Als u op 'Verder' klikt worden de gegevens die nodig zijn voor het onderzoek uit uw bestand gehaald.",
        }
    )
    return props.PropsUIPromptFileInput(description, "application/zip")


def _log_table(meta_data):
    """Build the consent-form table that lists the collected log messages."""
    log_title = props.Translatable({"en": "Log messages", "nl": "Log berichten"})
    meta_frame = pd.DataFrame(meta_data, columns=["type", "message"])
    return props.PropsUIPromptConsentFormTable("log_messages", log_title, meta_frame)


def prompt_consent(data, meta_data):
    """Build the consent form for successfully extracted tables.

    ``data`` is an iterable of ``ExtractionResult`` or ``None`` (no file was
    selected); ``meta_data`` is the list of ``(type, message)`` log tuples.
    """
    tables = []
    if data is not None:
        tables = [
            props.PropsUIPromptConsentFormTable(table.id, table.title, table.data_frame)
            for table in data
        ]
    return props.PropsUIPromptConsentForm(tables, [_log_table(meta_data)])


def prompt_report_consent(filename, meta_data):
    """Build the consent form used to report a file that yielded no data.

    Only the file name and the log messages are offered for donation.
    """
    tables = [
        props.PropsUIPromptConsentFormTable(
            "filename",
            props.Translatable({"nl": "Bestandsnaam", "en": "Filename"}),
            pd.DataFrame({"Bestandsnaam": [filename]}),
        )
    ]
    return props.PropsUIPromptConsentForm(
        tables,
        [_log_table(meta_data)],
        description=props.Translatable(
            {
                "nl": "Helaas konden we geen gegevens uit uw gegevenspakket halen. Wilt u de onderzoekers van het LISS panel hiervan op de hoogte stellen?",
                "en": "Unfortunately we could not extract any data from your package. Would you like to report this to the researchers of the LISS panel?",
            }
        ),
        donate_question=props.Translatable(
            {
                "en": "Do you want to report the above data?",
                "nl": "Wilt u de bovenstaande gegevens rapporteren?",
            }
        ),
        donate_button=props.Translatable({"nl": "Ja, rapporteer", "en": "Yes, report"}),
    )


def filter_json_files(file_list):
    """Return the archive members that look like semantic location history.

    ``fnmatch`` does not treat ``/`` specially, so every ``*`` in the pattern
    may span directory levels; the leading ``**/`` therefore requires at least
    one parent directory in front of ``Semantic Location History``.
    """
    pattern = "**/Semantic Location History/*/*_*.json"
    return [name for name in file_list if fnmatch.fnmatch(name, pattern)]


def load_and_process_file(z, file, callback):
    """Parse archive member ``file`` of open zip ``z`` as JSON and pass it on."""
    with z.open(file) as f:
        return callback(json.load(f))


def extract_data_from_zip(zip_filepath):
    """Extract the per-activity tables from a location-history zip file.

    Returns:
        ``"invalid"`` when the file is not a readable zip archive or one of
        the matching members is not valid JSON, ``"no-data"`` when the archive
        has no matching members, otherwise the list produced by ``extract``
        (which may be empty when no segment qualified).
    """
    try:
        with zipfile.ZipFile(zip_filepath, "r") as z:
            files = filter_json_files(z.namelist())
            dfs = [load_and_process_file(z, f, parse_json_to_dataframe) for f in files]
    except (zipfile.BadZipFile, json.JSONDecodeError, UnicodeDecodeError):
        # process() offers a retry / report path for exactly this value.
        return "invalid"

    if not dfs:
        return "no-data"
    return extract(pd.concat(dfs, ignore_index=True))


def donate(key, json_string):
    """Return the command that donates ``json_string`` under ``key``."""
    # NOTE(review): the body of this function lies outside the visible diff
    # hunks; it is reconstructed from the imported CommandSystemDonate and
    # must be confirmed against the repository.
    return CommandSystemDonate(key, json_string)


def exit(code, info):
    """Return the command that ends the flow with ``code`` and ``info``."""
    return CommandSystemExit(code, info)


if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1:
        print(extract_data_from_zip(sys.argv[1]))
    else:
        print("please provide a zip file as argument")
"""Tests for the location-history extraction helpers in ``port.script``."""
from datetime import datetime
import zipfile

import pytest

from port.script import aggregate_distance_by_day_activity
from port.script import extract
from port.script import extract_data_from_zip
from port.script import parse_json_to_dataframe


def _segment(start_timestamp, activity_type, distance_meters):
    """Build one ``activitySegment`` timeline object for the fixtures."""
    return {
        "activitySegment": {
            "duration": {"startTimestamp": start_timestamp},
            "activityType": activity_type,
            "waypointPath": {"distanceMeters": distance_meters},
        }
    }


@pytest.fixture
def sample_data():
    """A document with a single cycling segment."""
    return {
        "timelineObjects": [
            _segment("2023-04-01T19:13:27.023Z", "CYCLING", 3600.33),
        ]
    }


@pytest.fixture
def sample_data_multiple_activities():
    """Two cycling and two running segments on 2023-04-01, one walk on 2023-04-02."""
    return {
        "timelineObjects": [
            _segment("2023-04-01T19:13:27.023Z", "CYCLING", 3600.33),
            _segment("2023-04-01T20:13:27.023Z", "CYCLING", 1400.0),
            _segment("2023-04-02T08:13:27.023Z", "WALKING", 800.5),
            _segment("2023-04-01T19:13:27.023Z", "RUNNING", 3600.33),
            _segment("2023-04-01T20:13:27.023Z", "RUNNING", 1400.0),
        ]
    }


def test_parse_json_to_dataframe(sample_data):
    df = parse_json_to_dataframe(sample_data)
    assert len(df) == 1
    assert df.iloc[0]["activityType"] == "CYCLING"
    assert df.iloc[0]["distanceMeters"] == 3600.33
    assert isinstance(df.iloc[0]["startTimestamp"], datetime)


def test_parse_json_to_dataframe_skips_non_walking_or_cycling():
    # Only WALKING, CYCLING and RUNNING are reported; DRIVING must be dropped
    # while the two supported segments are kept.
    parsed_dict = {
        "timelineObjects": [
            _segment("2023-09-17T10:00:00Z", "WALKING", 1000),
            _segment("2023-09-17T11:00:00Z", "CYCLING", 5000),
            _segment("2023-09-17T12:00:00Z", "DRIVING", 20000),
        ]
    }

    df = parse_json_to_dataframe(parsed_dict)
    assert "DRIVING" not in df.activityType.values
    assert sorted(df.activityType.values) == ["CYCLING", "WALKING"]


def test_parse_json_to_dataframe_skips_entries_before_filter_date():
    parsed_dict = {
        "timelineObjects": [
            _segment("2016-12-31T19:13:27.023Z", "CYCLING", 3600.33),
        ]
    }
    df = parse_json_to_dataframe(parsed_dict)
    assert len(df) == 0


def test_aggregate_distance_by_day_activity(sample_data):
    df = parse_json_to_dataframe(sample_data)
    aggregated_df = aggregate_distance_by_day_activity(df)

    assert len(aggregated_df) == 1
    assert aggregated_df.iloc[0]["startTimestamp"] == "2023-04-01"
    assert aggregated_df.iloc[0]["activityType"] == "CYCLING"
    assert aggregated_df.iloc[0]["distanceMeters"] == 3600.33


def test_aggregation_over_multiple_activities(sample_data_multiple_activities):
    df = parse_json_to_dataframe(sample_data_multiple_activities)
    aggregated_df = aggregate_distance_by_day_activity(df)

    # One aggregated row per (day, activity) pair: cycling and running on
    # 2023-04-01 plus walking on 2023-04-02.
    assert len(aggregated_df) == 3

    # For 2023-04-01, there were two cycling activities. We sum their distances.
    cycling_data = aggregated_df[aggregated_df["activityType"] == "CYCLING"]
    assert len(cycling_data) == 1
    assert cycling_data.iloc[0]["distanceMeters"] == pytest.approx(3600.33 + 1400.0)

    # For 2023-04-02, there was one walking activity.
    walking_data = aggregated_df[aggregated_df["activityType"] == "WALKING"]
    assert len(walking_data) == 1
    assert walking_data.iloc[0]["distanceMeters"] == pytest.approx(800.5)

    # For 2023-04-01, there were two running activities. We sum their distances.
    running_data = aggregated_df[aggregated_df["activityType"] == "RUNNING"]
    assert len(running_data) == 1
    assert running_data.iloc[0]["distanceMeters"] == pytest.approx(3600.33 + 1400.0)


def test_extract_sample_data(sample_data):
    results = extract(parse_json_to_dataframe(sample_data))

    assert len(results) == 1
    assert results[0].id == "cycling"
    assert results[0].title.translations["nl"] == "Gefietst"
    for result in results:
        assert "distanceMeters" not in result.data_frame.columns
        assert "Afstand in m" in result.data_frame.columns


def test_empty_zip(tmp_path):
    path = tmp_path.joinpath("test.zip")
    # Opening in "w" mode and closing immediately writes a valid, empty archive.
    with zipfile.ZipFile(path, "w"):
        pass
    assert extract_data_from_zip(path) == "no-data"