diff --git a/doc/source/api/api.md b/doc/source/api/api.md
index d8e5808e..46cd77ef 100644
--- a/doc/source/api/api.md
+++ b/doc/source/api/api.md
@@ -1,4 +1,4 @@
-# API reference
+# API Reference
 
 ## Props
 
@@ -27,12 +27,3 @@
 .. automodule:: port.helpers.validate
    :members:
 ```
-
-## Platforms
-
-### ChatGPT
-
-```{eval-rst}
-.. automodule:: port.platforms.chatgpt
-   :members:
-```
diff --git a/doc/source/api/index.rst b/doc/source/api/index.rst
index 574ca121..15da24b5 100644
--- a/doc/source/api/index.rst
+++ b/doc/source/api/index.rst
@@ -1,5 +1,5 @@
-API Documentation
-=============================
+API Reference
+=============
 
 .. toctree::
    :maxdepth: 3
diff --git a/doc/source/articles/index.rst b/doc/source/articles/index.rst
index f32bbe6e..c1474238 100644
--- a/doc/source/articles/index.rst
+++ b/doc/source/articles/index.rst
@@ -7,7 +7,7 @@ Articles
    introduction-to-data-donation.md
    installation.md
    creating-your-own-data-donation-task.md
+   visualizations.md
    data-donation-checklist.md
    deployment.md
    next-in-docker.md
-   visualizations.md
diff --git a/doc/source/conf.py b/doc/source/conf.py
index d8465e9d..0859a641 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -32,3 +32,9 @@
 
 html_theme = 'piccolo_theme'
 html_static_path = ['_static']
+
+html_sidebars = {
+    '**': [
+        'globaltoc.html',
+    ]
+}
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 3436c4b6..bb6395e0 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -4,6 +4,10 @@ The Data Donation Task Documentation
 
 Welcome to the Data Donation Task Documentation!
 This is the place to learn about the data donation task.
+
+Getting started
+---------------
+
 Checkout the following wiki articles to get started:
 
 .. toctree::
@@ -11,6 +15,8 @@ Checkout the following wiki articles to get started:
 
    articles/index
 
+API Reference
+-----------------
 You can find the API documentation here:
 
 .. toctree::
@@ -19,6 +25,15 @@
 
    api/index
 
+Standard scripts
+----------------
+
+We provide standard extraction scripts for various platforms, which you can find here:
+
+.. toctree::
+   :maxdepth: 1
+
+   standard_scripts/index
 
 Indices and tables
 ==================
diff --git a/doc/source/standard_scripts/index.rst b/doc/source/standard_scripts/index.rst
new file mode 100644
index 00000000..f38e98ce
--- /dev/null
+++ b/doc/source/standard_scripts/index.rst
@@ -0,0 +1,32 @@
+Platform Documentation
+=============================
+
+For various platforms we provide default extraction scripts, so you do not have to reinvent the wheel.
+
+Feel free to use the extraction scripts as you see fit.
+
+To use one of these scripts, open the file ``src/framework/processing/py/port/main.py`` and change this line:
+
+.. code-block:: python
+
+    from port.script import process
+
+to:
+
+.. code-block:: python
+
+    # from port.script import process
+
+    # In this case the standard script for Instagram will be used:
+    from port.platforms.instagram import process
+
+ChatGPT
+-------
+
+.. automodule:: port.platforms.chatgpt
+
+
+Instagram
+---------
+
+.. automodule:: port.platforms.instagram
diff --git a/src/framework/processing/py/port/api/props.py b/src/framework/processing/py/port/api/props.py
index 1addf84d..5baa7ad0 100644
--- a/src/framework/processing/py/port/api/props.py
+++ b/src/framework/processing/py/port/api/props.py
@@ -106,18 +106,18 @@ class PropsUIPromptConsentFormTable:
     Attributes:
         id (str): A unique string to identify the table after donation.
title (Translatable): Title of the table. - data_frame (pd.DataFrame | Dict[str, Dict[str, Any]]): Table to be shown can be a pandas data frame or a dictionary + data_frame (pd.DataFrame | Dict[str, Dict[str, Any]]): Table to be shown can be a pandas DataFrame or a dictionary. description (Optional[Translatable]): Optional description of the table. visualizations (Optional[list]): Optional visualizations to be shown. folded (Optional[bool]): Whether the table should be initially folded. delete_option (Optional[bool]): Whether to show a delete option for the table. - Examples: - # Using a pandas DataFrame for data_frame + Examples:: + data_frame_df = pd.DataFrame([ {"column1": 1, "column2": 4}, {"column1": 2, "column2": 5}, - {"column1": 3, "column2": 6} + {"column1": 3, "column2": 6}, ]) example1 = PropsUIPromptConsentFormTable( @@ -126,12 +126,12 @@ class PropsUIPromptConsentFormTable: data_frame=data_frame_df, ) - # Using a dictionary for data_frame data_frame_dict = { "column1": {"0": 1, "1": 4}, "column2": {"0": 2, "1": 5}, - "column3": {"0": 3, "1": 6} + "column3": {"0": 3, "1": 6}, } + example2 = PropsUIPromptConsentFormTable( id="example2", title=Translatable("Table as Dictionary"), @@ -474,10 +474,12 @@ class PropsUIPageDonation: A multi-purpose page that gets shown to the user. Attributes: - platform (str): The platform name the user is currently in the process of donating data from. + platform (str): + The platform name the user is currently in the process of donating data from. header (PropsUIHeader): Page header. - body (PropsUIPromptRadioInput | PropsUIPromptConsentForm | PropsUIPromptFileInput | - PropsUIPromptFileInputMultiple | PropsUIPromptConfirm | PropsUIPromptQuestionnaire): + body (PropsUIPromptRadioInput | PropsUIPromptConsentForm | + PropsUIPromptFileInput | PropsUIPromptFileInputMultiple | + PropsUIPromptConfirm | PropsUIPromptQuestionnaire): Main body of the page. footer (Optional[PropsUIFooter]): Optional page footer. """ diff --git a/src/framework/processing/py/port/helpers/extraction_helpers.py b/src/framework/processing/py/port/helpers/extraction_helpers.py index 6f25291d..8a9df8c6 100644 --- a/src/framework/processing/py/port/helpers/extraction_helpers.py +++ b/src/framework/processing/py/port/helpers/extraction_helpers.py @@ -32,7 +32,8 @@ def dict_denester(inp: dict[Any, Any] | list[Any], new: dict[Any, Any] | None = Returns: dict[Any, Any]: A new denested dictionary. - Examples: + Examples:: + >>> nested_dict = {"a": {"b": {"c": 1}}, "d": [2, 3]} >>> dict_denester(nested_dict) {"a-b-c": 1, "d-0": 2, "d-1": 3} @@ -73,7 +74,8 @@ def find_item(d: dict[Any, Any], key_to_match: str) -> str: Raises: Exception: Logs an error message if an exception occurs during the search. - Examples: + Examples:: + >>> d = {"asd-asd-asd": 1, "asd-asd": 2, "qwe": 3} >>> find_item(d, "asd") "2" @@ -109,7 +111,8 @@ def find_items(d: dict[Any, Any], key_to_match: str) -> list: Raises: Exception: Logs an error message if an exception occurs during the search. - Examples: + Examples:: + >>> d = {"asd-1": "a", "asd-2": "b", "qwe": "c"} >>> find_items(d, "asd") ["a", "b"] @@ -140,7 +143,8 @@ def json_dumper(zfile: str) -> pd.DataFrame: Raises: Exception: Logs an error message if an exception occurs during the process. - Examples: + Examples:: + >>> df = json_dumper("data.zip") >>> print(df.head()) """ @@ -180,7 +184,8 @@ def fix_ascii_string(input: str) -> str: Returns: str: The fixed string with only ASCII characters, or the original string if an exception occurs. 
- Examples: + Examples:: + >>> fix_ascii_string("Hello, 世界!") "Hello, !" """ @@ -201,7 +206,8 @@ def replace_months(input_string: str) -> str: Returns: str: The input string with Dutch month abbreviations replaced by English equivalents. - Examples: + Examples:: + >>> replace_months("15 mei 2023") "15 may 2023" """ @@ -233,7 +239,8 @@ def epoch_to_iso(epoch_timestamp: str | int | float) -> str: Raises: Exception: Logs an error message if conversion fails. - Examples: + Examples:: + >>> epoch_to_iso(1632139200) "2021-09-20T12:00:00+00:00" """ @@ -257,7 +264,8 @@ def sort_isotimestamp_empty_timestamp_last(timestamp_series: pd.Series) -> pd.Se Returns: pd.Series: A Series of sorting keys, with -timestamp for valid dates and infinity for invalid/empty dates. - Examples: + Examples:: + >>> df = df.sort_values(by="Date", key=sort_isotimestamp_empty_timestamp_last) """ def convert_timestamp(timestamp): @@ -285,7 +293,8 @@ def fix_latin1_string(input: str) -> str: Returns: str: The fixed string after encoding and decoding, or the original string if an exception occurs. - Examples: + Examples:: + >>> fix_latin1_string("café") "café" """ @@ -319,7 +328,8 @@ def extract_file_from_zip(zfile: str, file_to_extract: str) -> io.BytesIO: zipfile.BadZipFile: Logs an error if the zip file is invalid. Exception: Logs any other unexpected errors. - Examples: + Examples:: + >>> extracted_file = extract_file_from_zip("archive.zip", "data.txt") >>> content = extracted_file.getvalue().decode('utf-8') """ @@ -385,7 +395,8 @@ def _json_reader_file(json_file: str, encoding: str) -> Any: Returns: Any: The parsed JSON data. - Examples: + Examples:: + >>> data = _json_reader_file("data.json", "utf-8") >>> print(data) {'key': 'value'} @@ -413,7 +424,8 @@ def _read_json(json_input: Any, json_reader: Callable[[Any, str], Any]) -> dict[ json.JSONDecodeError: Logs an error if JSON decoding fails. Exception: Logs any other unexpected errors. - Examples: + Examples:: + >>> data = _read_json(b'{"key": "value"}', _json_reader_bytes) >>> print(data) {'key': 'value'} @@ -456,7 +468,8 @@ def read_json_from_bytes(json_bytes: io.BytesIO) -> dict[Any, Any] | list[Any]: dict[Any, Any] | list[Any]: The parsed JSON data as a dictionary or list. Returns an empty dictionary if parsing fails. - Examples: + Examples:: + >>> buffer = io.BytesIO(b'{"key": "value"}') >>> data = read_json_from_bytes(buffer) >>> print(data) @@ -483,7 +496,8 @@ def read_json_from_file(json_file: str) -> dict[Any, Any] | list[Any]: dict[Any, Any] | list[Any]: The parsed JSON data as a dictionary or list. Returns an empty dictionary if parsing fails. - Examples: + Examples:: + >>> data = read_json_from_file("data.json") >>> print(data) {'key': 'value'} @@ -504,6 +518,7 @@ def read_csv_from_bytes(json_bytes: io.BytesIO) -> list[dict[Any, Any]]: Returns an empty list if parsing fails. Examples: + >>> buffer = io.BytesIO(b'name,age\\nAlice,30\\nBob,25') >>> data = read_csv_from_bytes(buffer) >>> print(data) @@ -536,6 +551,7 @@ def read_csv_from_bytes_to_df(json_bytes: io.BytesIO) -> pd.DataFrame: pd.DataFrame: A pandas DataFrame containing the CSV data. 
     Examples:
+
         >>> buffer = io.BytesIO(b'name,age\\nAlice,30\\nBob,25')
         >>> df = read_csv_from_bytes_to_df(buffer)
         >>> print(df)
diff --git a/src/framework/processing/py/port/helpers/port_helpers.py b/src/framework/processing/py/port/helpers/port_helpers.py
index 606dfe63..883f756f 100644
--- a/src/framework/processing/py/port/helpers/port_helpers.py
+++ b/src/framework/processing/py/port/helpers/port_helpers.py
@@ -167,7 +167,8 @@ def exit(code: int, info: str) -> CommandSystemExit:
     Returns:
         CommandSystemExit: A system command that initiates the exit process in Next.
 
-    Examples:
+    Examples::
+
         yield exit(0, "Success")
     """
     return CommandSystemExit(code, info)
diff --git a/src/framework/processing/py/port/platforms/instagram.py b/src/framework/processing/py/port/platforms/instagram.py
new file mode 100644
index 00000000..f2a92519
--- /dev/null
+++ b/src/framework/processing/py/port/platforms/instagram.py
@@ -0,0 +1,613 @@
+"""
+Instagram
+
+This module contains an example flow of an Instagram data donation study
+"""
+import logging
+
+import pandas as pd
+
+import port.api.props as props
+import port.helpers.extraction_helpers as eh
+import port.helpers.port_helpers as ph
+import port.helpers.validate as validate
+
+from port.helpers.validate import (
+    DDPCategory,
+    DDPFiletype,
+    Language,
+)
+
+logger = logging.getLogger(__name__)
+
+DDP_CATEGORIES = [
+    DDPCategory(
+        id="json_en",
+        ddp_filetype=DDPFiletype.JSON,
+        language=Language.EN,
+        known_files=[
+            "secret_conversations.json",
+            "personal_information.json",
+            "account_privacy_changes.json",
+            "account_based_in.json",
+            "recently_deleted_content.json",
+            "liked_posts.json",
+            "stories.json",
+            "profile_photos.json",
+            "followers.json",
+            "signup_information.json",
+            "comments_allowed_from.json",
+            "login_activity.json",
+            "your_topics.json",
+            "camera_information.json",
+            "recent_follow_requests.json",
+            "devices.json",
+            "professional_information.json",
+            "follow_requests_you've_received.json",
+            "eligibility.json",
+            "pending_follow_requests.json",
+            "videos_watched.json",
+            "ads_interests.json",
+            "account_searches.json",
+            "following.json",
+            "posts_viewed.json",
+            "recently_unfollowed_accounts.json",
+            "post_comments.json",
+            "account_information.json",
+            "accounts_you're_not_interested_in.json",
+            "use_cross-app_messaging.json",
+            "profile_changes.json",
+            "reels.json",
+        ],
+    )
+]
+
+
+
+def accounts_not_interested_in_to_df(instagram_zip: str) -> pd.DataFrame:
+
+    b = eh.extract_file_from_zip(instagram_zip, "accounts_you're_not_interested_in.json")
+    d = eh.read_json_from_bytes(b)
+
+    out = pd.DataFrame()
+    datapoints = []
+
+    try:
+        items = d["impressions_history_recs_hidden_authors"] # pyright: ignore
+        for item in items:
+            data = item.get("string_map_data", {})
+            account_name = data.get("Username", {}).get("value", None)
+            if "Time" in data:
+                timestamp = data.get("Time", {}).get("timestamp", "")
+            else:
+                timestamp = data.get("Tijd", {}).get("timestamp", "")
+
+            datapoints.append((
+                account_name,
+                eh.epoch_to_iso(timestamp)
+            ))
+        out = pd.DataFrame(datapoints, columns=["Account name", "Date"]) # pyright: ignore
+        out = out.sort_values(by="Date", key=eh.sort_isotimestamp_empty_timestamp_last)
+
+    except Exception as e:
+        logger.error("Exception caught: %s", e)
+
+    return out
+
+
+def ads_viewed_to_df(instagram_zip: str) -> pd.DataFrame:
+
+    b = eh.extract_file_from_zip(instagram_zip, "ads_viewed.json")
+    d = eh.read_json_from_bytes(b)
+
+    out = pd.DataFrame()
+    datapoints = []
+
+    try:
+        items = 
d["impressions_history_ads_seen"] # pyright: ignore + for item in items: + data = item.get("string_map_data", {}) + account_name = data.get("Author", {}).get("value", None) + if "Time" in data: + timestamp = data.get("Time", {}).get("timestamp", "") + else: + timestamp = data.get("Tijd", {}).get("timestamp", "") + + datapoints.append(( + account_name, + eh.epoch_to_iso(timestamp) + )) + out = pd.DataFrame(datapoints, columns=["Author of ad", "Date"]) # pyright: ignore + out = out.sort_values(by="Date", key=eh.sort_isotimestamp_empty_timestamp_last) + + except Exception as e: + logger.error("Exception caught: %s", e) + + return out + + +def posts_viewed_to_df(instagram_zip: str) -> pd.DataFrame: + + b = eh.extract_file_from_zip(instagram_zip, "posts_viewed.json") + d = eh.read_json_from_bytes(b) + + out = pd.DataFrame() + datapoints = [] + + try: + items = d["impressions_history_posts_seen"] # pyright: ignore + for item in items: + data = item.get("string_map_data", {}) + account_name = data.get("Author", {}).get("value", None) + if "Time" in data: + timestamp = data.get("Time", {}).get("timestamp", "") + else: + timestamp = data.get("Tijd", {}).get("timestamp", "") + + datapoints.append(( + account_name, + eh.epoch_to_iso(timestamp) + )) + out = pd.DataFrame(datapoints, columns=["Author", "Date"]) # pyright: ignore + out = out.sort_values(by="Date", key=eh.sort_isotimestamp_empty_timestamp_last) + + except Exception as e: + logger.error("Exception caught: %s", e) + + return out + + + +def posts_not_interested_in_to_df(instagram_zip: str) -> pd.DataFrame: + + b = eh.extract_file_from_zip(instagram_zip, "posts_you're_not_interested_in.json") + data = eh.read_json_from_bytes(b) + + out = pd.DataFrame() + datapoints = [] + + try: + items = data["impressions_history_posts_not_interested"] # pyright: ignore + for item in items: + d = eh.dict_denester(item.get("string_list_data")) + datapoints.append(( + eh.fix_latin1_string(eh.find_item(d, "value")), + eh.find_item(d, "href"), + eh.epoch_to_iso(eh.find_item(d, "timestamp")) + )) + out = pd.DataFrame(datapoints, columns=["Post", "Link", "Date"]) # pyright: ignore + out = out.sort_values(by="Date", key=eh.sort_isotimestamp_empty_timestamp_last) + + except Exception as e: + logger.error("Exception caught: %s", e) + + return out + + + +def videos_watched_to_df(instagram_zip: str) -> pd.DataFrame: + + b = eh.extract_file_from_zip(instagram_zip, "videos_watched.json") + d = eh.read_json_from_bytes(b) + + out = pd.DataFrame() + datapoints = [] + + try: + items = d["impressions_history_videos_watched"] # pyright: ignore + for item in items: + data = item.get("string_map_data", {}) + account_name = data.get("Author", {}).get("value", None) + if "Time" in data: + timestamp = data.get("Time", {}).get("timestamp", "") + else: + timestamp = data.get("Tijd", {}).get("timestamp", "") + + datapoints.append(( + account_name, + eh.epoch_to_iso(timestamp) + )) + out = pd.DataFrame(datapoints, columns=["Author", "Date"]) # pyright: ignore + out = out.sort_values(by="Date", key=eh.sort_isotimestamp_empty_timestamp_last) + + except Exception as e: + logger.error("Exception caught: %s", e) + + return out + + +def post_comments_to_df(instagram_zip: str) -> pd.DataFrame: + """ + You can have 1 to n files of post_comments_.json + """ + + out = pd.DataFrame() + datapoints = [] + i = 1 + + while True: + b = eh.extract_file_from_zip(instagram_zip, f"post_comments_{i}.json") + d = eh.read_json_from_bytes(b) + + if not d: + break + + try: + for item in d: + data = 
item.get("string_map_data", {}) + media_owner = data.get("Media Owner", {}).get("value", "") + comment = data.get("Comment", {}).get("value", "") + if "Time" in data: + timestamp = data.get("Time", {}).get("timestamp", "") + else: + timestamp = data.get("Tijd", {}).get("timestamp", "") + + datapoints.append(( + media_owner, + eh.fix_latin1_string(comment), + eh.epoch_to_iso(timestamp) + )) + i += 1 + + except Exception as e: + logger.error("Exception caught: %s", e) + return pd.DataFrame() + + out = pd.DataFrame(datapoints, columns=["Media Owner", "Comment", "Date"]) # pyright: ignore + + return out + + + +def following_to_df(instagram_zip: str) -> pd.DataFrame: + + b = eh.extract_file_from_zip(instagram_zip, "following.json") + data = eh.read_json_from_bytes(b) + + out = pd.DataFrame() + datapoints = [] + + try: + items = data["relationships_following"] # pyright: ignore + for item in items: + d = eh.dict_denester(item) + datapoints.append(( + eh.fix_latin1_string(eh.find_item(d, "value")), + eh.find_item(d, "href"), + eh.epoch_to_iso(eh.find_item(d, "timestamp")) + )) + out = pd.DataFrame(datapoints, columns=["Account", "Link", "Date"]) # pyright: ignore + out = out.sort_values(by="Date", key=eh.sort_isotimestamp_empty_timestamp_last) + + except Exception as e: + logger.error("Exception caught: %s", e) + + return out + + + +def liked_comments_to_df(instagram_zip: str) -> pd.DataFrame: + + b = eh.extract_file_from_zip(instagram_zip, "liked_comments.json") + data = eh.read_json_from_bytes(b) + + out = pd.DataFrame() + datapoints = [] + + try: + items = data["likes_comment_likes"] #pyright: ignore + for item in items: + d = eh.dict_denester(item) + datapoints.append(( + eh.fix_latin1_string(eh.find_item(d, "title")), + eh.fix_latin1_string(eh.find_item(d, "value")), + eh.find_items(d, "href"), + eh.epoch_to_iso(eh.find_item(d, "timestamp")) + )) + out = pd.DataFrame(datapoints, columns=["Account name", "Value", "Link", "Date"]) # pyright: ignore + out = out.sort_values(by="Date", key=eh.sort_isotimestamp_empty_timestamp_last) + + except Exception as e: + logger.error("Exception caught: %s", e) + + return out + + +def liked_posts_to_df(instagram_zip: str) -> pd.DataFrame: + + b = eh.extract_file_from_zip(instagram_zip, "liked_posts.json") + data = eh.read_json_from_bytes(b) + + out = pd.DataFrame() + datapoints = [] + + try: + items = data["likes_media_likes"] #pyright: ignore + for item in items: + d = eh.dict_denester(item) + datapoints.append(( + eh.fix_latin1_string(eh.find_item(d, "title")), + eh.fix_latin1_string(eh.find_item(d, "value")), + eh.find_items(d, "href"), + eh.epoch_to_iso(eh.find_item(d, "timestamp")) + )) + out = pd.DataFrame(datapoints, columns=["Account name", "Value", "Link", "Date"]) # pyright: ignore + out = out.sort_values(by="Date", key=eh.sort_isotimestamp_empty_timestamp_last) + + except Exception as e: + logger.error("Exception caught: %s", e) + + return out + + + +def extraction(instagram_zip: str) -> list[props.PropsUIPromptConsentFormTable]: + tables_to_render = [] + + df = posts_viewed_to_df(instagram_zip) + if not df.empty: + table_title = props.Translatable({ + "en": "Posts viewed on Instagram", + "nl": "Posts viewed on Instagram" + }) + table_description = props.Translatable({ + "en": "In this table you find the accounts of posts you viewed on Instagram sorted over time. Below, you find visualizations of different parts of this table. First, you find a timeline showing you the number of posts you viewed over time. 
Next, you find a histogram indicating how many posts you have viewed per hour of the day.", + "nl": "In this table you find the accounts of posts you viewed on Instagram sorted over time. Below, you find visualizations of different parts of this table. First, you find a timeline showing you the number of posts you viewed over time. Next, you find a histogram indicating how many posts you have viewed per hour of the day.", + }) + total_watched = { + "title": { + "en": "The total number of Instagram posts you viewed over time", + "nl": "The total number of Instagram posts you viewed over time", + }, + "type": "area", + "group": { + "column": "Date", + "dateFormat": "auto", + }, + "values": [{ + "label": "Count", + "aggregate": "count", + }] + } + + hour_of_the_day = { + "title": { + "en": "The total number of Instagram posts you have viewed per hour of the day", + "nl": "The total number of Instagram posts you have viewed per hour of the day", + }, + "type": "bar", + "group": { + "column": "Date", + "dateFormat": "hour_cycle", + "label": "Hour of the day", + }, + "values": [{ + "label": "Count" + }] + } + + table = props.PropsUIPromptConsentFormTable("instagram_posts_viewed", table_title, df, table_description, [total_watched, hour_of_the_day]) + tables_to_render.append(table) + + df = videos_watched_to_df(instagram_zip) + if not df.empty: + table_title = props.Translatable({ + "en": "Videos watched on Instagram", + "nl": "Videos watched on Instagram" + }) + table_description = props.Translatable({ + "en": "In this table you find the accounts of videos you watched on Instagram sorted over time. Below, you find a timeline showing you the number of videos you watched over time.", + "nl": "In this table you find the accounts of videos you watched on Instagram sorted over time. Below, you find a timeline showing you the number of videos you watched over time. ", + }) + + total_watched = { + "title": { + "en": "The total number of videos watched on Instagram over time", + "nl": "The total number of videos watched on Instagram over time", + }, + "type": "area", + "group": { + "column": "Date", + "dateFormat": "auto" + }, + "values": [{ + "aggregate": "count", + "label": "Count" + }] + } + + table = props.PropsUIPromptConsentFormTable("instagram_videos_watched", table_title, df, table_description, [total_watched]) + tables_to_render.append(table) + + + df = post_comments_to_df(instagram_zip) + if not df.empty: + table_title = props.Translatable({ + "en": "Comments on Instagram posts", + "nl": "Comments on Instagram posts", + }) + table_description = props.Translatable({ + "en": "In this table, you find the comments that you left behind on Instagram posts sorted over time. Below, you find a wordcloud, where the size of the word indicates how frequently that word has been used in these comments.", + "nl": "In this table, you find the comments that you left behind on Instagram posts sorted over time. 
Below, you find a wordcloud, where the size of the word indicates how frequently that word has been used in these comments.",
+        })
+        wordcloud = {
+            "title": {
+                "en": "Most common words in comments on posts",
+                "nl": "Most common words in comments on posts",
+            },
+            "type": "wordcloud",
+            "textColumn": "Comment",
+            "tokenize": True,
+        }
+        table = props.PropsUIPromptConsentFormTable("instagram_post_comments", table_title, df, table_description, [wordcloud])
+        tables_to_render.append(table)
+
+    df = accounts_not_interested_in_to_df(instagram_zip)
+    if not df.empty:
+        table_title = props.Translatable({
+            "en": "Instagram accounts not interested in",
+            "nl": "Instagram accounts not interested in"
+        })
+        table_description = props.Translatable({
+            "en": "",
+            "nl": "",
+        })
+        table = props.PropsUIPromptConsentFormTable("instagram_accounts_not_interested_in", table_title, df, table_description)
+        tables_to_render.append(table)
+
+    df = ads_viewed_to_df(instagram_zip)
+    if not df.empty:
+        table_title = props.Translatable({
+            "en": "Ads you viewed on Instagram",
+            "nl": "Ads you viewed on Instagram"
+        })
+        table_description = props.Translatable({
+            "en": "In this table, you find the ads that you viewed on Instagram sorted over time.",
+            "nl": "In this table, you find the ads that you viewed on Instagram sorted over time.",
+        })
+        table = props.PropsUIPromptConsentFormTable("instagram_ads_viewed", table_title, df, table_description)
+        tables_to_render.append(table)
+
+    df = posts_not_interested_in_to_df(instagram_zip)
+    if not df.empty:
+        table_title = props.Translatable({
+            "en": "Instagram posts not interested in",
+            "nl": "Instagram posts not interested in"
+        })
+        table_description = props.Translatable({
+            "en": "",
+            "nl": "",
+        })
+        table = props.PropsUIPromptConsentFormTable("instagram_posts_not_interested_in", table_title, df, table_description)
+        tables_to_render.append(table)
+
+
+    df = following_to_df(instagram_zip)
+    if not df.empty:
+        table_title = props.Translatable({
+            "en": "Accounts that you follow on Instagram",
+            "nl": "Accounts that you follow on Instagram"
+        })
+        table_description = props.Translatable({
+            "en": "In this table, you find the accounts that you follow on Instagram.",
+            "nl": "In this table, you find the accounts that you follow on Instagram.",
+        })
+        table = props.PropsUIPromptConsentFormTable("instagram_following", table_title, df, table_description)
+        tables_to_render.append(table)
+
+    df = liked_comments_to_df(instagram_zip)
+    if not df.empty:
+        table_title = props.Translatable({
+            "en": "Instagram liked comments",
+            "nl": "Instagram liked comments",
+        })
+        wordcloud = {
+            "title": {
+                "en": "Accounts whose comments you liked most",
+                "nl": "Accounts whose comments you liked most",
+            },
+            "type": "wordcloud",
+            "textColumn": "Account name",
+            "tokenize": False,
+        }
+        table_description = props.Translatable({
+            "en": "",
+            "nl": "",
+        })
+        table = props.PropsUIPromptConsentFormTable("instagram_liked_comments", table_title, df, table_description, [wordcloud])
+        tables_to_render.append(table)
+
+    df = liked_posts_to_df(instagram_zip)
+    if not df.empty:
+        table_description = props.Translatable({
+            "en": "",
+            "nl": "",
+        })
+        wordcloud = {
+            "title": {
+                "en": "Most liked accounts",
+                "nl": "Most liked accounts",
+            },
+            "type": "wordcloud",
+            "textColumn": "Account name",
+            "tokenize": False,
+        }
+        table_title = props.Translatable({
+            "en": "Instagram liked posts",
+            "nl": "Instagram liked posts",
+        })
+        table_description = props.Translatable({
+            "en": "",
+            "nl": "",
+        })
+        table = props.PropsUIPromptConsentFormTable("instagram_liked_posts", table_title, df, table_description, [wordcloud])
+        tables_to_render.append(table)
+
+    return tables_to_render
+
+
+# TEXTS
+SUBMIT_FILE_HEADER = props.Translatable({
+    "en": "Select your Instagram file",
+    "nl": "Selecteer uw Instagram bestand"
+})
+
+REVIEW_DATA_HEADER = props.Translatable({
+    "en": "Your Instagram data",
+    "nl": "Uw Instagram gegevens"
+})
+
+RETRY_HEADER = props.Translatable({
+    "en": "Try again",
+    "nl": "Probeer opnieuw"
+})
+
+REVIEW_DATA_DESCRIPTION = props.Translatable({
+    "en": "Below you will find a curated selection of Instagram data.",
+    "nl": "Below you will find a curated selection of Instagram data.",
+})
+
+
+def process(session_id: int):
+    platform_name = "Instagram"
+
+    table_list = None
+    while True:
+        logger.info("Prompt for file for %s", platform_name)
+
+        file_prompt = ph.generate_file_prompt("application/zip")
+        file_result = yield ph.render_page(SUBMIT_FILE_HEADER, file_prompt)
+
+        if file_result.__type__ == "PayloadString":
+            validation = validate.validate_zip(DDP_CATEGORIES, file_result.value)
+
+            # Happy flow: Valid DDP
+            if validation.get_status_code_id() == 0:
+                logger.info("Payload for %s", platform_name)
+                extraction_result = extraction(file_result.value)
+                table_list = extraction_result
+                break
+
+            # Enter retry flow, reason: the zip was not a valid Instagram DDP
+            if validation.get_status_code_id() != 0:
+                logger.info("Not a valid %s zip; No payload; prompt retry_confirmation", platform_name)
+                retry_prompt = ph.generate_retry_prompt(platform_name)
+                retry_result = yield ph.render_page(RETRY_HEADER, retry_prompt)
+
+                if retry_result.__type__ == "PayloadTrue":
+                    continue
+                else:
+                    logger.info("Skipped during retry flow")
+                    break
+
+        else:
+            logger.info("Skipped at file selection ending flow")
+            break
+
+    if table_list is not None:
+        logger.info("Prompt consent; %s", platform_name)
+        review_data_prompt = ph.generate_review_data_prompt(f"{session_id}-instagram", REVIEW_DATA_DESCRIPTION, table_list)
+        yield ph.render_page(REVIEW_DATA_HEADER, review_data_prompt)
+
+    yield ph.exit(0, "Success")
+    yield ph.render_end_page()
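For a quick look at what these extraction functions produce, they can also be called directly on a local export, outside the donation flow. A minimal sketch, assuming the port package from this repository is importable and that "instagram_export.zip" (a hypothetical path) is an Instagram data export downloaded in JSON format:

```python
# Minimal sketch: preview the extraction output locally, without the donation flow.
# "instagram_export.zip" is a hypothetical path to an Instagram JSON export.
from port.platforms.instagram import posts_viewed_to_df, following_to_df

posts_df = posts_viewed_to_df("instagram_export.zip")
following_df = following_to_df("instagram_export.zip")

print(posts_df.head())
print(following_df.head())
```

Each of these functions returns a pandas DataFrame (empty if the expected file is missing from the zip), which is the same data that ends up in the consent form tables built by `extraction()`.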