owid · veronikasamborska1994 · Jul 25, 2023 · Jul 24, 2023 · Jul 24, 2023 · Jul 24, 2023
diff --git a/dag/artificial_intelligence.yml b/dag/artificial_intelligence.yml
@@ -205,3 +205,4 @@ steps:
   - data://meadow/artificial_intelligence/2023-07-07/semiconductors_cset
   data://grapher/artificial_intelligence/2023-07-07/semiconductors_cset:
   - data://garden/artificial_intelligence/2023-07-07/semiconductors_cset
+
diff --git a/etl/steps/data/garden/artificial_intelligence/2023-06-14/ai_national_strategy.py b/etl/steps/data/garden/artificial_intelligence/2023-06-14/ai_national_strategy.py
@@ -61,7 +61,7 @@ def run(dest_dir: str) -> None:
             group["released_national_strategy_on_ai"].fillna(method="ffill", inplace=True)
 
         # Fill remaining NaN values with "Not Released"
-        group["released_national_strategy_on_ai"].fillna("Not Released", inplace=True)
+        group["released_national_strategy_on_ai"].fillna("Not released", inplace=True)
         df_merged.loc[group.index] = group
     df_merged.drop("released", axis=1, inplace=True)
     tb = Table(df_merged, short_name=paths.short_name, underscore=True)

diff --git a/etl/steps/data/garden/artificial_intelligence/2023-06-14/ai_robots.meta.yml b/etl/steps/data/garden/artificial_intelligence/2023-06-14/ai_robots.meta.yml
@@ -184,22 +184,23 @@ tables:
         unit: 'robots'
         description: "Industrial robots are defined as “automatically controlled, reprogrammable, multipurpose manipulator, programmable in three or more axes, which can be either fixed in place or mobile for use in industrial automation applications."
         display:
+            name: Total industrial robots in operation
             numDecimalPlaces: 0
 
       number_of_industrial_robots_installed_2021:
         title: Total number of industrial robots installed by contry, 2021
         unit: 'robots'
         description: "Industrial robots are defined as “automatically controlled, reprogrammable, multipurpose manipulator, programmable in three or more axes, which can be either fixed in place or mobile for use in industrial automation applications."
         display:
-          name: Number of industrial robots installed
+          name: Total industrial robots installed
           numDecimalPlaces: 0
 
       annual_count__number_of_industrial_robots_installed:
         title: Annual number of industrial robots installed in the world 2011-2021
         unit: 'robots'
         description: "Industrial robots are defined as “automatically controlled, reprogrammable, multipurpose manipulator, programmable in three or more axes, which can be either fixed in place or mobile for use in industrial automation applications."
         display:
-          name: Number of industrial robots installed
+          name: Annual industrial robots installed
           numDecimalPlaces: 0
 
       new_robots_installed__number_of_industrial_robots_installed:
@@ -208,4 +209,11 @@ tables:
         description: "Industrial robots are defined as “automatically controlled, reprogrammable, multipurpose manipulator, programmable in three or more axes, which can be either fixed in place or mobile for use in industrial automation applications."
         display:
           name: Number of industrial robots installed
+          numDecimalPlaces: 0
+      unspecified_others:
+        title: Unspecified or Other Sector
+        unit: 'robots'
+        description: "Industrial robots are defined as “automatically controlled, reprogrammable, multipurpose manipulator, programmable in three or more axes, which can be either fixed in place or mobile for use in industrial automation applications."
+        display:
+          name: Unspecified or other
           numDecimalPlaces: 0
diff --git a/etl/steps/data/garden/artificial_intelligence/2023-06-14/ai_robots.py b/etl/steps/data/garden/artificial_intelligence/2023-06-14/ai_robots.py
@@ -101,6 +101,7 @@ def run(dest_dir: str) -> None:
 
     # Merge pivot table for professional service robots, application area and sector with aggregates
     merge_all = pd.merge(merge_service, df_agg_clean, on=["year", "country"], how="outer")
+    merge_all["unspecified_others"] = merge_all["Unspecified Sector"] + merge_all["All Others"]
 
     # Set the index as 'country' and 'year'
     merge_all.set_index(["country", "year"], inplace=True)

diff --git a/etl/steps/data/garden/artificial_intelligence/2023-06-26/ai_wrp_2021.meta.yml b/etl/steps/data/garden/artificial_intelligence/2023-06-26/ai_wrp_2021.meta.yml
@@ -66,9 +66,18 @@ tables:
         display:
           numDecimalPlaces: 0
 
+      dk_no_op:
+        title: "Don't have an opinion or Don't know - help/harm question"
+        description: Share of respondents who said "Don't have an opinion" or "Don't know" to the question "Will Artificial Intelligence help or harm people in the next 20 years?".
+        unit: '%'
+        short_unit: '%'
+        display:
+          name: "No opinion or don't know"
+          numDecimalPlaces: 0
+
       other_help_harm:
-        title: "Don't have an opinion, Don't know, or Refused - help/harm question"
-        description: Share of respondents who refused to answer the question or said "Don't have an opinion" to the question "Will Artificial Intelligence help or harm people in the next 20 years?".
+        title: "Don't have an opinion, Don't know, or Refused - help/harm question"
+        description: Share of respondents who said "Don't have an opinion" or "Don't know" to, or refused to answer, the question "Will Artificial Intelligence help or harm people in the next 20 years?".
         unit: '%'
         short_unit: '%'
         display:
@@ -83,3 +92,30 @@ tables:
         display:
           name: "Other"
           numDecimalPlaces: 0
+
+      refused__help_harm:
+        title: "Refused - help/harm question"
+        description: Share of respondents who refused to answer the question "Will Artificial Intelligence help or harm people in the next 20 years?".
+        unit: '%'
+        short_unit: '%'
+        display:
+          name: "No response"
+          numDecimalPlaces: 0
+
+      dk__cars:
+        title: "Don't know - self-driving cars question"
+        description: Share of respondents who said "Don't know" to the question "Would you feel safe in a car driven by a computer without a human driver?".
+        unit: '%'
+        short_unit: '%'
+        display:
+          name: "Don't know"
+          numDecimalPlaces: 0
+
+      refused__cars:
+        title: "Refused - self-driving cars question"
+        description: Share of respondents who refused to answer the question "Would you feel safe in a car driven by a computer without a human driver?".
+        unit: '%'
+        short_unit: '%'
+        display:
+          name: "No response"
+          numDecimalPlaces: 0
diff --git a/etl/steps/data/garden/artificial_intelligence/2023-06-26/ai_wrp_2021.py b/etl/steps/data/garden/artificial_intelligence/2023-06-26/ai_wrp_2021.py
@@ -2,6 +2,7 @@
 
 from typing import cast
 
+import numpy as np
 import pandas as pd
 from owid.catalog import Dataset, Table
 from structlog import get_logger
@@ -57,10 +58,17 @@ def run(dest_dir: str) -> None:
     columns_to_split_by = ["country", "gender", "education", "income_5", "emp_2010", "agegroups4", "globalregion"]
 
     # Dictionary to map response codes to labels for question 9
-    dict_q9 = {1: "Mostly help", 2: "Mostly harm", 3: "Don't have an opinion", 4: "Neither", 98: "DK", 99: "Refused"}
+    dict_q9 = {
+        1: "Mostly help",
+        2: "Mostly harm",
+        3: "Don't have an opinion",
+        4: "Neither",
+        98: "DK(help/harm)",
+        99: "Refused(help/harm)",
+    }
 
     # Dictionary to map response codes to labels for question 8
-    dict_q8 = {1: "Yes, would feel safe", 2: "No, would not feel safe", 98: "DK", 99: "Refused"}
+    dict_q8 = {1: "Yes, would feel safe", 2: "No, would not feel safe", 98: "DK(cars)", 99: "Refused(cars)"}
 
     # Create a list of DataFrames for each column_to_split_by for question 8
     df_q8_list = []
@@ -95,43 +103,28 @@ def run(dest_dir: str) -> None:
                 "No, would not feel safe",
                 "Mostly harm",
                 "Neither",
+                "DK(help/harm)",
+                "Refused(help/harm)",
+                "DK(cars)",
+                "Refused(cars)",
+                "Don't have an opinion",
             ]
         ]
         .dropna(subset=["country"])
         .copy()
     )
-    # Select rows with categories (NaN country rows)
-    world_df = df_merge[df_merge["country"].isna()].copy()
-    world_df.reset_index(drop=True, inplace=True)
 
-    # Set country as World
-    world_df["country"] = world_df["country"].astype(str)
-    world_df.loc[world_df["country"] == "nan", "country"] = "World"
-    # Calculates the percentage of valid responses for the "Mostly help" column in a DataFrame, split by gender, income etc.
-    conc_df_help = pivot_by_category(world_df, "Mostly help")
-    conc_df_harm = pivot_by_category(world_df, "Mostly harm")
-    conc_df_neither = pivot_by_category(world_df, "Neither")
+    merge_rest = calculate_world_data(df_merge, df_without_categories)
 
-    merge_help_harm = pd.merge(conc_df_help, conc_df_harm, on=["year", "country"], how="outer")
-    merge_help_harm_neither = pd.merge(merge_help_harm, conc_df_neither, on=["year", "country"], how="outer")
-
-    # Calculates the percentage of valid responses for a "Yes, would feel safe column in a DataFrame, split by gender, income etc.
-    conc_df_yes = pivot_by_category(world_df, "Yes, would feel safe")
-    conc_df_no = pivot_by_category(world_df, "No, would not feel safe")
-    merge_yes_no = pd.merge(conc_df_yes, conc_df_no, on=["year", "country"], how="outer")
+    tb = Table(merge_rest, short_name=paths.short_name, underscore=True)
 
-    # Merge  all dataframes into one
-    merge_categorized = pd.merge(merge_help_harm_neither, merge_yes_no, on=["year", "country"], how="outer")
-    merge_rest = pd.merge(df_without_categories, merge_categorized, on=["year", "country"], how="outer")
-    merge_rest["other_yes_no"] = 100 - (merge_rest["Yes, would feel safe"] + merge_rest["No, would not feel safe"])
-    merge_rest["other_help_harm"] = 100 - (
-        merge_rest["Mostly help"] + merge_rest["Mostly harm"] + merge_rest["Neither"]
+    tb["dk_no_op"] = tb[["dk__help_harm", "dont_have_an_opinion"]].sum(axis=1).values
+    tb["other_help_harm"] = tb[["dk__help_harm", "dont_have_an_opinion", "refused__help_harm"]].sum(axis=1).values
+    tb["other_yes_no"] = tb[["dk__cars", "refused__cars"]].sum(axis=1).values
+    tb[["dk_no_op", "other_help_harm", "other_yes_no"]] = tb[["dk_no_op", "other_help_harm", "other_yes_no"]].replace(
+        0.0, np.NaN
     )
 
-    merge_rest.set_index(["year", "country"], inplace=True)
-
-    tb = Table(merge_rest, short_name=paths.short_name, underscore=True)
-
     #
     # Save outputs.
     #
@@ -150,7 +143,8 @@ def calculate_percentage(df, column, valid_responses_dict, column_to_split_by):
     Args:
         df (DataFrame): The input DataFrame.
         column (str): The column name to calculate the percentage.
-        valid_responses_dict (dict): A dictionary mapping valid response codes to their corresponding labels.
+        valid_responses_dict (dict): A dictionary mapping valid
+            response codes to their corresponding labels.
         column_to_split_by (str): The column name to split by.
     Returns:
         DataFrame: A DataFrame with columns: the column_to_split_by, "year", "column", "count", and "percentage".
@@ -210,9 +204,67 @@ def question_extract(q, df, column_to_split_by, dict_q):
     pivoted_df.columns.name = None
 
     if q == "q9":
-        return pivoted_df[["year", column_to_split_by, "Mostly help", "Mostly harm", "Neither"]]
+        return pivoted_df[
+            [
+                "year",
+                column_to_split_by,
+                "Mostly help",
+                "Mostly harm",
+                "Neither",
+                "Don't have an opinion",
+                "DK(help/harm)",
+                "Refused(help/harm)",
+            ]
+        ]
     else:
-        return pivoted_df[["year", column_to_split_by, "Yes, would feel safe", "No, would not feel safe"]]
+        return pivoted_df[
+            ["year", column_to_split_by, "Yes, would feel safe", "No, would not feel safe", "DK(cars)", "Refused(cars)"]
+        ]
+
+
+def calculate_world_data(df_merge, df_without_categories):
+    """Combine per-country results with "World" rows split by respondent category.
+
+    Rows of df_merge with a missing country are relabelled "World", pivoted per response
+    label with pivot_by_category, and outer-merged with df_without_categories.
+
+    Returns:
+        DataFrame: The merged data, indexed by "year" and "country".
+    """
+    merge_keys = ["year", "country"]
+
+    def merge_pivots(labels):
+        # Outer-merge the pivot_by_category output of every label, in the given order.
+        combined = None
+        for label in labels:
+            pivoted = pivot_by_category(world_df, label)
+            if combined is None:
+                combined = pivoted
+            else:
+                combined = pd.merge(combined, pivoted, on=merge_keys, how="outer")
+        return combined
+
+    # Rows without a country carry the category breakdowns; label them as "World".
+    is_world = df_merge["country"].isna()
+    world_df = df_merge[is_world].copy().reset_index(drop=True)
+    world_df["country"] = world_df["country"].astype(str)
+    world_df.loc[world_df["country"] == "nan", "country"] = "World"
+
+    # Response labels of the help/harm question.
+    help_harm_labels = [
+        "Mostly help",
+        "Mostly harm",
+        "Neither",
+        "DK(help/harm)",
+        "Don't have an opinion",
+        "Refused(help/harm)",
+    ]
+    # Response labels of the self-driving cars question.
+    cars_labels = ["Yes, would feel safe", "No, would not feel safe", "DK(cars)", "Refused(cars)"]
+
+    categorized = pd.merge(merge_pivots(help_harm_labels), merge_pivots(cars_labels), on=merge_keys, how="outer")
+    merged = pd.merge(df_without_categories, categorized, on=merge_keys, how="outer")
+    return merged.set_index(merge_keys)
 
 
 def map_values(df):

diff --git a/etl/steps/data/garden/artificial_intelligence/2023-06-26/ai_wrp_2021_grouped.meta.yml b/etl/steps/data/garden/artificial_intelligence/2023-06-26/ai_wrp_2021_grouped.meta.yml
@@ -66,20 +66,73 @@ tables:
         display:
           numDecimalPlaces: 0
 
-      other_help_harm:
-        title: "Don't have an opinion, Don't know, or Refused - help/harm question"
-        description: Share of respondents who refused to answer the question or said "Don't have an opinion" to the question "Will Artificial Intelligence help or harm people in the next 20 years?".
+      dk_no_op_value:
+        title: "Don't have an opinion or Don't know - help/harm question"
+        description: Share of respondents who said "Don't have an opinion" or "Don't know" to the question "Will Artificial Intelligence help or harm people in the next 20 years?".
+        unit: '%'
+        short_unit: '%'
+        display:
+          name: "No opinion or don't know"
+          numDecimalPlaces: 0
+
+      other_help_harm_value:
+        title: "Don't have an opinion, Don't know, or Refused - help/harm question"
+        description: Share of respondents who said "Don't have an opinion" or "Don't know" to, or refused to answer, the question "Will Artificial Intelligence help or harm people in the next 20 years?".
         unit: '%'
         short_unit: '%'
         display:
           name: "Other"
           numDecimalPlaces: 0
 
-      other_yes_no:
+      other_yes_no_value:
         title: "Don't know or Refused - self-driving cars question"
         description: Share of respondents who refused to answer the question or said "Don't know" to the question "Would you feel safe in a car driven by a computer without a human driver?".
         unit: '%'
         short_unit: '%'
         display:
           name: "Other"
           numDecimalPlaces: 0
+
+      refused__help_harm_value:
+        title: "Refused - help/harm question"
+        description: Share of respondents who refused to answer the question "Will Artificial Intelligence help or harm people in the next 20 years?".
+        unit: '%'
+        short_unit: '%'
+        display:
+          name: "No response"
+          numDecimalPlaces: 0
+
+      dk__cars_value:
+        title: "Don't know - self-driving cars question"
+        description: Share of respondents who said "Don't know" to the question "Would you feel safe in a car driven by a computer without a human driver?".
+        unit: '%'
+        short_unit: '%'
+        display:
+          name: "Don't know"
+          numDecimalPlaces: 0
+
+      refused__cars_value:
+        title: "Refused - self-driving cars question"
+        description: Share of respondents who refused to answer the question "Would you feel safe in a car driven by a computer without a human driver?".
+        unit: '%'
+        short_unit: '%'
+        display:
+          name: "No response"
+          numDecimalPlaces: 0
+      dk__help_harm_value:
+        title: "Don't know - help/harm question"
+        description: Share of respondents who responded "Don't know" to the question "Will Artificial Intelligence help or harm people in the next 20 years?".
+        unit: '%'
+        short_unit: '%'
+        display:
+          name: "Don't know"
+          numDecimalPlaces: 0
+
+      dont_have_an_opinion_value:
+        title: "Don't have an opinion - help/harm question"
+        description: Share of respondents who responded "Don't have an opinion" to the question "Will Artificial Intelligence help or harm people in the next 20 years?".
+        unit: '%'
+        short_unit: '%'
+        display:
+          name: "Don't have an opinion"
+          numDecimalPlaces: 0