Skip to content

Commit

Permalink
✨ Use real users on staging servers (#3341)
Browse files Browse the repository at this point in the history
* ✨ Use real users on staging servers
  • Loading branch information
Marigold authored Nov 1, 2024
1 parent 22ff46a commit e49e43a
Show file tree
Hide file tree
Showing 15 changed files with 90 additions and 69 deletions.
27 changes: 15 additions & 12 deletions apps/chart_sync/admin_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def is_502_error(exception):


class AdminAPI(object):
def __init__(self, owid_env: OWIDEnv, grapher_user_id: Optional[int] = None):
def __init__(self, owid_env: OWIDEnv, grapher_user_id: Optional[int] = GRAPHER_USER_ID):
self.owid_env = owid_env
self.session_id = create_session_id(owid_env, grapher_user_id)

Expand All @@ -39,6 +39,12 @@ def _json_from_response(self, resp: requests.Response) -> dict:
raise AdminAPIError(resp.text) from e
return js

def _get_session_id(self, user_id: Optional[int] = None) -> str:
"""Return the session id for the given user, or the default session id."""
if user_id:
return create_session_id(self.owid_env, user_id)
return self.session_id

def get_chart_config(self, chart_id: int) -> dict:
resp = requests.get(
f"{self.owid_env.admin_api}/charts/{chart_id}.config.json",
Expand All @@ -47,32 +53,32 @@ def get_chart_config(self, chart_id: int) -> dict:
js = self._json_from_response(resp)
return js

def create_chart(self, chart_config: dict) -> dict:
def create_chart(self, chart_config: dict, user_id: Optional[int] = None) -> dict:
resp = requests.post(
self.owid_env.admin_api + "/charts",
cookies={"sessionid": self.session_id},
cookies={"sessionid": self._get_session_id(user_id)},
json=chart_config,
)
js = self._json_from_response(resp)
if not js["success"]:
raise AdminAPIError({"error": js["error"], "chart_config": chart_config})
return js

def update_chart(self, chart_id: int, chart_config: dict) -> dict:
def update_chart(self, chart_id: int, chart_config: dict, user_id: Optional[int] = None) -> dict:
resp = requests.put(
f"{self.owid_env.admin_api}/charts/{chart_id}",
cookies={"sessionid": self.session_id},
cookies={"sessionid": self._get_session_id(user_id)},
json=chart_config,
)
js = self._json_from_response(resp)
if not js["success"]:
raise AdminAPIError({"error": js["error"], "chart_config": chart_config})
return js

def set_tags(self, chart_id: int, tags: List[Dict[str, Any]]) -> dict:
def set_tags(self, chart_id: int, tags: List[Dict[str, Any]], user_id: Optional[int] = None) -> dict:
resp = requests.post(
f"{self.owid_env.admin_api}/charts/{chart_id}/setTags",
cookies={"sessionid": self.session_id},
cookies={"sessionid": self._get_session_id(user_id)},
json={"tags": tags},
)
js = self._json_from_response(resp)
Expand Down Expand Up @@ -107,13 +113,10 @@ def delete_grapher_config(self, variable_id: int) -> dict:


@cache
def create_session_id(owid_env: OWIDEnv, grapher_user_id: Optional[int] = None) -> str:
def create_session_id(owid_env: OWIDEnv, grapher_user_id: int) -> str:
engine = owid_env.get_engine()
with Session(engine) as session:
if grapher_user_id:
user = session.get(gm.User, grapher_user_id)
else:
user = session.get(gm.User, GRAPHER_USER_ID)
user = session.get(gm.User, grapher_user_id)
assert user
session_id = _create_user_session(session, user.email)
session.commit()
Expand Down
9 changes: 6 additions & 3 deletions apps/chart_sync/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ def cli(
# Map variable IDs from source to target
migrated_config = diff.source_chart.migrate_config(source_session, target_session)

# Get user who edited the chart
user_id = diff.source_chart.lastEditedByUserId

# Chart in target exists, update it
if diff.target_chart:
# Configs are equal, no need to update
Expand All @@ -174,7 +177,7 @@ def cli(
log.info("chart_sync.chart_update", slug=chart_slug, chart_id=chart_id)
charts_synced += 1
if not dry_run:
target_api.update_chart(chart_id, migrated_config)
target_api.update_chart(chart_id, migrated_config, user_id=user_id)

# Rejected chart diff
elif diff.is_rejected:
Expand Down Expand Up @@ -202,8 +205,8 @@ def cli(
if diff.is_approved:
charts_synced += 1
if not dry_run:
resp = target_api.create_chart(migrated_config)
target_api.set_tags(resp["chartId"], chart_tags)
resp = target_api.create_chart(migrated_config, user_id=user_id)
target_api.set_tags(resp["chartId"], chart_tags, user_id=user_id)
else:
resp = {"chartId": None}
log.info(
Expand Down
10 changes: 4 additions & 6 deletions apps/owidbot/chart_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,9 @@ def format_chart_diff(df: pd.DataFrame) -> str:
return f"""
<ul>
<li>{num_charts_reviewed}/{num_charts} reviewed charts</li>
<ul>
<li>Modified: {num_charts_modified_reviewed}/{num_charts_modified}</li>
<li>New: {num_charts_new_reviewed}/{num_charts_new}</li>
<li>Rejected: {num_charts_rejected}</li>
{errors}
</ul>
<li>Modified: {num_charts_modified_reviewed}/{num_charts_modified}</li>
<li>New: {num_charts_new_reviewed}/{num_charts_new}</li>
<li>Rejected: {num_charts_rejected}</li>
{errors}
</ul>
""".strip()
11 changes: 5 additions & 6 deletions apps/wizard/app_pages/chart_diff/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from apps.wizard.app_pages.chart_diff.utils import WARN_MSG, get_engines
from apps.wizard.utils import set_states
from apps.wizard.utils.components import Pagination
from etl import config
from etl.config import OWID_ENV

log = get_logger()
Expand Down Expand Up @@ -50,11 +49,11 @@
CHART_PER_PAGE = 10
# WARN_MSG += ["This tool is being developed! Please report any issues you encounter in `#proj-new-data-workflow`"]

if str(config.GRAPHER_USER_ID) != "1":
WARN_MSG.append(
"`GRAPHER_USER_ID` from your .env is not set to 1 (Admin user). Please modify your .env or use STAGING=1 flag to set it automatically. "
"All changes on staging servers must be done with Admin user."
)
# if str(config.GRAPHER_USER_ID) != "1":
# WARN_MSG.append(
# "`GRAPHER_USER_ID` from your .env is not set to 1 (Admin user). Please modify your .env or use STAGING=1 flag to set it automatically. "
# "All changes on staging servers must be done with Admin user."
# )

if WARN_MSG:
st.warning("- " + "\n\n- ".join(WARN_MSG))
Expand Down
42 changes: 16 additions & 26 deletions apps/wizard/app_pages/chart_diff/chart_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ def load_df(self, chart_ids: List[int] | None = None) -> pd.DataFrame:
"""Load changes in charts between environments from sessions."""
with Session(self.source_engine) as source_session:
with Session(self.target_engine) as target_session:
return modified_charts_by_admin(source_session, target_session, chart_ids=chart_ids)
return modified_charts_on_staging(source_session, target_session, chart_ids=chart_ids)

@property
def chart_ids_all(self):
Expand Down Expand Up @@ -540,7 +540,7 @@ def get_diffs_summary_df(self, cache: bool = False, **kwargs) -> pd.DataFrame:
return pd.DataFrame(summary)


def _modified_data_metadata_by_admin(
def _modified_data_metadata_on_staging(
source_session: Session, target_session: Session, chart_ids: Optional[List[int]] = None
) -> pd.DataFrame:
"""
Expand Down Expand Up @@ -570,23 +570,20 @@ def _modified_data_metadata_by_admin(
join datasets as d on v.datasetId = d.id
where v.dataChecksum is not null and v.metadataChecksum is not null and
"""
# NOTE: We assume that all changes on staging server are done by Admin user with ID = 1. This is
# set automatically if you use STAGING env variable.
where = """
-- include all charts from datasets that have been updated
(d.dataEditedByUserId = 1 or d.metadataEditedByUserId = 1)
-- only compare data/metadata that have been updated on staging server
(d.dataEditedAt >= %(timestamp_staging_creation)s or d.metadataEditedAt >= %(timestamp_staging_creation)s)
"""
query_source = base_q + where
params = {"timestamp_staging_creation": get_staging_creation_time(source_session)}
# Add filter for chart IDs
if chart_ids is not None:
where_charts = """
-- filter and get only charts with given IDs
cd.chartId in %(chart_ids)s
"""
query_source += " and " + where_charts
params = {"chart_ids": tuple(chart_ids)}
else:
params = {}
params["chart_ids"] = tuple(chart_ids)
source_df = read_sql(query_source, source_session, params=params)

# no charts, return empty dataframe
Expand Down Expand Up @@ -634,7 +631,7 @@ def _modified_data_metadata_by_admin(
return diff


def _modified_chart_configs_by_admin(
def _modified_chart_configs_on_staging(
source_session: Session, target_session: Session, chart_ids: Optional[List[int]] = None
) -> pd.DataFrame:
TIMESTAMP_STAGING_CREATION = get_staging_creation_time(source_session)
Expand All @@ -651,25 +648,22 @@ def _modified_chart_configs_by_admin(
join chart_configs as cc on c.configId = cc.id
where
"""
# NOTE: We assume that all changes on staging server are done by Admin user with ID = 1. This is
# set automatically if you use STAGING env variable.
where = """
-- only compare charts that have been updated on staging server by Admin user
-- only compare charts that have been updated on staging server
(
c.lastEditedByUserId = 1 or c.publishedByUserId = 1
c.lastEditedAt >= %(timestamp_staging_creation)s
)
"""
query_source = base_q + where
params = {"timestamp_staging_creation": TIMESTAMP_STAGING_CREATION}
# Add filter for chart IDs
if chart_ids is not None:
where_charts = """
-- filter and get only charts with given IDs
c.id in %(chart_ids)s
"""
query_source += " and " + where_charts
params = {"chart_ids": tuple(chart_ids)}
else:
params = {}
params["chart_ids"] = tuple(chart_ids)
source_df = read_sql(query_source, source_session, params=params)

# no charts, return empty dataframe
Expand All @@ -694,23 +688,19 @@ def _modified_chart_configs_by_admin(
diff["configEdited"] = source_df["chartChecksum"] != target_df["chartChecksum"]

# Add flag 'edited in staging'
diff["chartEditedInStaging"] = source_df["chartLastEditedAt"] >= TIMESTAMP_STAGING_CREATION

assert (
diff["chartEditedInStaging"].notna().all()
), "chartEditedInStaging has missing values! This might be due to `diff` and `eidted` dataframes not having the same number of rows."
diff["chartEditedInStaging"] = True

# Remove charts with no changes
return diff[["configEdited", "chartEditedInStaging"]]


def modified_charts_by_admin(
def modified_charts_on_staging(
source_session: Session, target_session: Session, chart_ids: Optional[List[int]] = None
) -> pd.DataFrame:
"""Get charts that have been modified in staging.
- It includes charts with different config, data or metadata checksums.
- It assumes that all changes on staging server are done by Admin user with ID = 1. That is, if there are changes by a different user in staging, they are not included.
- It detects changes by comparing updatedAt timestamps to staging creation time.
Optionally, you can provide a list of chart IDs to filter the results.
Expand All @@ -723,8 +713,8 @@ def modified_charts_by_admin(
TESTING:
- chartEditedInStaging: True if the chart config has been edited in staging.
"""
df_config = _modified_chart_configs_by_admin(source_session, target_session, chart_ids=chart_ids)
df_data_metadata = _modified_data_metadata_by_admin(source_session, target_session, chart_ids=chart_ids)
df_config = _modified_chart_configs_on_staging(source_session, target_session, chart_ids=chart_ids)
df_data_metadata = _modified_data_metadata_on_staging(source_session, target_session, chart_ids=chart_ids)

df = df_config.join(df_data_metadata, how="outer").fillna(False)

Expand Down
6 changes: 5 additions & 1 deletion apps/wizard/app_pages/chart_diff/conflict_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,12 +155,16 @@ def resolve_conflicts(self, rerun: bool = False):
# Verify config
config_new = validate_chart_config_and_set_defaults(config, schema=get_schema_from_url(config["$schema"]))

api = AdminAPI(SOURCE, grapher_user_id=1)
# User who last edited the chart
user_id = self.diff.source_chart.lastEditedByUserId

api = AdminAPI(SOURCE)
try:
# Push new chart to staging
api.update_chart(
chart_id=self.diff.chart_id,
chart_config=config_new,
user_id=user_id,
)
except HTTPError as e:
log.error(e)
Expand Down
8 changes: 2 additions & 6 deletions apps/wizard/app_pages/indicator_upgrade/charts_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,7 @@ def get_affected_charts_and_preview(indicator_mapping: Dict[int, int]) -> List[g
def push_new_charts(charts: List[gm.Chart]) -> None:
"""Updating charts in the database."""
# API to interact with the admin tool
# HACK: Forcing grapher user to be Admin so that it is detected by chart sync.
api = AdminAPI(OWID_ENV, grapher_user_id=1)
api = AdminAPI(OWID_ENV)
# Update charts
progress_text = "Updating charts..."
bar = st.progress(0, progress_text)
Expand All @@ -114,10 +113,7 @@ def push_new_charts(charts: List[gm.Chart]) -> None:
chart_id = chart.config["id"]
else:
raise ValueError(f"Chart {chart} does not have an ID in config.")
api.update_chart(
chart_id=chart_id,
chart_config=config_new,
)
api.update_chart(chart_id=chart_id, chart_config=config_new, user_id=chart.lastEditedByUserId)
# Show progress bar
percent_complete = int(100 * (i + 1) / len(charts))
bar.progress(percent_complete, text=f"{progress_text} {percent_complete}%")
Expand Down
8 changes: 8 additions & 0 deletions etl/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# etl
#

import datetime as dt
import re
import sys
import tempfile
Expand Down Expand Up @@ -1216,3 +1217,10 @@ def get_schema_from_url(schema_url: str) -> dict:
Schema of a chart configuration.
"""
return requests.get(schema_url, timeout=20, verify=TLS_VERIFY).json()


def last_date_accessed(tb: Table) -> str:
"""Get maximum date_accessed from all origins in the table and display it in a specific format. This
should be a replacement for {TODAY} in YAML templates."""
date_accessed = max([origin.date_accessed for col in tb.columns for origin in tb[col].m.origins])
return dt.datetime.strptime(date_accessed, "%Y-%m-%d").strftime("%d %B %Y")
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ definitions:
desc_wildfires: &desc_wildfires
- Wildfires are detected through the use of satellite imagery obtained from MODIS (Moderate Resolution Imaging Spectroradiometer) and VIIRS (Visible Infrared Imaging Radiometer Suite). These satellite systems are capable of identifying thermal anomalies and alterations in landscape patterns, which are indicative of burning.
- The data provider is presently engaged in a global accuracy assessment and acknowledged that they might be underestimating the genuine impact of wildfires, primarily due to constraints imposed by the spatial resolution of the sensors they employ.
desc_update: The 2024 data is incomplete and was last updated {TODAY}.
desc_update: The 2024 data is incomplete and was last updated {date_accessed}.
# Learn more about the available fields:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/dataset/
dataset:
Expand Down
9 changes: 7 additions & 2 deletions etl/steps/data/garden/climate/latest/weekly_wildfires.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""Load a meadow dataset and create a garden dataset."""


import numpy as np
import owid.catalog.processing as pr
import pandas as pd

from etl.data_helpers import geo
from etl.helpers import PathFinder, create_dataset
from etl.helpers import PathFinder, create_dataset, last_date_accessed

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)
Expand Down Expand Up @@ -103,7 +104,11 @@ def run(dest_dir: str) -> None:
#
# Create a new garden dataset with the same metadata as the meadow dataset.
ds_garden = create_dataset(
dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata
dest_dir,
tables=[tb],
check_variables_metadata=True,
default_metadata=ds_meadow.metadata,
yaml_params={"date_accessed": last_date_accessed(tb)},
)

# Save changes in the new garden dataset.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ definitions:
temperature_anomaly_above: Calculated when the average surface temperature for a year is higher than the 1991-2020 average, this anomaly indicates a hotter-than-average temperature for that year.
temperature_anomaly_below: Calculated when the average surface temperature for a year is lower than the 1991-2020 average, this anomaly indicates a colder-than-average temperature for that year.
desc_update: The 2024 data is incomplete and was last updated {TODAY}.
desc_update: The 2024 data is incomplete and was last updated {date_accessed}.

dataset:
title: Annual surface temperatures and anomalies by country
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import owid.catalog.processing as pr

from etl.helpers import PathFinder, create_dataset
from etl.helpers import PathFinder, create_dataset, last_date_accessed

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)
Expand Down Expand Up @@ -65,7 +65,11 @@ def run(dest_dir: str) -> None:
#
# Create a new grapher dataset with the same metadata as the garden dataset.
ds_grapher = create_dataset(
dest_dir, tables=[combined], default_metadata=ds_garden.metadata, check_variables_metadata=True
dest_dir,
tables=[combined],
default_metadata=ds_garden.metadata,
check_variables_metadata=True,
yaml_params={"date_accessed": last_date_accessed(combined)},
)

ds_grapher.save()
Loading

0 comments on commit e49e43a

Please sign in to comment.