From bb01bbdf8b6ef220aa06d4eb26201da5e7f01422 Mon Sep 17 00:00:00 2001 From: Marigold Date: Thu, 3 Aug 2023 12:18:25 +0200 Subject: [PATCH] :hammer: refactor walkthrough --- .../garden_cookiecutter/cookiecutter.json | 5 -- .../grapher_cookiecutter/cookiecutter.json | 5 -- backport/migrate/migrate.py | 33 ++---------- .../snapshot_cookiecutter/cookiecutter.json | 6 --- dag/walkthrough.yml | 2 +- .../garden/dummy/2020-01-01/dummy.meta.yml | 47 +---------------- .../data/garden/dummy/2020-01-01/dummy.py | 6 ++- .../ggdc/2020-10-01/ggdc_maddison.meta.yml | 2 - .../data/meadow/dummy/2020-01-01/dummy.py | 2 +- fasttrack/cli.py | 2 +- lib/catalog/owid/catalog/meta.py | 2 +- snapshots/dummy/2020-01-01/dummy.csv.dvc | 36 ++++++------- snapshots/dummy/2020-01-01/dummy.py | 2 +- .../ggdc/2020-10-01/ggdc_maddison.xlsx.dvc | 1 - walkthrough/explorers.md | 4 -- walkthrough/explorers.py | 7 ++- .../explorers_cookiecutter/cookiecutter.json | 7 --- .../{{cookiecutter.short_name}}.py | 0 walkthrough/garden.md | 2 - walkthrough/garden.py | 9 ++-- .../garden_cookiecutter/cookiecutter.json | 10 ---- .../{{cookiecutter.short_name}}.meta.yml | 35 ------------- .../playground.ipynb | 0 ...{{cookiecutter.short_name}}.countries.json | 0 ...utter.short_name}}.excluded_countries.json | 0 .../{{cookiecutter.short_name}}.meta.yml | 13 +++++ .../{{cookiecutter.short_name}}.py | 4 +- walkthrough/grapher.md | 4 -- walkthrough/grapher.py | 7 ++- .../grapher_cookiecutter/cookiecutter.json | 7 --- .../{{cookiecutter.short_name}}.py | 0 walkthrough/meadow.md | 2 - walkthrough/meadow.py | 34 +++--------- .../meadow_cookiecutter/cookiecutter.json | 11 ---- .../{{cookiecutter.short_name}}.meta.yml | 34 ------------ .../playground.ipynb | 0 .../{{cookiecutter.short_name}}.py | 2 +- walkthrough/snapshot.md | 2 - walkthrough/snapshot.py | 20 +++---- .../snapshot_cookiecutter/cookiecutter.json | 19 ------- .../{{cookiecutter.short_name}}.py | 2 +- ...name}}.{{cookiecutter.file_extension}}.dvc | 2 +- walkthrough/utils.py | 52 ++++++++++--------- 43 files changed, 105 insertions(+), 335 deletions(-) delete mode 100644 backport/migrate/garden_cookiecutter/cookiecutter.json delete mode 100644 backport/migrate/grapher_cookiecutter/cookiecutter.json delete mode 100644 backport/migrate/snapshot_cookiecutter/cookiecutter.json delete mode 100644 walkthrough/explorers_cookiecutter/cookiecutter.json rename walkthrough/explorers_cookiecutter/{{{cookiecutter.directory_name}} => {{cookiecutter.namespace}}/{{cookiecutter.version}}}/{{cookiecutter.short_name}}.py (100%) delete mode 100644 walkthrough/garden_cookiecutter/cookiecutter.json delete mode 100644 walkthrough/garden_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.meta.yml rename walkthrough/garden_cookiecutter/{{{cookiecutter.directory_name}} => {{cookiecutter.namespace}}/{{cookiecutter.version}}}/playground.ipynb (100%) rename walkthrough/garden_cookiecutter/{{{cookiecutter.directory_name}} => {{cookiecutter.namespace}}/{{cookiecutter.version}}}/{{cookiecutter.short_name}}.countries.json (100%) rename walkthrough/garden_cookiecutter/{{{cookiecutter.directory_name}} => {{cookiecutter.namespace}}/{{cookiecutter.version}}}/{{cookiecutter.short_name}}.excluded_countries.json (100%) create mode 100644 walkthrough/garden_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.meta.yml rename walkthrough/garden_cookiecutter/{{{cookiecutter.directory_name}} => {{cookiecutter.namespace}}/{{cookiecutter.version}}}/{{cookiecutter.short_name}}.py (89%) delete mode 100644 walkthrough/grapher_cookiecutter/cookiecutter.json rename walkthrough/grapher_cookiecutter/{{{cookiecutter.directory_name}} => {{cookiecutter.namespace}}/{{cookiecutter.version}}}/{{cookiecutter.short_name}}.py (100%) delete mode 100644 walkthrough/meadow_cookiecutter/cookiecutter.json delete mode 100644 walkthrough/meadow_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.meta.yml rename walkthrough/meadow_cookiecutter/{{{cookiecutter.directory_name}} => {{cookiecutter.namespace}}}/playground.ipynb (100%) rename walkthrough/meadow_cookiecutter/{{{cookiecutter.directory_name}} => {{cookiecutter.namespace}}/{{cookiecutter.version}}}/{{cookiecutter.short_name}}.py (88%) delete mode 100644 walkthrough/snapshot_cookiecutter/cookiecutter.json rename walkthrough/snapshot_cookiecutter/{{{cookiecutter.directory_name}} => {{cookiecutter.namespace}}/{{cookiecutter.snapshot_version}}}/{{cookiecutter.short_name}}.py (95%) rename walkthrough/snapshot_cookiecutter/{{{cookiecutter.directory_name}} => {{cookiecutter.namespace}}/{{cookiecutter.snapshot_version}}}/{{cookiecutter.short_name}}.{{cookiecutter.file_extension}}.dvc (92%) diff --git a/backport/migrate/garden_cookiecutter/cookiecutter.json b/backport/migrate/garden_cookiecutter/cookiecutter.json deleted file mode 100644 index 96098bc9a30..00000000000 --- a/backport/migrate/garden_cookiecutter/cookiecutter.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "namespace": "", - "version": "", - "short_name": "" -} diff --git a/backport/migrate/grapher_cookiecutter/cookiecutter.json b/backport/migrate/grapher_cookiecutter/cookiecutter.json deleted file mode 100644 index 96098bc9a30..00000000000 --- a/backport/migrate/grapher_cookiecutter/cookiecutter.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "namespace": "", - "version": "", - "short_name": "" -} diff --git a/backport/migrate/migrate.py b/backport/migrate/migrate.py index d2583695240..3d7810ae77e 100644 --- a/backport/migrate/migrate.py +++ b/backport/migrate/migrate.py @@ -1,21 +1,19 @@ import shutil -import tempfile from pathlib import Path -from typing import Any, Dict, Optional, cast +from typing import Optional, cast import click import structlog -from cookiecutter.main import cookiecutter from owid.catalog.utils import underscore from sqlalchemy.engine import Engine from etl import config from etl.backport_helpers import create_dataset from etl.db import get_engine -from etl.files import apply_black_formatter_to_files, yaml_dump +from etl.files import yaml_dump from etl.metadata_export import metadata_export from etl.paths import DAG_DIR, SNAPSHOTS_DIR, STEP_DIR -from walkthrough.utils import add_to_dag +from walkthrough.utils import add_to_dag, generate_step from ..backport import PotentialBackport @@ -89,31 +87,6 @@ def cli( ) -def generate_step(cookiecutter_path: Path, data: Dict[str, Any], target_dir: Path) -> None: - with tempfile.TemporaryDirectory() as temp_dir: - OUTPUT_DIR = temp_dir - - # generate ingest scripts - cookiecutter( - cookiecutter_path.as_posix(), - no_input=True, - output_dir=temp_dir, - overwrite_if_exists=True, - extra_context=data, - ) - - shutil.copytree( - Path(OUTPUT_DIR), - target_dir, - dirs_exist_ok=True, - ) - - DATASET_DIR = target_dir / data["namespace"] / data["version"] - - # Apply black formatter to generated files. - apply_black_formatter_to_files(file_paths=DATASET_DIR.glob("*.py")) - - def migrate( dataset_id: int, namespace: str, diff --git a/backport/migrate/snapshot_cookiecutter/cookiecutter.json b/backport/migrate/snapshot_cookiecutter/cookiecutter.json deleted file mode 100644 index d607e48f0fa..00000000000 --- a/backport/migrate/snapshot_cookiecutter/cookiecutter.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "namespace": "", - "version": "", - "backport_short_name": "", - "short_name": "" -} diff --git a/dag/walkthrough.yml b/dag/walkthrough.yml index 75d8204ec07..413e39c0dbe 100644 --- a/dag/walkthrough.yml +++ b/dag/walkthrough.yml @@ -7,4 +7,4 @@ steps: data://grapher/dummy/2020-01-01/dummy: - data://garden/dummy/2020-01-01/dummy data://explorers/dummy/2020-01-01/dummy: - - data://garden/dummy/2020-01-01/dummy \ No newline at end of file + - data://garden/dummy/2020-01-01/dummy diff --git a/etl/steps/data/garden/dummy/2020-01-01/dummy.meta.yml b/etl/steps/data/garden/dummy/2020-01-01/dummy.meta.yml index 03f91f1203b..4279cb9df9b 100644 --- a/etl/steps/data/garden/dummy/2020-01-01/dummy.meta.yml +++ b/etl/steps/data/garden/dummy/2020-01-01/dummy.meta.yml @@ -1,50 +1,5 @@ -# (Inherited from meadow, remove if not different.) -all_sources: -dataset: - title: Dummy dataset tables: dummy: - # (Inherited from meadow, remove if not different.) variables: dummy_variable: - title: Dummy - description: This is a dummy indicator with full metadata. # Description can be a long text if need be. - licenses: [] # Licenses is an obsolete field - use origin.license in the snapshot to record upstream licenses and license to specify the redistribution license - unit: Dummy unit - short_unit: Du - display: - isProjection: true - conversionFactor: 1000 - numDecimalPlaces: 1 - tolerance: 5 - yearIsDay: false - zeroDay: 1900-01-01 - entityAnnotationsMap: "Germany: dummy annotation" - includeInTable: true - description_short: Short description of the dummy indicator. - description_from_producer: The description of the dummy indicator by the producer, shown separately on a data page. - processing_level: major - license: - name: CC-BY 4.0 - url: "" - presentation: - grapher_config: # Note that the fields in here use camelCase, not snake_case. All fields of the schema can be used: https://github.com/owid/owid-grapher/tree/master/packages/%40ourworldindata/grapher/src/schema - title: The dummy indicator - chart title - subtitle: You'll never guess where the line will go - hasMapTab: true - selectedEntityNames: - - Germany - - Italy - - France - title_public: The dummy indicator - data page title - title_variant: historical data # This is useful to discern between similar indicators, e.g. if some are projections and some are historical data - producer_short: ACME - attribution: ACME project # This is what we show in places like the lower left side of Grapher charts to say who produced the data. Often this can be empty, in this case we construct this text from the attribution fields on the Origins. - topic_tags_links: # These should exist in the tags table in the grapher db and use the same spelling and case as they do there - - "Internet" - key_info_text: - - "First bullet point info about the data. [Detail on demand link](#dod:primaryenergy)" - - "Second bullet point with **bold** text and a [normal link](https://ourworldindata.org)" - faqs: - - fragment_id: cherries - gdoc_id: 16uGVylqtS-Ipc3OCxqapJ3BEVGjWf648wvZpzio1QFE + unit: dummy unit diff --git a/etl/steps/data/garden/dummy/2020-01-01/dummy.py b/etl/steps/data/garden/dummy/2020-01-01/dummy.py index 2fc0d1d039a..40ba987ca65 100644 --- a/etl/steps/data/garden/dummy/2020-01-01/dummy.py +++ b/etl/steps/data/garden/dummy/2020-01-01/dummy.py @@ -19,7 +19,7 @@ def run(dest_dir: str) -> None: ds_meadow = cast(Dataset, paths.load_dependency("dummy")) # Read table from meadow dataset. - tb = ds_meadow["dummy"] + tb = ds_meadow["dummy"].reset_index() # # Process data. @@ -32,7 +32,9 @@ def run(dest_dir: str) -> None: # Save outputs. # # Create a new garden dataset with the same metadata as the meadow dataset. - ds_garden = create_dataset(dest_dir, tables=[tb], default_metadata=ds_meadow.metadata) + ds_garden = create_dataset( + dest_dir, tables=[tb.set_index(["country", "year"])], default_metadata=ds_meadow.metadata + ) # Save changes in the new garden dataset. ds_garden.save() diff --git a/etl/steps/data/garden/ggdc/2020-10-01/ggdc_maddison.meta.yml b/etl/steps/data/garden/ggdc/2020-10-01/ggdc_maddison.meta.yml index 43ebebbbc9f..6b38960a2c8 100644 --- a/etl/steps/data/garden/ggdc/2020-10-01/ggdc_maddison.meta.yml +++ b/etl/steps/data/garden/ggdc/2020-10-01/ggdc_maddison.meta.yml @@ -1,5 +1,3 @@ -dataset: - title: Maddison Project Database (Bolt and van Zanden, 2020) tables: maddison_gdp: variables: diff --git a/etl/steps/data/meadow/dummy/2020-01-01/dummy.py b/etl/steps/data/meadow/dummy/2020-01-01/dummy.py index 2c36b94eb8a..2d1ea949ab8 100644 --- a/etl/steps/data/meadow/dummy/2020-01-01/dummy.py +++ b/etl/steps/data/meadow/dummy/2020-01-01/dummy.py @@ -32,7 +32,7 @@ def run(dest_dir: str) -> None: # Save outputs. # # Create a new meadow dataset with the same metadata as the snapshot. - ds_meadow = create_dataset(dest_dir, tables=[tb], default_metadata=snap.metadata) + ds_meadow = create_dataset(dest_dir, tables=[tb.set_index(["country", "year"])], default_metadata=snap.metadata) # Save changes in the new garden dataset. ds_meadow.save() diff --git a/fasttrack/cli.py b/fasttrack/cli.py index f9c2dcb3f36..0962c317bb3 100644 --- a/fasttrack/cli.py +++ b/fasttrack/cli.py @@ -239,7 +239,7 @@ def app(dummy_data: bool, commit: bool) -> None: dag_content = _add_to_dag(meta.dataset, form.is_private) # create step and metadata file - walkthrough_utils.generate_step( + walkthrough_utils.generate_step_to_channel( CURRENT_DIR / "grapher_cookiecutter/", dict(**meta.dataset.dict(), channel="grapher") ) fast_import.save_metadata() diff --git a/lib/catalog/owid/catalog/meta.py b/lib/catalog/owid/catalog/meta.py index d8b1c7e8a12..9406e0851aa 100644 --- a/lib/catalog/owid/catalog/meta.py +++ b/lib/catalog/owid/catalog/meta.py @@ -115,7 +115,7 @@ def update(self, **kwargs: Dict[str, Any]) -> None: class Origin: # Dataset title written by OWID (without a year) dataset_title_owid: Optional[str] = None - # Dataset title written producer (without a year) + # Dataset title written by producer (without a year) dataset_title_producer: Optional[str] = None # Our description of the dataset dataset_description_owid: Optional[str] = None diff --git a/snapshots/dummy/2020-01-01/dummy.csv.dvc b/snapshots/dummy/2020-01-01/dummy.csv.dvc index cd1bca189ca..7ebaecf8196 100644 --- a/snapshots/dummy/2020-01-01/dummy.csv.dvc +++ b/snapshots/dummy/2020-01-01/dummy.csv.dvc @@ -1,26 +1,20 @@ meta: name: Dummy dataset - origin: - dataset_title_producer: The best dummy dataset # the dataset title as used by the producer. This should be the default to be filled - dataset_title_owid: Dummy # What we call the dataset if the producer's title does not fit our needs. - dataset_description_producer: This is a description of the best dummy dataset # How the producer describes this dataset - dataset_description_owid: | # How we describe this dataset - also a place to collect important information about the entire dataset. - This is a description of this dummy dataset as we would word it. - - It can be a few paragraphs long if need be. Citation information should not go in here. For - specific information about indicators, prefer to add indciator level descriptions in meadow or garden. - attribution: ACME project # The text we want to appear when we want to credit the "origin" of the data. Should be reasonably short. Can be a project, an institution, actual people. Can include versions or years if that is important. - attribution_short: ACME # The shortest version of the attribution that we think is acceptable. Used in places like "This database is used on data by ..." - producer: Max Mustermann # Name of the institution or people who are the authors of this data. Should never be the name of a project. - citation_producer: Max Mustermann et al based on John Doe. # The citation that the producer asks for, verbatim. - dataset_url_main: http://dummy-project.org # The URL where this dataset is described - dataset_url_download: http://dummy-project.org/download # The URL from where we downloaded the data - date_accessed: 2023-06-29 - date_published: 2020-01-01 - version: "1" # Some datasets release versions over time - if so then here we capture the version identifier the producer used for this release - license: - name: CC-BY 4.0 # The license that governs this dataset - url: http://dummy-project.org/license # The URL where the licensing terms are given + publication_date: '2020-01-01' + source_name: Dummy short source citation + source_published_by: Dummy full source citation + url: https://www.url-dummy.com/ + source_data_url: https://raw.githubusercontent.com/owid/etl/master/walkthrough/dummy_data.csv + license_url: + license_name: '' + date_accessed: 2023-08-03 + is_public: true + description: | + This + is + a + dummy + dataset wdir: ../../../data/snapshots/dummy/2020-01-01 outs: - md5: becb9bc64792f7372580683b384e5411 diff --git a/snapshots/dummy/2020-01-01/dummy.py b/snapshots/dummy/2020-01-01/dummy.py index ad2a1ad4614..2a55ff6169c 100644 --- a/snapshots/dummy/2020-01-01/dummy.py +++ b/snapshots/dummy/2020-01-01/dummy.py @@ -1,4 +1,4 @@ -"""Script to create a snapshot of dataset 'Dummy dataset'.""" +"""Script to create a snapshot of dataset.""" from pathlib import Path diff --git a/snapshots/ggdc/2020-10-01/ggdc_maddison.xlsx.dvc b/snapshots/ggdc/2020-10-01/ggdc_maddison.xlsx.dvc index adac56d646f..a1be95d9500 100644 --- a/snapshots/ggdc/2020-10-01/ggdc_maddison.xlsx.dvc +++ b/snapshots/ggdc/2020-10-01/ggdc_maddison.xlsx.dvc @@ -1,5 +1,4 @@ meta: - name: Maddison Project Database (Bolt and van Zanden, 2020) origin: dataset_title_producer: Maddison Project Database dataset_description_owid: | diff --git a/walkthrough/explorers.md b/walkthrough/explorers.md index 895f2c8f1c8..d7991e39782 100644 --- a/walkthrough/explorers.md +++ b/walkthrough/explorers.md @@ -1,7 +1,3 @@ -# Walkthrough - Explorers - -## Explorers step - Data explorers are Grapher charts expanded with additional functionalities to facilitate exploration, such as dynamic entity filters or customizable menus. They are powered by CSV files generated by ETL [served from S3](https://cloud.digitalocean.com/spaces/owid-catalog?path=explorers/). Explorers data step in ETL is responsible for generating these CSV files. It works in the same way as e.g. garden step, but the transformations made there are meant to get the data ready for the data explorer (and not be consumed by users of catalog). Check out docs about creating [Data explorers](https://www.notion.so/owid/Creating-Data-Explorers-cf47a5ef90f14c1fba8fc243aba79be7). diff --git a/walkthrough/explorers.py b/walkthrough/explorers.py index 6eb6023ef56..a96fc489094 100644 --- a/walkthrough/explorers.py +++ b/walkthrough/explorers.py @@ -38,8 +38,9 @@ def __init__(self, **data: Any) -> None: def app(run_checks: bool) -> None: state = utils.APP_STATE + po.put_markdown("# Walkthrough - Explorers") with open(CURRENT_DIR / "explorers.md", "r") as f: - po.put_markdown(f.read()) + po.put_collapse("Instructions", [po.put_markdown(f.read())]) data = pi.input_group( "Options", @@ -100,7 +101,9 @@ def app(run_checks: bool) -> None: else: dag_content = "" - DATASET_DIR = utils.generate_step(CURRENT_DIR / "explorers_cookiecutter/", dict(**form.dict(), channel="explorers")) + DATASET_DIR = utils.generate_step_to_channel( + CURRENT_DIR / "explorers_cookiecutter/", dict(**form.dict(), channel="explorers") + ) step_path = DATASET_DIR / (form.short_name + ".py") diff --git a/walkthrough/explorers_cookiecutter/cookiecutter.json b/walkthrough/explorers_cookiecutter/cookiecutter.json deleted file mode 100644 index 644d911c2d1..00000000000 --- a/walkthrough/explorers_cookiecutter/cookiecutter.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "directory_name": "", - "short_name": "", - "namespace": "", - "version": "", - "add_to_dag": false -} diff --git a/walkthrough/explorers_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.py b/walkthrough/explorers_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.py similarity index 100% rename from walkthrough/explorers_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.py rename to walkthrough/explorers_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.py diff --git a/walkthrough/garden.md b/walkthrough/garden.md index 5008a6ea6e4..7e300fd6e82 100644 --- a/walkthrough/garden.md +++ b/walkthrough/garden.md @@ -1,5 +1,3 @@ -# Walkthrough - Garden - Here's a summary of this walkthrough, you don't have to manually execute anything, all of it will be done automatically after submitting a form below 1. **Create a new garden step** (e.g. `etl/etl/steps/data/garden/example_institution/YYYY-MM-DD/example_dataset.py`). The step must contain a `run(dest_dir)` function that loads data from the last `meadow` step, processes the data and creates a dataset with one or more tables and the necessary metadata. diff --git a/walkthrough/garden.py b/walkthrough/garden.py index 3cf5aac8f69..1350ba58776 100644 --- a/walkthrough/garden.py +++ b/walkthrough/garden.py @@ -56,8 +56,9 @@ def __init__(self, **data: Any) -> None: def app(run_checks: bool) -> None: state = utils.APP_STATE + po.put_markdown("# Walkthrough - Garden") with open(CURRENT_DIR / "garden.md", "r") as f: - po.put_markdown(f.read()) + po.put_collapse("Instructions", [po.put_markdown(f.read())]) data = pi.input_group( "Options", @@ -136,7 +137,9 @@ def app(run_checks: bool) -> None: else: dag_content = "" - DATASET_DIR = utils.generate_step(CURRENT_DIR / "garden_cookiecutter/", dict(**form.dict(), channel="garden")) + DATASET_DIR = utils.generate_step_to_channel( + CURRENT_DIR / "garden_cookiecutter/", dict(**form.dict(), channel="garden") + ) step_path = DATASET_DIR / (form.short_name + ".py") notebook_path = DATASET_DIR / "playground.ipynb" @@ -245,9 +248,7 @@ def _fill_dummy_metadata_yaml(metadata_path: Path) -> None: with open(metadata_path, "r") as f: doc = ruamel.yaml.load(f, Loader=ruamel.yaml.RoundTripLoader) - doc["dataset"]["title"] = "Dummy dataset" doc["tables"]["dummy"]["variables"] = {"dummy_variable": {"unit": "dummy unit"}} - doc["all_sources"][0]["source_testing"]["name"] = "Dummy source" with open(metadata_path, "w") as f: ruamel.yaml.dump(doc, f, Dumper=ruamel.yaml.RoundTripDumper) diff --git a/walkthrough/garden_cookiecutter/cookiecutter.json b/walkthrough/garden_cookiecutter/cookiecutter.json deleted file mode 100644 index a59121bfbbb..00000000000 --- a/walkthrough/garden_cookiecutter/cookiecutter.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "directory_name": "", - "short_name": "", - "namespace": "", - "version": "", - "include_metadata_yaml": false, - "add_to_dag": false, - "load_population": false, - "load_countries_regions": false -} \ No newline at end of file diff --git a/walkthrough/garden_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.meta.yml b/walkthrough/garden_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.meta.yml deleted file mode 100644 index dc3dbb85cea..00000000000 --- a/walkthrough/garden_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.meta.yml +++ /dev/null @@ -1,35 +0,0 @@ -# (Inherited from meadow, remove if not different.) -all_sources: - - source_testing: &source-testing - name: # Example: Testing Short Citation - published_by: # (if different to short citation). Example: Testing Full Citation - url: # Example: https://url_of_testing_source.com/ - date_accessed: # Example: 2023-01-01 - publication_date: # Example: 2023-01-01 - publication_year: # (if publication_date is not given). Example: 2023 - # description: Source description. - -# (Inherited from meadow, remove if not different.) -dataset: - title: # Example: Testing Dataset Name (Institution, 2023) - # description: Dataset description. - licenses: - - name: # Example: Testing License Name - url: # Example: https://url_of_testing_source.com/license - sources: - - *source-testing - -tables: - {{cookiecutter.short_name}}: - # (Inherited from meadow, remove if not different.) - variables: - # testing_variable: - # title: Testing variable title - # unit: arbitrary units - # short_unit: au - # description: Full description of testing variable. - # sources: - # - *source-testing - # display: - # entityAnnotationsMap: Test annotation - # numDecimalPlaces: 0 diff --git a/walkthrough/garden_cookiecutter/{{cookiecutter.directory_name}}/playground.ipynb b/walkthrough/garden_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/playground.ipynb similarity index 100% rename from walkthrough/garden_cookiecutter/{{cookiecutter.directory_name}}/playground.ipynb rename to walkthrough/garden_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/playground.ipynb diff --git a/walkthrough/garden_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.countries.json b/walkthrough/garden_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.countries.json similarity index 100% rename from walkthrough/garden_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.countries.json rename to walkthrough/garden_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.countries.json diff --git a/walkthrough/garden_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.excluded_countries.json b/walkthrough/garden_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.excluded_countries.json similarity index 100% rename from walkthrough/garden_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.excluded_countries.json rename to walkthrough/garden_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.excluded_countries.json diff --git a/walkthrough/garden_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.meta.yml b/walkthrough/garden_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.meta.yml new file mode 100644 index 00000000000..4e1e27541be --- /dev/null +++ b/walkthrough/garden_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.meta.yml @@ -0,0 +1,13 @@ +tables: + {{cookiecutter.short_name}}: + variables: + # testing_variable: + # title: Testing variable title + # unit: arbitrary units + # short_unit: au + # description: Full description of testing variable. + # sources: + # - *source-testing + # display: + # entityAnnotationsMap: Test annotation + # numDecimalPlaces: 0 diff --git a/walkthrough/garden_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.py b/walkthrough/garden_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.py similarity index 89% rename from walkthrough/garden_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.py rename to walkthrough/garden_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.py index 43394821daf..bee2d182a55 100644 --- a/walkthrough/garden_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.py +++ b/walkthrough/garden_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.py @@ -39,7 +39,7 @@ def run(dest_dir: str) -> None: ds_meadow = cast(Dataset, paths.load_dependency("{{cookiecutter.short_name}}")) # Read table from meadow dataset. - tb = ds_meadow["{{cookiecutter.short_name}}"] + tb = ds_meadow["{{cookiecutter.short_name}}"].reset_index() # # Process data. @@ -52,7 +52,7 @@ def run(dest_dir: str) -> None: # Save outputs. # # Create a new garden dataset with the same metadata as the meadow dataset. - ds_garden = create_dataset(dest_dir, tables=[tb], default_metadata=ds_meadow.metadata) + ds_garden = create_dataset(dest_dir, tables=[tb.set_index(["country", "year"])], default_metadata=ds_meadow.metadata) # Save changes in the new garden dataset. ds_garden.save() diff --git a/walkthrough/grapher.md b/walkthrough/grapher.md index 7f9ad95419d..0b7aee97adb 100644 --- a/walkthrough/grapher.md +++ b/walkthrough/grapher.md @@ -1,7 +1,3 @@ -# Walkthrough - Grapher - -## Grapher step - **Grapher step can be only executed by OWID staff** Grapher step is the last phase before upserting the dataset into the database. It works in the same way as the other steps, but the transformations made there are meant to get the data ready for the database (and not be consumed by users of catalog). diff --git a/walkthrough/grapher.py b/walkthrough/grapher.py index 906bf250c75..9cc6ef5c42a 100644 --- a/walkthrough/grapher.py +++ b/walkthrough/grapher.py @@ -40,8 +40,9 @@ def __init__(self, **data: Any) -> None: def app(run_checks: bool) -> None: state = utils.APP_STATE + po.put_markdown("# Walkthrough - Grapher") with open(CURRENT_DIR / "grapher.md", "r") as f: - po.put_markdown(f.read()) + po.put_collapse("Instructions", [po.put_markdown(f.read())]) if run_checks: po.put_markdown("""## Checking `.env` file...""") @@ -117,7 +118,9 @@ def app(run_checks: bool) -> None: else: dag_content = "" - DATASET_DIR = utils.generate_step(CURRENT_DIR / "grapher_cookiecutter/", dict(**form.dict(), channel="grapher")) + DATASET_DIR = utils.generate_step_to_channel( + CURRENT_DIR / "grapher_cookiecutter/", dict(**form.dict(), channel="grapher") + ) step_path = DATASET_DIR / (form.short_name + ".py") diff --git a/walkthrough/grapher_cookiecutter/cookiecutter.json b/walkthrough/grapher_cookiecutter/cookiecutter.json deleted file mode 100644 index 644d911c2d1..00000000000 --- a/walkthrough/grapher_cookiecutter/cookiecutter.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "directory_name": "", - "short_name": "", - "namespace": "", - "version": "", - "add_to_dag": false -} diff --git a/walkthrough/grapher_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.py b/walkthrough/grapher_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.py similarity index 100% rename from walkthrough/grapher_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.py rename to walkthrough/grapher_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.py diff --git a/walkthrough/meadow.md b/walkthrough/meadow.md index e17dc25b685..1d3557af6ab 100644 --- a/walkthrough/meadow.md +++ b/walkthrough/meadow.md @@ -1,5 +1,3 @@ -# Walkthrough - Meadow - Here's a summary of this walkthrough, you don't have to manually execute anything, all of it will be done automatically after submitting a form below diff --git a/walkthrough/meadow.py b/walkthrough/meadow.py index 861d50b7b18..0d69bfe95c0 100644 --- a/walkthrough/meadow.py +++ b/walkthrough/meadow.py @@ -21,7 +21,6 @@ class Options(Enum): ADD_TO_DAG = "Add steps into dag/walkthrough.yaml file" - INCLUDE_METADATA_YAML = "Include *.meta.yaml file with metadata" GENERATE_NOTEBOOK = "Generate playground notebook" IS_PRIVATE = "Make dataset private" @@ -34,7 +33,6 @@ class MeadowForm(BaseModel): file_extension: str add_to_dag: bool generate_notebook: bool - include_metadata_yaml: bool is_private: bool def __init__(self, **data: Any) -> None: @@ -42,7 +40,6 @@ def __init__(self, **data: Any) -> None: if data["file_extension"] == "": data["file_extension"] = DEFAULT_EXTENSION data["add_to_dag"] = Options.ADD_TO_DAG.value in options - data["include_metadata_yaml"] = Options.INCLUDE_METADATA_YAML.value in options data["generate_notebook"] = Options.GENERATE_NOTEBOOK.value in options data["is_private"] = Options.IS_PRIVATE.value in options super().__init__(**data) @@ -51,8 +48,9 @@ def __init__(self, **data: Any) -> None: def app(run_checks: bool) -> None: state = utils.APP_STATE + po.put_markdown("# Walkthrough - Meadow") with open(CURRENT_DIR / "meadow.md", "r") as f: - po.put_markdown(f.read()) + po.put_collapse("Instructions", [po.put_markdown(f.read())]) data = pi.input_group( "Options", @@ -101,14 +99,12 @@ def app(run_checks: bool) -> None: "Additional Options", options=[ Options.ADD_TO_DAG.value, - Options.INCLUDE_METADATA_YAML.value, Options.GENERATE_NOTEBOOK.value, Options.IS_PRIVATE.value, ], name="options", value=[ Options.ADD_TO_DAG.value, - # Options.INCLUDE_METADATA_YAML.value, Options.GENERATE_NOTEBOOK.value, ], ), @@ -132,18 +128,16 @@ def app(run_checks: bool) -> None: else: dag_content = "" - DATASET_DIR = utils.generate_step(CURRENT_DIR / "meadow_cookiecutter/", dict(**form.dict(), channel="meadow")) + DATASET_DIR = utils.generate_step_to_channel( + CURRENT_DIR / "meadow_cookiecutter/", dict(**form.dict(), channel="meadow") + ) step_path = DATASET_DIR / (form.short_name + ".py") notebook_path = DATASET_DIR / "playground.ipynb" - metadata_path = DATASET_DIR / (form.short_name + ".meta.yml") if not form.generate_notebook: os.remove(notebook_path) - if not form.include_metadata_yaml: - os.remove(metadata_path) - po.put_markdown( f""" ## Next steps @@ -156,21 +150,7 @@ def app(run_checks: bool) -> None: 2. (Optional) Generated notebook `{notebook_path.relative_to(ETL_DIR)}` can be used to examine the dataset output interactively. -3. (Optional) Generate metadata file `{form.short_name}.meta.yml` from your dataset with - - ``` - poetry run etl-metadata-export data/meadow/{form.namespace}/{form.version}/{form.short_name} -o etl/steps/data/meadow/{form.namespace}/{form.version}/{form.short_name}.meta.yml - ``` - - then manual edit it and rerun the step again with - - ``` - poetry run etl data{private_suffix}://meadow/{form.namespace}/{form.version}/{form.short_name} {"--private" if form.is_private else ""} - ``` - - Note that metadata is inherited from previous step (snapshot) and you don't have to repeat it. - -4. Continue to the garden step +3. Continue to the garden step """ ) po.put_buttons(["Go to garden"], [lambda: go_app("garden", new_window=False)]) @@ -181,8 +161,6 @@ def app(run_checks: bool) -> None: """ ) - if form.include_metadata_yaml: - utils.preview_file(metadata_path, "yaml") utils.preview_file(step_path, "python") if dag_content: diff --git a/walkthrough/meadow_cookiecutter/cookiecutter.json b/walkthrough/meadow_cookiecutter/cookiecutter.json deleted file mode 100644 index ab206b8407d..00000000000 --- a/walkthrough/meadow_cookiecutter/cookiecutter.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "directory_name": "", - "short_name": "", - "namespace": "", - "version": "", - "snapshot_version": "", - "file_extension": "", - "include_metadata_yaml": false, - "add_to_dag": false, - "is_private": false -} diff --git a/walkthrough/meadow_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.meta.yml b/walkthrough/meadow_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.meta.yml deleted file mode 100644 index 085a497c347..00000000000 --- a/walkthrough/meadow_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.meta.yml +++ /dev/null @@ -1,34 +0,0 @@ -# (Inherited from snapshot, remove if not different.) -all_sources: - - source_testing: &source-testing - name: # Example: Testing Short Citation - published_by: # (if different to short citation). Example: Testing Full Citation - url: # Example: https://url_of_testing_source.com/ - date_accessed: # Example: 2023-01-01 - publication_date: # Example: 2023-01-01 - publication_year: # (if publication_date is not given). Example: 2023 - # description: Source description. - -# (Inherited from snapshot, remove if not different.) -dataset: - title: # Example: Testing Dataset Name (Institution, 2023) - # description: Dataset description. - licenses: - - name: # Example: Testing License Name - url: # Example: https://url_of_testing_source.com/license - sources: - - *source-testing - -tables: - {{cookiecutter.short_name}}: - variables: - # testing_variable: - # title: Testing variable title - # unit: arbitrary units - # short_unit: au - # description: Full description of testing variable. - # sources: - # - *source-testing - # display: - # entityAnnotationsMap: Test annotation - # numDecimalPlaces: 0 diff --git a/walkthrough/meadow_cookiecutter/{{cookiecutter.directory_name}}/playground.ipynb b/walkthrough/meadow_cookiecutter/{{cookiecutter.namespace}}/playground.ipynb similarity index 100% rename from walkthrough/meadow_cookiecutter/{{cookiecutter.directory_name}}/playground.ipynb rename to walkthrough/meadow_cookiecutter/{{cookiecutter.namespace}}/playground.ipynb diff --git a/walkthrough/meadow_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.py b/walkthrough/meadow_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.py similarity index 88% rename from walkthrough/meadow_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.py rename to walkthrough/meadow_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.py index 3e1e7881e34..1ba22920fb5 100644 --- a/walkthrough/meadow_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.py +++ b/walkthrough/meadow_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.version}}/{{cookiecutter.short_name}}.py @@ -32,7 +32,7 @@ def run(dest_dir: str) -> None: # Save outputs. # # Create a new meadow dataset with the same metadata as the snapshot. - ds_meadow = create_dataset(dest_dir, tables=[tb], default_metadata=snap.metadata) + ds_meadow = create_dataset(dest_dir, tables=[tb.set_index(["country", "year"])], default_metadata=snap.metadata) # Save changes in the new garden dataset. ds_meadow.save() diff --git a/walkthrough/snapshot.md b/walkthrough/snapshot.md index feb9fda23ff..29324c113c3 100644 --- a/walkthrough/snapshot.md +++ b/walkthrough/snapshot.md @@ -1,5 +1,3 @@ -# Walkthrough - Snapshot - Here's a summary of this walkthrough, you don't have to manually execute anything, all of it will be done automatically after submitting a form below 1. **Create an ingest script** (e.g. `snapshots/namespace/version/dummy.py`) to download the data from its original source and upload it as a new data snapshot into `s3://owid-catalog/snapshots`. This step can also be done manually (although it is preferable to do it via script, to have a record of how the data was obtained, and to be able to repeat the process in the future, for instance if another version of the data is released). diff --git a/walkthrough/snapshot.py b/walkthrough/snapshot.py index 457757c14a5..91687c3ec39 100644 --- a/walkthrough/snapshot.py +++ b/walkthrough/snapshot.py @@ -10,6 +10,8 @@ from pywebio import output as po from pywebio.session import go_app +from etl.paths import SNAPSHOTS_DIR + from . import utils CURRENT_DIR = Path(__file__).parent @@ -52,8 +54,9 @@ def version(self) -> str: def app(run_checks: bool) -> None: state = utils.APP_STATE + po.put_markdown("# Walkthrough - Snapshot") with open(CURRENT_DIR / "snapshot.md", "r") as f: - po.put_markdown(f.read()) + po.put_collapse("Instructions", [po.put_markdown(f.read())]) # run checks if run_checks: @@ -179,16 +182,15 @@ def app(run_checks: bool) -> None: # save form data to global state for next steps state.update(form.dict()) - # use multi-line description - form.description = form.description.replace("\n", "\n ") - # cookiecutter on python files - SNAPSHOT_DIR = utils.generate_step( - CURRENT_DIR / "snapshot_cookiecutter/", dict(**form.dict(), version=form.snapshot_version, channel="snapshots") + utils.generate_step( + CURRENT_DIR / "snapshot_cookiecutter/", + dict(**form.dict(), channel="snapshots"), + SNAPSHOTS_DIR, ) - ingest_path = SNAPSHOT_DIR / (form.short_name + ".py") - meta_path = SNAPSHOT_DIR / f"{form.short_name}.{form.file_extension}.dvc" + ingest_path = SNAPSHOTS_DIR / form.namespace / form.snapshot_version / (form.short_name + ".py") + meta_path = SNAPSHOTS_DIR / form.namespace / form.snapshot_version / f"{form.short_name}.{form.file_extension}.dvc" po.put_markdown( f""" @@ -198,7 +200,7 @@ def app(run_checks: bool) -> None: 2. Run the snapshot step to upload files to S3 ```bash -python snapshots/{form.namespace}/{form.version}/{form.short_name}.py +python snapshots/{form.namespace}/{form.snapshot_version}/{form.short_name}.py ``` 3. Continue to the meadow step diff --git a/walkthrough/snapshot_cookiecutter/cookiecutter.json b/walkthrough/snapshot_cookiecutter/cookiecutter.json deleted file mode 100644 index 93454c0a48a..00000000000 --- a/walkthrough/snapshot_cookiecutter/cookiecutter.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "directory_name": "", - "short_name": "", - "namespace": "", - "snapshot_version": "", - "name": "", - "source_name": "", - "source_published_by": "", - "publication_year": "", - "publication_date": "", - "url": "", - "source_data_url": "", - "file_extension": "", - "license_name": "", - "license_url": "", - "description": "", - "is_private": "", - "dataset_manual_import": "" -} \ No newline at end of file diff --git a/walkthrough/snapshot_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.py b/walkthrough/snapshot_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.snapshot_version}}/{{cookiecutter.short_name}}.py similarity index 95% rename from walkthrough/snapshot_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.py rename to walkthrough/snapshot_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.snapshot_version}}/{{cookiecutter.short_name}}.py index 73285a8b522..5cddedc4c95 100644 --- a/walkthrough/snapshot_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.py +++ b/walkthrough/snapshot_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.snapshot_version}}/{{cookiecutter.short_name}}.py @@ -1,4 +1,4 @@ -"""Script to create a snapshot of dataset '{{cookiecutter.name}}'.""" +"""Script to create a snapshot of dataset.""" from pathlib import Path diff --git a/walkthrough/snapshot_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.{{cookiecutter.file_extension}}.dvc b/walkthrough/snapshot_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.snapshot_version}}/{{cookiecutter.short_name}}.{{cookiecutter.file_extension}}.dvc similarity index 92% rename from walkthrough/snapshot_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.{{cookiecutter.file_extension}}.dvc rename to walkthrough/snapshot_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.snapshot_version}}/{{cookiecutter.short_name}}.{{cookiecutter.file_extension}}.dvc index 2a5f01db91d..c9bcf4a3da2 100644 --- a/walkthrough/snapshot_cookiecutter/{{cookiecutter.directory_name}}/{{cookiecutter.short_name}}.{{cookiecutter.file_extension}}.dvc +++ b/walkthrough/snapshot_cookiecutter/{{cookiecutter.namespace}}/{{cookiecutter.snapshot_version}}/{{cookiecutter.short_name}}.{{cookiecutter.file_extension}}.dvc @@ -15,4 +15,4 @@ meta: date_accessed: {% now 'local', '%Y-%m-%d' %} is_public: {% if cookiecutter.is_private == "True" %}false{% else %}true{% endif %} description: | - {{cookiecutter.description.replace("\n", "\n ")}} + {{cookiecutter.description.replace("\n", "\n ")}} diff --git a/walkthrough/utils.py b/walkthrough/utils.py index b2ae7b825bb..275ff066f85 100644 --- a/walkthrough/utils.py +++ b/walkthrough/utils.py @@ -1,3 +1,4 @@ +import json import shutil import tempfile from pathlib import Path @@ -16,7 +17,6 @@ DAG_DIR, LATEST_POPULATION_VERSION, LATEST_REGIONS_VERSION, - SNAPSHOTS_DIR, STEP_DIR, ) from etl.steps import DAG @@ -121,38 +121,40 @@ def remove_from_dag(step: str, dag_path: Path = DAG_WALKTHROUGH_PATH) -> None: ruamel.yaml.dump(doc, f, Dumper=ruamel.yaml.RoundTripDumper) -def generate_step(cookiecutter_path: Path, data: Dict[str, Any]) -> Path: - assert {"channel", "namespace", "version"} <= data.keys() - +def generate_step(cookiecutter_path: Path, data: Dict[str, Any], target_dir: Path) -> None: with tempfile.TemporaryDirectory() as temp_dir: - OUTPUT_DIR = temp_dir - - # generate ingest scripts - cookiecutter( - cookiecutter_path.as_posix(), - no_input=True, - output_dir=temp_dir, - overwrite_if_exists=True, - extra_context=dict(directory_name=data["channel"], **data), - ) + # create config file with data for cookiecutter + config_path = cookiecutter_path / "cookiecutter.json" + with open(config_path, "w") as f: + json.dump(data, f) + + try: + cookiecutter( + cookiecutter_path.as_posix(), + no_input=True, + output_dir=temp_dir, + overwrite_if_exists=True, + extra_context=data, + ) + finally: + config_path.unlink() - if data["channel"] == "walden": - DATASET_DIR = WALDEN_INGEST_DIR / data["namespace"] / data["version"] - elif data["channel"] == "snapshots": - DATASET_DIR = SNAPSHOTS_DIR / data["namespace"] / data["version"] - else: - DATASET_DIR = STEP_DIR / "data" / data["channel"] / data["namespace"] / data["version"] + # Apply black formatter to generated files. + apply_black_formatter_to_files(file_paths=list(Path(temp_dir).glob("**/*.py"))) shutil.copytree( - Path(OUTPUT_DIR) / data["channel"], - DATASET_DIR, + Path(temp_dir), + target_dir, dirs_exist_ok=True, ) - # Apply black formatter to generated step files. - apply_black_formatter_to_files(file_paths=DATASET_DIR.glob("*.py")) - return DATASET_DIR +def generate_step_to_channel(cookiecutter_path: Path, data: Dict[str, Any]) -> Path: + assert {"channel", "namespace", "version"} <= data.keys() + + target_dir = STEP_DIR / "data" / data["channel"] + generate_step(cookiecutter_path, data, target_dir) + return target_dir / data["namespace"] / data["version"] def _check_env() -> bool: