From 89d65b9bd62bbfd4fad5bc45827def5167ba9b2c Mon Sep 17 00:00:00 2001 From: Pablo Rosado Date: Tue, 27 Aug 2024 10:17:33 +0200 Subject: [PATCH] Add exact-match to etlr options --- etl/command.py | 13 ++++++++++++- etl/steps/__init__.py | 3 ++- tests/test_etl.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/etl/command.py b/etl/command.py index 9a95a7c3919..73268fa72a7 100644 --- a/etl/command.py +++ b/etl/command.py @@ -92,6 +92,12 @@ is_flag=True, help="Only run the selected step (no upstream or downstream dependencies). Overrides `--downstream` option.", ) +@click.option( + "--exact-match", + "-x", + is_flag=True, + help="Steps should exactly match the arguments (if so, pass the steps with their full name, e.g. 'data://garden/.../step_name').", +) @click.option( "--exclude", "-e", @@ -145,6 +151,7 @@ def main_cli( ipdb: bool = False, downstream: bool = False, only: bool = False, + exact_match: bool = False, exclude: Optional[str] = None, dag_path: Path = paths.DEFAULT_DAG_FILE, workers: int = 1, @@ -195,6 +202,7 @@ def main_cli( export=export, downstream=downstream, only=only, + exact_match=exact_match, exclude=exclude, dag_path=dag_path, workers=workers, @@ -227,6 +235,7 @@ def main( export: bool = False, downstream: bool = False, only: bool = False, + exact_match: bool = False, exclude: Optional[str] = None, dag_path: Path = paths.DEFAULT_DAG_FILE, workers: int = 1, @@ -251,6 +260,7 @@ def main( private=private, downstream=downstream, only=only, + exact_match=exact_match, excludes=excludes, workers=workers, strict=strict, @@ -302,6 +312,7 @@ def run_dag( private: bool = False, downstream: bool = False, only: bool = False, + exact_match: bool = False, excludes: Optional[List[str]] = None, workers: int = 1, strict: Optional[bool] = None, @@ -322,7 +333,7 @@ def run_dag( # but are not supposed to be in DB excludes.append("grapher://grapher/regions/latest/regions") - steps = compile_steps(dag, includes, excludes, downstream=downstream, only=only) + steps = compile_steps(dag, includes, excludes, downstream=downstream, only=only, exact_match=exact_match) if not private: _validate_private_steps(steps) diff --git a/etl/steps/__init__.py b/etl/steps/__init__.py index d19a1eb7369..c8b53545257 100644 --- a/etl/steps/__init__.py +++ b/etl/steps/__init__.py @@ -55,6 +55,7 @@ def compile_steps( excludes: Optional[List[str]] = None, downstream: bool = False, only: bool = False, + exact_match: bool = False, ) -> List["Step"]: """ Return the list of steps which, if executed in order, mean that every @@ -64,7 +65,7 @@ def compile_steps( excludes = excludes or [] # make sure each step runs after its dependencies - steps = to_dependency_order(dag, includes, excludes, downstream=downstream, only=only) + steps = to_dependency_order(dag, includes, excludes, downstream=downstream, only=only, exact_match=exact_match) # parse the steps into Python objects return [parse_step(name, dag) for name in steps] diff --git a/tests/test_etl.py b/tests/test_etl.py index 164b22128d1..34387bb4bc6 100644 --- a/tests/test_etl.py +++ b/tests/test_etl.py @@ -63,3 +63,32 @@ def get_all_steps(filename: Union[str, Path] = paths.DEFAULT_DAG_FILE) -> List[S dag = load_dag(filename) steps = compile_steps(dag, []) return steps + + +def test_get_exact_matches(): + dag = load_dag("tests/data/dag.yml") + + # Try all possible combinations of "exact_match" and "only" arguments, when passing the full step uri as arguments. + assert [s.path for s in compile_steps(dag, includes=["data://test/step_1"], exact_match=True, only=True)] == [ + "test/step_1" + ] + assert [s.path for s in compile_steps(dag, includes=["data://test/step_1"], exact_match=True, only=False)] == [ + "test/step_0", + "test/step_1", + ] + assert [s.path for s in compile_steps(dag, includes=["data://test/step_1"], exact_match=False, only=True)] == [ + "test/step_1" + ] + assert [s.path for s in compile_steps(dag, includes=["data://test/step_1"], exact_match=False, only=False)] == [ + "test/step_0", + "test/step_1", + ] + + # Try all possible combinations of "exact_match" and "only" arguments, when passing a substring of the step uri. + assert [s.path for s in compile_steps(dag, includes=["step_1"], exact_match=True, only=True)] == [] + assert [s.path for s in compile_steps(dag, includes=["step_1"], exact_match=True, only=False)] == [] + assert [s.path for s in compile_steps(dag, includes=["step_1"], exact_match=False, only=True)] == ["test/step_1"] + assert [s.path for s in compile_steps(dag, includes=["step_1"], exact_match=False, only=False)] == [ + "test/step_0", + "test/step_1", + ]