Skip to content

Commit 5a1be5c

Browse files
committed
[components] Add --rebuild-component-registry option, build cache on code loc construction
1 parent f4ca960 commit 5a1be5c

File tree

8 files changed

+219
-50
lines changed

8 files changed

+219
-50
lines changed

python_modules/libraries/dagster-dg/dagster_dg/cache.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,15 @@ def __init__(self, root_path: Path, logging_enabled: bool):
4747
self._root_path.mkdir(parents=True, exist_ok=True)
4848
self._logging_enabled = logging_enabled
4949

50-
def clear(self) -> None:
50+
def clear_key(self, key: Tuple[str, ...]) -> None:
    """Remove the cache entry stored under ``key``, if one exists.

    The key is translated to a filesystem path via ``self._get_path``. When
    nothing exists at that path the call is a silent no-op: nothing is removed
    and nothing is logged.

    Args:
        key: Tuple of strings identifying the cache entry.
    """
    entry_path = self._get_path(key)
    if not entry_path.exists():
        return
    entry_path.unlink()
    self.log(f"CACHE [clear-key]: {entry_path}")
55+
56+
def clear_all(self) -> None:
5157
shutil.rmtree(self._root_path)
52-
self.log(f"CACHE [clear]: {self._root_path}")
58+
self.log(f"CACHE [clear-all]: {self._root_path}")
5359

5460
def get(self, key: Tuple[str, ...]) -> Optional[str]:
5561
path = self._get_path(key)

python_modules/libraries/dagster-dg/dagster_dg/cli/__init__.py

Lines changed: 61 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import sys
12
from pathlib import Path
23

34
import click
@@ -7,6 +8,14 @@
78
from dagster_dg.cli.info import info_cli
89
from dagster_dg.cli.list import list_cli
910
from dagster_dg.config import DgConfig, set_config_on_cli_context
11+
from dagster_dg.context import (
12+
DgContext,
13+
ensure_uv_lock,
14+
fetch_component_registry,
15+
is_inside_code_location_directory,
16+
make_cache_key,
17+
resolve_code_location_root_directory,
18+
)
1019
from dagster_dg.utils import DgClickGroup
1120
from dagster_dg.version import __version__
1221

@@ -49,6 +58,15 @@ def create_dg_cli():
4958
help="Clear the cache before running the command.",
5059
default=False,
5160
)
61+
@click.option(
62+
"--rebuild-component-registry",
63+
is_flag=True,
64+
help=(
65+
"Recompute and cache the set of available component types for the current environment."
66+
" Note that this also happens automatically whenever the cache is detected to be stale."
67+
),
68+
default=False,
69+
)
5270
@click.option(
5371
"--cache-dir",
5472
type=Path,
@@ -64,6 +82,7 @@ def group(
6482
disable_cache: bool,
6583
cache_dir: Path,
6684
clear_cache: bool,
85+
rebuild_component_registry: bool,
6786
):
6887
"""CLI tools for working with Dagster components."""
6988
context.ensure_object(dict)
@@ -73,19 +92,57 @@ def group(
7392
disable_cache=disable_cache,
7493
cache_dir=cache_dir,
7594
)
76-
if clear_cache:
77-
DgCache.from_config(config).clear()
95+
set_config_on_cli_context(context, config)
96+
97+
if clear_cache and rebuild_component_registry:
98+
click.echo(
99+
click.style(
100+
"Cannot specify both --clear-cache and --rebuild-component-registry.", fg="red"
101+
)
102+
)
103+
sys.exit(1)
104+
elif clear_cache:
105+
DgCache.from_config(config).clear_all()
78106
if context.invoked_subcommand is None:
79107
context.exit(0)
108+
elif rebuild_component_registry:
109+
if context.invoked_subcommand is not None:
110+
click.echo(
111+
click.style(
112+
"Cannot specify --rebuild-component-registry with a subcommand.", fg="red"
113+
)
114+
)
115+
sys.exit(1)
116+
_rebuild_component_registry(context)
80117
elif context.invoked_subcommand is None:
81118
click.echo(context.get_help())
82119
context.exit(0)
83120

84-
set_config_on_cli_context(context, config)
85-
86121
return group
87122

88123

124+
def _rebuild_component_registry(cli_context: click.Context):
    """Discard the cached component registry for the current code location and rebuild it.

    Prints a red message and exits the process with status 1 when the current
    working directory is not inside a code location directory, or when the
    cache is disabled on the resolved ``DgContext``.

    Args:
        cli_context: The click context of the running ``dg`` invocation.
    """
    dg_context = DgContext.from_cli_context(cli_context)
    cwd = Path.cwd()

    if not is_inside_code_location_directory(cwd):
        message = "This command must be run inside a Dagster code location directory."
        click.echo(click.style(message, fg="red"))
        sys.exit(1)

    cache = dg_context.cache
    if not cache:
        message = "Cache is disabled. This command cannot be run without a cache."
        click.echo(click.style(message, fg="red"))
        sys.exit(1)

    code_location_root = resolve_code_location_root_directory(cwd)
    # Ordering preserved from the original: the lock file is ensured before the
    # cache key is computed (presumably the key depends on it -- TODO confirm).
    ensure_uv_lock(code_location_root)
    registry_key = make_cache_key(code_location_root, "component_registry_data")
    cache.clear_key(registry_key)

    # With the entry gone, fetching the registry misses the cache and so
    # recomputes and re-stores the registry data.
    fetch_component_registry(cwd, dg_context)
145+
89146
ENV_PREFIX = "DAGSTER_DG"
90147
cli = create_dg_cli()
91148

python_modules/libraries/dagster-dg/dagster_dg/cli/generate.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,15 @@ def generate_deployment_command(path: Path) -> None:
6565
" the location of the local Dagster clone will be read from the `DAGSTER_GIT_REPO_DIR` environment variable."
6666
),
6767
)
68+
@click.option(
69+
"--skip-venv",
70+
is_flag=True,
71+
default=False,
72+
help="Do not create a virtual environment for the code location.",
73+
)
6874
@click.pass_context
6975
def generate_code_location_command(
70-
cli_context: click.Context, name: str, use_editable_dagster: Optional[str]
76+
cli_context: click.Context, name: str, use_editable_dagster: Optional[str], skip_venv: bool
7177
) -> None:
7278
"""Generate a Dagster code location file structure and a uv-managed virtual environment scoped
7379
to the code location.
@@ -117,7 +123,7 @@ def generate_code_location_command(
117123
else:
118124
editable_dagster_root = None
119125

120-
generate_code_location(code_location_path, editable_dagster_root)
126+
generate_code_location(code_location_path, dg_context, editable_dagster_root, skip_venv)
121127

122128

123129
@generate_cli.command(name="component-type", cls=DgClickCommand)

python_modules/libraries/dagster-dg/dagster_dg/context.py

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import hashlib
22
import json
33
import os
4+
import subprocess
45
from dataclasses import dataclass
56
from pathlib import Path
67
from typing import Final, Iterable, Optional, Tuple
@@ -15,8 +16,10 @@
1516
from dagster_dg.error import DgError
1617
from dagster_dg.utils import (
1718
execute_code_location_command,
19+
get_uv_command_env,
1820
hash_directory_metadata,
1921
hash_file_metadata,
22+
pushd,
2023
)
2124

2225

@@ -43,13 +46,13 @@ def _is_deployment_root_directory(path: Path) -> bool:
4346

4447
def is_inside_code_location_directory(path: Path) -> bool:
4548
try:
46-
_resolve_code_location_root_directory(path)
49+
resolve_code_location_root_directory(path)
4750
return True
4851
except DgError:
4952
return False
5053

5154

52-
def _resolve_code_location_root_directory(path: Path) -> Path:
55+
def resolve_code_location_root_directory(path: Path) -> Path:
5356
current_path = path.absolute()
5457
while not _is_code_location_root_directory(current_path):
5558
current_path = current_path.parent
@@ -134,6 +137,31 @@ def make_cache_key(code_location_path: Path, data_type: CachableDataType) -> Tup
134137
return ("_".join(path_parts), env_hash, data_type)
135138

136139

140+
def ensure_uv_lock(root_path: Path) -> None:
    """Make sure ``root_path`` contains a ``uv.lock`` file, running ``uv sync`` if it does not.

    Args:
        root_path: Directory that should contain ``uv.lock``. May be absolute
            or relative to the current working directory.

    Raises:
        subprocess.CalledProcessError: If ``uv sync`` exits with a non-zero status.
    """
    # Test for the lock file *before* entering ``pushd``. ``pushd`` presumably
    # changes the working directory; checking afterwards would resolve a
    # relative ``root_path`` against the new cwd and look in the wrong place.
    if (root_path / "uv.lock").exists():
        return

    with pushd(root_path):
        # Run from inside the code location; ``uv sync`` is expected to create
        # the lock file there.
        subprocess.run(["uv", "sync"], check=True, env=get_uv_command_env())
144+
145+
146+
def fetch_component_registry(path: Path, dg_context: DgContext) -> RemoteComponentRegistry:
    """Return the component registry for the code location that contains ``path``.

    When ``dg_context.cache`` is available, the raw registry data is looked up
    under the ``"component_registry_data"`` cache key first. On a miss (or when
    no cache is available) the data is produced by running
    ``list component-types`` through ``execute_code_location_command`` and, if
    a cache is available, stored back under the same key.

    Args:
        path: Any path inside the code location.
        dg_context: Context providing the (optional) cache.

    Returns:
        The registry built from the JSON-decoded registry data.
    """
    root_path = resolve_code_location_root_directory(path)
    cache = dg_context.cache

    def _list_component_types():
        # Ask the code location environment for its component types.
        return execute_code_location_command(
            root_path, ["list", "component-types"], dg_context
        )

    if not cache:
        raw_registry_data = _list_component_types()
    else:
        cache_key = make_cache_key(root_path, "component_registry_data")
        raw_registry_data = cache.get(cache_key)
        if not raw_registry_data:
            raw_registry_data = _list_component_types()
            cache.set(cache_key, raw_registry_data)

    return RemoteComponentRegistry.from_dict(json.loads(raw_registry_data))
163+
164+
137165
@dataclass
138166
class CodeLocationDirectoryContext:
139167
"""Class encapsulating contextual information about a components code location directory.
@@ -155,23 +183,9 @@ class CodeLocationDirectoryContext:
155183

156184
@classmethod
157185
def from_path(cls, path: Path, dg_context: DgContext) -> Self:
158-
root_path = _resolve_code_location_root_directory(path)
159-
160-
cache = dg_context.cache
161-
if cache:
162-
cache_key = make_cache_key(root_path, "component_registry_data")
163-
164-
raw_registry_data = cache.get(cache_key) if cache else None
165-
if not raw_registry_data:
166-
raw_registry_data = execute_code_location_command(
167-
root_path, ["list", "component-types"], dg_context
168-
)
169-
if cache:
170-
cache.set(cache_key, raw_registry_data)
171-
172-
registry_data = json.loads(raw_registry_data)
173-
component_registry = RemoteComponentRegistry.from_dict(registry_data)
174-
186+
root_path = resolve_code_location_root_directory(path)
187+
ensure_uv_lock(root_path)
188+
component_registry = fetch_component_registry(path, dg_context)
175189
return cls(
176190
root_path=root_path,
177191
name=path.name,

python_modules/libraries/dagster-dg/dagster_dg/generate.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,18 @@
11
import json
22
import os
3-
import subprocess
43
import textwrap
54
from pathlib import Path
65
from typing import Any, Mapping, Optional
76

87
import click
98

10-
from dagster_dg.context import CodeLocationDirectoryContext, DgContext
11-
from dagster_dg.utils import (
12-
camelcase,
13-
execute_code_location_command,
14-
generate_subtree,
15-
get_uv_command_env,
16-
pushd,
9+
from dagster_dg.context import (
10+
CodeLocationDirectoryContext,
11+
DgContext,
12+
ensure_uv_lock,
13+
fetch_component_registry,
1714
)
15+
from dagster_dg.utils import camelcase, execute_code_location_command, generate_subtree
1816

1917
# ########################
2018
# ##### DEPLOYMENT
@@ -89,7 +87,12 @@ def get_pyproject_toml_uv_sources(editable_dagster_root: str) -> str:
8987
""")
9088

9189

92-
def generate_code_location(path: Path, editable_dagster_root: Optional[str] = None) -> None:
90+
def generate_code_location(
91+
path: Path,
92+
dg_context: DgContext,
93+
editable_dagster_root: Optional[str] = None,
94+
skip_venv: bool = False,
95+
) -> None:
9396
click.echo(f"Creating a Dagster code location at {path}.")
9497

9598
dependencies = get_pyproject_toml_dependencies(use_editable_dagster=bool(editable_dagster_root))
@@ -112,8 +115,9 @@ def generate_code_location(path: Path, editable_dagster_root: Optional[str] = No
112115
)
113116

114117
# Build the venv
115-
with pushd(path):
116-
subprocess.run(["uv", "sync"], check=True, env=get_uv_command_env())
118+
if not skip_venv:
119+
ensure_uv_lock(path)
120+
fetch_component_registry(path, dg_context) # Populate the cache
117121

118122

119123
# ########################

python_modules/libraries/dagster-dg/dagster_dg_tests/cli_tests/test_generate_commands.py

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import pytest
88
import tomli
99
from dagster_dg.context import CodeLocationDirectoryContext, DgContext
10-
from dagster_dg.utils import discover_git_root, ensure_dagster_dg_tests_import
10+
from dagster_dg.utils import discover_git_root, ensure_dagster_dg_tests_import, pushd
1111

1212
ensure_dagster_dg_tests_import()
1313

@@ -40,7 +40,12 @@ def test_generate_deployment_command_already_exists_fails() -> None:
4040

4141

4242
def test_generate_code_location_inside_deployment_success() -> None:
43-
with ProxyRunner.test() as runner, isolated_example_deployment_foo(runner):
43+
# Don't use the test component lib because it is not present in published dagster-components,
44+
# which this test is currently accessing since we are not doing an editable install.
45+
with (
46+
ProxyRunner.test(use_test_component_lib=False) as runner,
47+
isolated_example_deployment_foo(runner),
48+
):
4449
result = runner.invoke("generate", "code-location", "bar")
4550
assert_runner_result(result)
4651
assert Path("code_locations/bar").exists()
@@ -60,9 +65,16 @@ def test_generate_code_location_inside_deployment_success() -> None:
6065
# No tool.uv.sources added without --use-editable-dagster
6166
assert "uv" not in toml["tool"]
6267

68+
# Check cache was populated
69+
with pushd("code_locations/bar"):
70+
result = runner.invoke("--verbose", "list", "component-types")
71+
assert "CACHE [hit]" in result.output
72+
6373

6474
def test_generate_code_location_outside_deployment_success() -> None:
65-
with ProxyRunner.test() as runner, runner.isolated_filesystem():
75+
# Don't use the test component lib because it is not present in published dagster-components,
76+
# which this test is currently accessing since we are not doing an editable install.
77+
with ProxyRunner.test(use_test_component_lib=False) as runner, runner.isolated_filesystem():
6678
result = runner.invoke("generate", "code-location", "bar")
6779
assert_runner_result(result)
6880
assert Path("bar").exists()
@@ -110,6 +122,24 @@ def test_generate_code_location_editable_dagster_success(mode: str, monkeypatch)
110122
}
111123

112124

125+
def test_generate_code_location_skip_venv_success() -> None:
126+
# With --skip-venv no virtual environment is built and the component registry is not fetched,
127+
# so (unlike the tests above) the default ProxyRunner.test() configuration can be used here.
128+
with ProxyRunner.test() as runner, runner.isolated_filesystem():
129+
result = runner.invoke("generate", "code-location", "--skip-venv", "bar")
130+
assert_runner_result(result)
131+
assert Path("bar").exists()
132+
assert Path("bar/bar").exists()
133+
assert Path("bar/bar/lib").exists()
134+
assert Path("bar/bar/components").exists()
135+
assert Path("bar/bar_tests").exists()
136+
assert Path("bar/pyproject.toml").exists()
137+
138+
# Check venv and lock file were not created
139+
assert not Path("bar/.venv").exists()
140+
assert not Path("bar/uv.lock").exists()
141+
142+
113143
def test_generate_code_location_editable_dagster_no_env_var_no_value_fails(monkeypatch) -> None:
114144
monkeypatch.setenv("DAGSTER_GIT_REPO_DIR", "")
115145
with ProxyRunner.test() as runner, isolated_example_deployment_foo(runner):
@@ -120,9 +150,9 @@ def test_generate_code_location_editable_dagster_no_env_var_no_value_fails(monke
120150

121151
def test_generate_code_location_already_exists_fails() -> None:
122152
with ProxyRunner.test() as runner, isolated_example_deployment_foo(runner):
123-
result = runner.invoke("generate", "code-location", "bar")
153+
result = runner.invoke("generate", "code-location", "bar", "--skip-venv")
124154
assert_runner_result(result)
125-
result = runner.invoke("generate", "code-location", "bar")
155+
result = runner.invoke("generate", "code-location", "bar", "--skip-venv")
126156
assert_runner_result(result, exit_0=False)
127157
assert "already exists" in result.output
128158

0 commit comments

Comments
 (0)