Skip to content

Commit f50d4d7

Browse files
committed
[u 5/6] Convert source config to dictionary (#7066)
1 parent 6c5b755 commit f50d4d7

File tree

17 files changed

+226
-157
lines changed

17 files changed

+226
-157
lines changed

UPGRADING.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ In your personal deployments' ``environment.py`` files:
3030

3131
2. Remove the ``prefix`` parameter and its uses from ``bqsrc`` and ``mksrc``.
3232

33+
3. Update the function body of ``mksrc``.
34+
35+
4. Update the assignment of the ``sources`` parameter in the ``AZUL_CATALOGS``
36+
variable.
37+
3338
As always, use the sandbox deployment's ``environment.py`` as a model when
3439
upgrading personal deployments.
3540

deployments/anvilbox/environment.py

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,14 @@
1212

1313
type ProjectName = str
1414
type SourceSpec = str
15+
type SourceConfig = dict[str, str | int | float | bool | None]
16+
type SourceItem = tuple[SourceSpec, SourceConfig]
1517

1618

1719
def bqsrc(google_project: str,
1820
snapshot: str,
19-
flags: int = 0,
20-
) -> tuple[ProjectName, SourceSpec | None]:
21+
flags: int = 0
22+
) -> tuple[ProjectName, SourceItem | None]:
2123
assert len(google_project) == 8, google_project
2224
project = 'datarepo-dev-' + google_project
2325
assert not snapshot.startswith('ANVIL_'), snapshot
@@ -28,36 +30,40 @@ def bqsrc(google_project: str,
2830
def mksrc(source_type: Literal['bigquery', 'parquet'],
2931
google_project,
3032
snapshot,
31-
flags: int = 0,
32-
) -> tuple[ProjectName, SourceSpec | None]:
33+
flags: int = 0
34+
) -> tuple[ProjectName, SourceItem | None]:
3335
project = '_'.join(snapshot.split('_')[1:-3])
3436
assert flags <= pop
35-
source = None if flags & pop else ':'.join([
36-
'tdr',
37-
source_type,
38-
'gcp',
39-
google_project,
40-
snapshot,
41-
])
37+
source = None if flags & pop else (
38+
':'.join([
39+
'tdr',
40+
source_type,
41+
'gcp',
42+
google_project,
43+
snapshot,
44+
]),
45+
{}
46+
)
4247
return project, source
4348

4449

45-
def mkdelta(items: list[tuple[ProjectName, SourceSpec | None]]
46-
) -> dict[ProjectName, SourceSpec | None]:
50+
def mkdelta(items: list[tuple[ProjectName, SourceItem | None]]
51+
) -> dict[ProjectName, SourceItem | None]:
4752
result = dict(items)
4853
assert len(items) == len(result), 'collisions detected'
4954
assert list(result.keys()) == sorted(result.keys()), 'input not sorted'
5055
return result
5156

5257

53-
def mksrcs(catalog: dict[ProjectName, SourceSpec | None]) -> list[SourceSpec]:
54-
return list(filter(None, catalog.values()))
58+
def mksrcs(catalog: dict[ProjectName, SourceItem | None]
59+
) -> dict[SourceSpec, SourceConfig]:
60+
return dict(filter(None, catalog.values()))
5561

5662

57-
def mkdict(previous_catalog: dict[ProjectName, SourceSpec | None],
63+
def mkdict(previous_catalog: dict[ProjectName, SourceItem | None],
5864
num_expected: int,
59-
delta: dict[ProjectName, SourceSpec | None],
60-
) -> dict[ProjectName, SourceSpec | None]:
65+
delta: dict[ProjectName, SourceItem | None],
66+
) -> dict[ProjectName, SourceItem | None]:
6167
catalog = previous_catalog | delta
6268
num_actual = len(mksrcs(catalog))
6369
assert num_expected == num_actual, (num_expected, num_actual)

deployments/anvildev/environment.py

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,14 @@
1010

1111
type ProjectName = str
1212
type SourceSpec = str
13+
type SourceConfig = dict[str, str | int | float | bool | None]
14+
type SourceItem = tuple[SourceSpec, SourceConfig]
1315

1416

1517
def bqsrc(google_project: str,
1618
snapshot: str,
17-
flags: int = 0,
18-
) -> tuple[ProjectName, SourceSpec | None]:
19+
flags: int = 0
20+
) -> tuple[ProjectName, SourceItem | None]:
1921
assert len(google_project) == 8, google_project
2022
project = 'datarepo-dev-' + google_project
2123
assert not snapshot.startswith('ANVIL_'), snapshot
@@ -26,36 +28,40 @@ def bqsrc(google_project: str,
2628
def mksrc(source_type: Literal['bigquery', 'parquet'],
2729
google_project,
2830
snapshot,
29-
flags: int = 0,
30-
) -> tuple[ProjectName, SourceSpec | None]:
31+
flags: int = 0
32+
) -> tuple[ProjectName, SourceItem | None]:
3133
project = '_'.join(snapshot.split('_')[1:-3])
3234
assert flags <= pop
33-
source = None if flags & pop else ':'.join([
34-
'tdr',
35-
source_type,
36-
'gcp',
37-
google_project,
38-
snapshot,
39-
])
35+
source = None if flags & pop else (
36+
':'.join([
37+
'tdr',
38+
source_type,
39+
'gcp',
40+
google_project,
41+
snapshot,
42+
]),
43+
{}
44+
)
4045
return project, source
4146

4247

43-
def mkdelta(items: list[tuple[ProjectName, SourceSpec | None]]
44-
) -> dict[ProjectName, SourceSpec | None]:
48+
def mkdelta(items: list[tuple[ProjectName, SourceItem | None]]
49+
) -> dict[ProjectName, SourceItem | None]:
4550
result = dict(items)
4651
assert len(items) == len(result), 'collisions detected'
4752
assert list(result.keys()) == sorted(result.keys()), 'input not sorted'
4853
return result
4954

5055

51-
def mksrcs(catalog: dict[ProjectName, SourceSpec | None]) -> list[SourceSpec]:
52-
return list(filter(None, catalog.values()))
56+
def mksrcs(catalog: dict[ProjectName, SourceItem | None]
57+
) -> dict[SourceSpec, SourceConfig]:
58+
return dict(filter(None, catalog.values()))
5359

5460

55-
def mkdict(previous_catalog: dict[ProjectName, SourceSpec | None],
61+
def mkdict(previous_catalog: dict[ProjectName, SourceItem | None],
5662
num_expected: int,
57-
delta: dict[ProjectName, SourceSpec | None],
58-
) -> dict[ProjectName, SourceSpec | None]:
63+
delta: dict[ProjectName, SourceItem | None],
64+
) -> dict[ProjectName, SourceItem | None]:
5965
catalog = previous_catalog | delta
6066
num_actual = len(mksrcs(catalog))
6167
assert num_expected == num_actual, (num_expected, num_actual)

deployments/anvilprod/environment.py

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,14 @@
1212

1313
type ProjectName = str
1414
type SourceSpec = str
15+
type SourceConfig = dict[str, str | int | float | bool | None]
16+
type SourceItem = tuple[SourceSpec, SourceConfig]
1517

1618

1719
def bqsrc(google_project: str,
1820
snapshot: str,
19-
flags: int = 0,
20-
) -> tuple[ProjectName, SourceSpec | None]:
21+
flags: int = 0
22+
) -> tuple[ProjectName, SourceItem | None]:
2123
assert len(google_project) == 8, google_project
2224
project = 'datarepo-' + google_project
2325
# Some snapshots start with AnVIL instead of ANVIL
@@ -29,36 +31,40 @@ def bqsrc(google_project: str,
2931
def mksrc(source_type: Literal['bigquery', 'parquet'],
3032
google_project,
3133
snapshot,
32-
flags: int = 0,
33-
) -> tuple[ProjectName, SourceSpec | None]:
34+
flags: int = 0
35+
) -> tuple[ProjectName, SourceItem | None]:
3436
project = '_'.join(snapshot.split('_')[1:-3])
3537
assert flags <= pop
36-
source = None if flags & pop else ':'.join([
37-
'tdr',
38-
source_type,
39-
'gcp',
40-
google_project,
41-
snapshot,
42-
])
38+
source = None if flags & pop else (
39+
':'.join([
40+
'tdr',
41+
source_type,
42+
'gcp',
43+
google_project,
44+
snapshot,
45+
]),
46+
{}
47+
)
4348
return project, source
4449

4550

46-
def mkdelta(items: list[tuple[ProjectName, SourceSpec | None]]
47-
) -> dict[ProjectName, SourceSpec | None]:
51+
def mkdelta(items: list[tuple[ProjectName, SourceItem | None]]
52+
) -> dict[ProjectName, SourceItem | None]:
4853
result = dict(items)
4954
assert len(items) == len(result), 'collisions detected'
5055
assert list(result.keys()) == sorted(result.keys()), 'input not sorted'
5156
return result
5257

5358

54-
def mksrcs(catalog: dict[ProjectName, SourceSpec | None]) -> list[SourceSpec]:
55-
return list(filter(None, catalog.values()))
59+
def mksrcs(catalog: dict[ProjectName, SourceItem | None]
60+
) -> dict[SourceSpec, SourceConfig]:
61+
return dict(filter(None, catalog.values()))
5662

5763

58-
def mkdict(previous_catalog: dict[ProjectName, SourceSpec | None],
64+
def mkdict(previous_catalog: dict[ProjectName, SourceItem | None],
5965
num_expected: int,
60-
delta: dict[ProjectName, SourceSpec | None],
61-
) -> dict[ProjectName, SourceSpec | None]:
66+
delta: dict[ProjectName, SourceItem | None],
67+
) -> dict[ProjectName, SourceItem | None]:
6268
catalog = previous_catalog | delta
6369
num_actual = len(mksrcs(catalog))
6470
assert num_expected == num_actual, (num_expected, num_actual)

deployments/dev/environment.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,41 +10,47 @@
1010

1111
type ProjectName = str
1212
type SourceSpec = str
13+
type SourceConfig = dict[str, str | int | float | bool | None]
14+
type SourceItem = tuple[SourceSpec, SourceConfig]
1315

1416

1517
def mksrc(source_type: Literal['bigquery', 'parquet'],
1618
google_project,
1719
snapshot,
18-
flags: int = 0,
19-
) -> tuple[ProjectName, SourceSpec | None]:
20+
flags: int = 0
21+
) -> tuple[ProjectName, SourceItem | None]:
2022
_, env, project, _ = snapshot.split('_', 3)
2123
assert flags <= pop
22-
source = None if flags & pop else ':'.join([
23-
'tdr',
24-
source_type,
25-
'gcp',
26-
google_project,
27-
snapshot,
28-
])
24+
source = None if flags & pop else (
25+
':'.join([
26+
'tdr',
27+
source_type,
28+
'gcp',
29+
google_project,
30+
snapshot,
31+
]),
32+
{}
33+
)
2934
return project, source
3035

3136

32-
def mkdelta(items: list[tuple[ProjectName, SourceSpec | None]]
33-
) -> dict[ProjectName, SourceSpec | None]:
37+
def mkdelta(items: list[tuple[ProjectName, SourceItem | None]]
38+
) -> dict[ProjectName, SourceItem | None]:
3439
result = dict(items)
3540
assert len(items) == len(result), 'collisions detected'
3641
assert list(result.keys()) == sorted(result.keys()), 'input not sorted'
3742
return result
3843

3944

40-
def mksrcs(catalog: dict[ProjectName, SourceSpec | None]) -> list[SourceSpec]:
41-
return list(filter(None, catalog.values()))
45+
def mksrcs(catalog: dict[ProjectName, SourceItem | None]
46+
) -> dict[SourceSpec, SourceConfig]:
47+
return dict(filter(None, catalog.values()))
4248

4349

44-
def mkdict(previous_catalog: dict[ProjectName, SourceSpec | None],
50+
def mkdict(previous_catalog: dict[ProjectName, SourceItem | None],
4551
num_expected: int,
46-
delta: dict[ProjectName, SourceSpec | None],
47-
) -> dict[ProjectName, SourceSpec | None]:
52+
delta: dict[ProjectName, SourceItem | None],
53+
) -> dict[ProjectName, SourceItem | None]:
4854
catalog = previous_catalog | delta
4955
num_actual = len(mksrcs(catalog))
5056
assert num_expected == num_actual, (num_expected, num_actual)

deployments/hammerbox/environment.py

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,14 @@
1414

1515
type ProjectName = str
1616
type SourceSpec = str
17+
type SourceConfig = dict[str, str | int | float | bool | None]
18+
type SourceItem = tuple[SourceSpec, SourceConfig]
1719

1820

1921
def bqsrc(google_project: str,
2022
snapshot: str,
21-
flags: int = 0,
22-
) -> tuple[ProjectName, SourceSpec | None]:
23+
flags: int = 0
24+
) -> tuple[ProjectName, SourceItem | None]:
2325
assert len(google_project) == 8, google_project
2426
project = 'datarepo-' + google_project
2527
# Some snapshots start with AnVIL instead of ANVIL
@@ -31,36 +33,40 @@ def bqsrc(google_project: str,
3133
def mksrc(source_type: Literal['bigquery', 'parquet'],
3234
google_project,
3335
snapshot,
34-
flags: int = 0,
35-
) -> tuple[ProjectName, SourceSpec | None]:
36+
flags: int = 0
37+
) -> tuple[ProjectName, SourceItem | None]:
3638
project = '_'.join(snapshot.split('_')[1:-3])
3739
assert flags <= pop
38-
source = None if flags & pop else ':'.join([
39-
'tdr',
40-
source_type,
41-
'gcp',
42-
google_project,
43-
snapshot,
44-
])
40+
source = None if flags & pop else (
41+
':'.join([
42+
'tdr',
43+
source_type,
44+
'gcp',
45+
google_project,
46+
snapshot,
47+
]),
48+
{}
49+
)
4550
return project, source
4651

4752

48-
def mkdelta(items: list[tuple[ProjectName, SourceSpec | None]]
49-
) -> dict[ProjectName, SourceSpec | None]:
53+
def mkdelta(items: list[tuple[ProjectName, SourceItem | None]]
54+
) -> dict[ProjectName, SourceItem | None]:
5055
result = dict(items)
5156
assert len(items) == len(result), 'collisions detected'
5257
assert list(result.keys()) == sorted(result.keys()), 'input not sorted'
5358
return result
5459

5560

56-
def mksrcs(catalog: dict[ProjectName, SourceSpec | None]) -> list[SourceSpec]:
57-
return list(filter(None, catalog.values()))
61+
def mksrcs(catalog: dict[ProjectName, SourceItem | None]
62+
) -> dict[SourceSpec, SourceConfig]:
63+
return dict(filter(None, catalog.values()))
5864

5965

60-
def mkdict(previous_catalog: dict[ProjectName, SourceSpec | None],
66+
def mkdict(previous_catalog: dict[ProjectName, SourceItem | None],
6167
num_expected: int,
62-
delta: dict[ProjectName, SourceSpec | None],
63-
) -> dict[ProjectName, SourceSpec | None]:
68+
delta: dict[ProjectName, SourceItem | None],
69+
) -> dict[ProjectName, SourceItem | None]:
6470
catalog = previous_catalog | delta
6571
num_actual = len(mksrcs(catalog))
6672
assert num_expected == num_actual, (num_expected, num_actual)

0 commit comments

Comments
 (0)