Skip to content

Commit 73b51e6

Browse files
committed
[u 7/7] Configure mirroring per source (#7066)
1 parent e19ec3f commit 73b51e6

File tree

15 files changed

+96
-30
lines changed

15 files changed

+96
-30
lines changed

UPGRADING.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,27 @@ reverted. This is all fairly informal and loosely defined. Hopefully we won't
2020
have too many entries in this file.
2121

2222

23+
#7066 Configure mirroring per catalog and source
24+
================================================
25+
26+
In your personal deployments' ``environment.py`` file(s):
27+
28+
1. Rename the loop variable ``internal`` to ``is_it``
29+
30+
2. Update the value of the ``pop`` flag
31+
32+
3. Replace ``mklist`` with ``mksrcs`` and update the call site
33+
34+
4. Update the type annotations and definitions for ``bqsrc``, ``mksrc``,
35+
``mkdelta``, and ``mkdict``
36+
37+
5. Insert the ``mirror_limit`` property in the definition of
38+
``AZUL_CATALOGS``
39+
40+
As always, use the sandbox deployment's ``environment.py`` as a model when
41+
upgrading personal deployments.
42+
43+
2344
#7468 Update Swagger to v5.29.4
2445
===============================
2546

deployments/anvilbox/environment.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
is_sandbox = True
1010

1111
pop = 1 # remove snapshot
12+
no_mirror = 2 # do not mirror files from snapshot (redundant for managed access snapshots)
1213

1314
type ProjectName = str
1415
type SourceSpec = str
@@ -33,7 +34,7 @@ def mksrc(source_type: Literal['bigquery', 'parquet'],
3334
flags: int = 0
3435
) -> tuple[ProjectName, SourceItem | None]:
3536
project = '_'.join(snapshot.split('_')[1:-3])
36-
assert flags <= pop
37+
assert flags <= pop | no_mirror
3738
source = None if flags & pop else (
3839
':'.join([
3940
'tdr',
@@ -42,7 +43,9 @@ def mksrc(source_type: Literal['bigquery', 'parquet'],
4243
google_project,
4344
snapshot,
4445
]),
45-
{}
46+
{
47+
'mirror': not (flags & no_mirror),
48+
}
4649
)
4750
return project, source
4851

deployments/anvildev/environment.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
)
88

99
pop = 1 # remove snapshot
10+
no_mirror = 2 # do not mirror files from snapshot (redundant for managed access snapshots)
1011

1112
type ProjectName = str
1213
type SourceSpec = str
@@ -31,7 +32,7 @@ def mksrc(source_type: Literal['bigquery', 'parquet'],
3132
flags: int = 0
3233
) -> tuple[ProjectName, SourceItem | None]:
3334
project = '_'.join(snapshot.split('_')[1:-3])
34-
assert flags <= pop
35+
assert flags <= pop | no_mirror
3536
source = None if flags & pop else (
3637
':'.join([
3738
'tdr',
@@ -40,7 +41,9 @@ def mksrc(source_type: Literal['bigquery', 'parquet'],
4041
google_project,
4142
snapshot,
4243
]),
43-
{}
44+
{
45+
'mirror': not (flags & no_mirror),
46+
}
4447
)
4548
return project, source
4649

deployments/anvilprod/environment.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
)
1010

1111
pop = 1 # remove snapshot
12+
no_mirror = 2 # do not mirror files from snapshot (redundant for managed access snapshots)
1213

1314
type ProjectName = str
1415
type SourceSpec = str
@@ -34,7 +35,7 @@ def mksrc(source_type: Literal['bigquery', 'parquet'],
3435
flags: int = 0
3536
) -> tuple[ProjectName, SourceItem | None]:
3637
project = '_'.join(snapshot.split('_')[1:-3])
37-
assert flags <= pop
38+
assert flags <= pop | no_mirror
3839
source = None if flags & pop else (
3940
':'.join([
4041
'tdr',
@@ -43,7 +44,9 @@ def mksrc(source_type: Literal['bigquery', 'parquet'],
4344
google_project,
4445
snapshot,
4546
]),
46-
{}
47+
{
48+
'mirror': not (flags & no_mirror),
49+
}
4750
)
4851
return project, source
4952

deployments/dev/environment.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
)
88

99
pop = 1 # remove snapshot
10+
no_mirror = 2 # do not mirror files from snapshot (redundant for managed access snapshots)
1011

1112
type ProjectName = str
1213
type SourceSpec = str
@@ -20,7 +21,7 @@ def mksrc(source_type: Literal['bigquery', 'parquet'],
2021
flags: int = 0
2122
) -> tuple[ProjectName, SourceItem | None]:
2223
_, env, project, _ = snapshot.split('_', 3)
23-
assert flags <= pop
24+
assert flags <= pop | no_mirror
2425
source = None if flags & pop else (
2526
':'.join([
2627
'tdr',
@@ -29,7 +30,9 @@ def mksrc(source_type: Literal['bigquery', 'parquet'],
2930
google_project,
3031
snapshot,
3132
]),
32-
{}
33+
{
34+
'mirror': not (flags & no_mirror),
35+
}
3336
)
3437
return project, source
3538

deployments/hammerbox/environment.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
is_sandbox = True
1212

1313
pop = 1 # remove snapshot
14+
no_mirror = 2 # do not mirror files from snapshot (redundant for managed access snapshots)
1415

1516
type ProjectName = str
1617
type SourceSpec = str
@@ -36,7 +37,7 @@ def mksrc(source_type: Literal['bigquery', 'parquet'],
3637
flags: int = 0
3738
) -> tuple[ProjectName, SourceItem | None]:
3839
project = '_'.join(snapshot.split('_')[1:-3])
39-
assert flags <= pop
40+
assert flags <= pop | no_mirror
4041
source = None if flags & pop else (
4142
':'.join([
4243
'tdr',
@@ -45,7 +46,9 @@ def mksrc(source_type: Literal['bigquery', 'parquet'],
4546
google_project,
4647
snapshot,
4748
]),
48-
{}
49+
{
50+
'mirror': not (flags & no_mirror),
51+
}
4952
)
5053
return project, source
5154

deployments/prod/environment.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
)
1010

1111
pop = 1 # remove snapshot
12+
no_mirror = 2 # do not mirror files from snapshot (redundant for managed access snapshots)
1213

1314
type ProjectName = str
1415
type SourceSpec = str
@@ -22,7 +23,7 @@ def mksrc(source_type: Literal['bigquery', 'parquet'],
2223
flags: int = 0
2324
) -> tuple[ProjectName, SourceItem | None]:
2425
_, env, project, _ = snapshot.split('_', 3)
25-
assert flags <= pop
26+
assert flags <= pop | no_mirror
2627
source = None if flags & pop else (
2728
':'.join([
2829
'tdr',
@@ -31,7 +32,9 @@ def mksrc(source_type: Literal['bigquery', 'parquet'],
3132
google_project,
3233
snapshot,
3334
]),
34-
{}
35+
{
36+
'mirror': not (flags & no_mirror),
37+
}
3538
)
3639
return project, source
3740

deployments/sandbox/environment.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
is_sandbox = True
1010

1111
pop = 1 # remove snapshot
12+
no_mirror = 2 # do not mirror files from snapshot (redundant for managed access snapshots)
1213

1314
type ProjectName = str
1415
type SourceSpec = str
@@ -22,7 +23,7 @@ def mksrc(source_type: Literal['bigquery', 'parquet'],
2223
flags: int = 0
2324
) -> tuple[ProjectName, SourceItem | None]:
2425
_, env, project, _ = snapshot.split('_', 3)
25-
assert flags <= pop
26+
assert flags <= pop | no_mirror
2627
source = None if flags & pop else (
2728
':'.join([
2829
'tdr',
@@ -31,7 +32,9 @@ def mksrc(source_type: Literal['bigquery', 'parquet'],
3132
google_project,
3233
snapshot,
3334
]),
34-
{}
35+
{
36+
'mirror': not (flags & no_mirror),
37+
}
3538
)
3639
return project, source
3740

deployments/tempdev/environment.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
)
88

99
pop = 1 # remove snapshot
10+
no_mirror = 2 # do not mirror files from snapshot (redundant for managed access snapshots)
1011

1112
type ProjectName = str
1213
type SourceSpec = str
@@ -31,7 +32,7 @@ def mksrc(source_type: Literal['bigquery', 'parquet'],
3132
flags: int = 0,
3233
) -> tuple[ProjectName, SourceItem | None]:
3334
project = '_'.join(snapshot.split('_')[1:-3])
34-
assert flags <= pop
35+
assert flags <= pop | no_mirror
3536
source = None if flags & pop else (
3637
':'.join([
3738
'tdr',
@@ -40,7 +41,9 @@ def mksrc(source_type: Literal['bigquery', 'parquet'],
4041
google_project,
4142
snapshot,
4243
]),
43-
{}
44+
{
45+
'mirror': not (flags & no_mirror),
46+
}
4447
)
4548
return project, source
4649

src/azul/azulclient.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -401,9 +401,14 @@ def remote_mirror(self,
401401

402402
def messages():
403403
for source, cfg in sources:
404-
log.info('Mirroring files in source %r from catalog %r',
405-
str(source.spec), catalog)
406-
yield self.mirror_source_message(catalog, source)
404+
if cfg.mirror:
405+
log.info('Mirroring files in source %r from catalog %r',
406+
str(source.spec), catalog)
407+
yield self.mirror_source_message(catalog, source)
408+
else:
409+
log.info('Not mirroring any files in source %r from catalog %r because '
410+
'mirroring is explicitly disabled',
411+
str(source.spec), catalog)
407412

408413
self.queue_mirror_messages(messages())
409414

0 commit comments

Comments
 (0)