Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion UPGRADING.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,16 @@ Follow the steps above for all shared deployments.

In your personal deployments' ``environment.py`` files:

1. Update the type annotations for ``bqsrc``, ``mksrc``, ``mkdelta``, ``mklist``,
1. Update the type annotations for ``bqsrc``, ``mksrc``, ``mkdelta``, ``mksrcs``,
``mkdict``, and ``env``.

2. Remove the ``prefix`` parameter and its uses from ``bqsrc`` and ``mksrc``.

3. Update the function body of ``mksrc``.

4. Update the assignment of the ``sources`` parameter in the ``AZUL_CATALOGS``
variable.

As always, use the sandbox deployment's ``environment.py`` as a model when
upgrading personal deployments.

Expand Down
52 changes: 32 additions & 20 deletions deployments/anvilbox/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,18 @@
is_sandbox = True

# Bit flags for the ``flags`` argument of ``mksrc``. Each flag occupies its own
# bit and is tested there with ``&``.
pop = 1 # remove snapshot
no_mirror = 2 # do not mirror files from snapshot (redundant for managed access snapshots)

# Aliases used in the signatures of the catalog helpers below.
# ``mksrc`` builds a SourceSpec as a colon-separated string and a SourceConfig
# as a dictionary that currently holds the ``mirror`` setting.
type ProjectName = str
type SourceSpec = str
type SourceConfig = dict[str, str | int | float | bool | None]
# A source spec paired with the configuration that applies to that source
type SourceItem = tuple[SourceSpec, SourceConfig]


def bqsrc(google_project: str,
snapshot: str,
flags: int = 0,
) -> tuple[ProjectName, SourceSpec | None]:
flags: int = 0
) -> tuple[ProjectName, SourceItem | None]:
assert len(google_project) == 8, google_project
project = 'datarepo-dev-' + google_project
assert not snapshot.startswith('ANVIL_'), snapshot
Expand All @@ -28,38 +31,45 @@ def bqsrc(google_project: str,
def mksrc(source_type: Literal['bigquery', 'parquet'],
          google_project,
          snapshot,
          flags: int = 0
          ) -> tuple[ProjectName, SourceItem | None]:
    """
    Build one catalog entry for a TDR snapshot.

    :param source_type: the second component of the resulting source spec

    :param google_project: the Google project component of the source spec

    :param snapshot: the snapshot name. It is the last component of the source
                     spec, and the project name is derived from it.

    :param flags: a bitwise combination of ``pop`` and ``no_mirror``

    :return: a tuple of the project name and the source item. The source item
             is None if the ``pop`` flag is set. Otherwise it is a tuple of
             the source spec ``tdr:{source_type}:gcp:{google_project}:{snapshot}``
             and a configuration dictionary whose ``mirror`` entry is False
             if the ``no_mirror`` flag is set, and True otherwise.
    """
    # The project name is the snapshot name without its first and its last
    # three underscore-separated components.
    project = '_'.join(snapshot.split('_')[1:-3])
    # The upper bound must cover every defined flag. Comparing against `pop`
    # alone would reject `no_mirror`, making that flag unusable.
    assert flags <= pop | no_mirror, flags
    if flags & pop:
        source = None
    else:
        spec = ':'.join([
            'tdr',
            source_type,
            'gcp',
            google_project,
            snapshot,
        ])
        config = {
            'mirror': not (flags & no_mirror),
        }
        source = spec, config
    return project, source


def mkdelta(items: list[tuple[ProjectName, SourceItem | None]]
            ) -> dict[ProjectName, SourceItem | None]:
    """
    Convert a list of (project name, source item) pairs to a dictionary.

    :param items: the pairs to convert. Project names must be unique and the
                  list must be sorted by project name.

    :return: a dictionary mapping each project name to its source item, or to
             None where the pair carries no source item

    :raise AssertionError: if two pairs share a project name or if the pairs
                           are not sorted by project name
    """
    result = dict(items)
    # A dictionary silently keeps only the last of several equal keys, so a
    # shorter result means that a project name occurred more than once.
    assert len(items) == len(result), 'collisions detected'
    assert list(result.keys()) == sorted(result.keys()), 'input not sorted'
    return result


def mksrcs(catalog: dict[ProjectName, SourceItem | None]
           ) -> dict[SourceSpec, SourceConfig]:
    """
    Collect the sources of a catalog, keyed by source spec.

    :param catalog: a dictionary mapping project names to source items, or to
                    None for projects without a source

    :return: a dictionary mapping the source spec of each source item in the
             catalog to the configuration of that source. Projects that map
             to None are left out.
    """
    # Each remaining value is a (spec, config) pair, which is exactly the
    # shape that dict() accepts for its entries.
    return dict(filter(None, catalog.values()))


def mkdict(previous_catalog: dict[ProjectName, SourceItem | None],
           num_expected: int,
           delta: dict[ProjectName, SourceItem | None],
           ) -> dict[ProjectName, SourceItem | None]:
    """
    Apply a delta to a catalog and verify the resulting number of sources.

    :param previous_catalog: the catalog to start from

    :param num_expected: the number of sources the resulting catalog is
                         expected to contain

    :param delta: the entries to add or replace. An entry mapping a project
                  name to None yields a project without a source.

    :return: the previous catalog updated with the entries from the delta

    :raise AssertionError: if the number of sources in the resulting catalog
                           differs from the expected number
    """
    # Entries from the delta take precedence over those in the previous catalog
    catalog = previous_catalog | delta
    num_actual = len(mksrcs(catalog))
    assert num_expected == num_actual, (num_expected, num_actual)
    return catalog

Expand Down Expand Up @@ -119,7 +129,7 @@ def env() -> Mapping[str, str | None]:
internal=internal,
plugins=dict(metadata=dict(name='anvil'),
repository=dict(name='tdr_anvil')),
sources=list(filter(None, sources.values())))
sources=mksrcs(sources))
for atlas, catalog, sources in [
('anvil', 'anvil', anvil_sources),
]
Expand Down Expand Up @@ -168,4 +178,6 @@ def env() -> Mapping[str, str | None]:
'AZUL_DEPLOYMENT_INCARNATION': '2',

'AZUL_GOOGLE_OAUTH2_CLIENT_ID': '561542988117-cpo2avhomdh6t7fetp91js78cdhm9p47.apps.googleusercontent.com',

'AZUL_ENABLE_MIRRORING': '1',
}
53 changes: 33 additions & 20 deletions deployments/anvildev/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,18 @@
)

# Bit flags for the ``flags`` argument of ``mksrc``. Each flag occupies its own
# bit and is tested there with ``&``.
pop = 1 # remove snapshot
no_mirror = 2 # do not mirror files from snapshot (redundant for managed access snapshots)

# Aliases used in the signatures of the catalog helpers below.
# ``mksrc`` builds a SourceSpec as a colon-separated string and a SourceConfig
# as a dictionary that currently holds the ``mirror`` setting.
type ProjectName = str
type SourceSpec = str
type SourceConfig = dict[str, str | int | float | bool | None]
# A source spec paired with the configuration that applies to that source
type SourceItem = tuple[SourceSpec, SourceConfig]


def bqsrc(google_project: str,
snapshot: str,
flags: int = 0,
) -> tuple[ProjectName, SourceSpec | None]:
flags: int = 0
) -> tuple[ProjectName, SourceItem | None]:
assert len(google_project) == 8, google_project
project = 'datarepo-dev-' + google_project
assert not snapshot.startswith('ANVIL_'), snapshot
Expand All @@ -26,38 +29,45 @@ def bqsrc(google_project: str,
def mksrc(source_type: Literal['bigquery', 'parquet'],
          google_project,
          snapshot,
          flags: int = 0
          ) -> tuple[ProjectName, SourceItem | None]:
    """
    Build one catalog entry for a TDR snapshot.

    :param source_type: the second component of the resulting source spec

    :param google_project: the Google project component of the source spec

    :param snapshot: the snapshot name. It is the last component of the source
                     spec, and the project name is derived from it.

    :param flags: a bitwise combination of ``pop`` and ``no_mirror``

    :return: a tuple of the project name and the source item. The source item
             is None if the ``pop`` flag is set. Otherwise it is a tuple of
             the source spec ``tdr:{source_type}:gcp:{google_project}:{snapshot}``
             and a configuration dictionary whose ``mirror`` entry is False
             if the ``no_mirror`` flag is set, and True otherwise.
    """
    # The project name is the snapshot name without its first and its last
    # three underscore-separated components.
    project = '_'.join(snapshot.split('_')[1:-3])
    # The upper bound must cover every defined flag. Comparing against `pop`
    # alone would reject `no_mirror`, making that flag unusable.
    assert flags <= pop | no_mirror, flags
    if flags & pop:
        source = None
    else:
        spec = ':'.join([
            'tdr',
            source_type,
            'gcp',
            google_project,
            snapshot,
        ])
        config = {
            'mirror': not (flags & no_mirror),
        }
        source = spec, config
    return project, source


def mkdelta(items: list[tuple[ProjectName, SourceItem | None]]
            ) -> dict[ProjectName, SourceItem | None]:
    """
    Convert a list of (project name, source item) pairs to a dictionary.

    :param items: the pairs to convert. Project names must be unique and the
                  list must be sorted by project name.

    :return: a dictionary mapping each project name to its source item, or to
             None where the pair carries no source item

    :raise AssertionError: if two pairs share a project name or if the pairs
                           are not sorted by project name
    """
    result = dict(items)
    # A dictionary silently keeps only the last of several equal keys, so a
    # shorter result means that a project name occurred more than once.
    assert len(items) == len(result), 'collisions detected'
    assert list(result.keys()) == sorted(result.keys()), 'input not sorted'
    return result


def mksrcs(catalog: dict[ProjectName, SourceItem | None]
           ) -> dict[SourceSpec, SourceConfig]:
    """
    Collect the sources of a catalog, keyed by source spec.

    :param catalog: a dictionary mapping project names to source items, or to
                    None for projects without a source

    :return: a dictionary mapping the source spec of each source item in the
             catalog to the configuration of that source. Projects that map
             to None are left out.
    """
    # Each remaining value is a (spec, config) pair, which is exactly the
    # shape that dict() accepts for its entries.
    return dict(filter(None, catalog.values()))


def mkdict(previous_catalog: dict[ProjectName, SourceItem | None],
           num_expected: int,
           delta: dict[ProjectName, SourceItem | None],
           ) -> dict[ProjectName, SourceItem | None]:
    """
    Apply a delta to a catalog and verify the resulting number of sources.

    :param previous_catalog: the catalog to start from

    :param num_expected: the number of sources the resulting catalog is
                         expected to contain

    :param delta: the entries to add or replace. An entry mapping a project
                  name to None yields a project without a source.

    :return: the previous catalog updated with the entries from the delta

    :raise AssertionError: if the number of sources in the resulting catalog
                           differs from the expected number
    """
    # Entries from the delta take precedence over those in the previous catalog
    catalog = previous_catalog | delta
    num_actual = len(mksrcs(catalog))
    assert num_expected == num_actual, (num_expected, num_actual)
    return catalog

Expand Down Expand Up @@ -104,9 +114,10 @@ def env() -> Mapping[str, str | None]:
'AZUL_CATALOGS': json.dumps({
f'{catalog}{suffix}': dict(atlas=atlas,
internal=internal,
mirror_max_file_size=1.5 * 1024 ** 3,
plugins=dict(metadata=dict(name='anvil'),
repository=dict(name='tdr_anvil')),
sources=list(filter(None, sources.values())))
sources=mksrcs(sources))
for atlas, catalog, sources in [
('anvil', 'anvil', anvil_sources),
]
Expand Down Expand Up @@ -144,6 +155,8 @@ def env() -> Mapping[str, str | None]:

'AZUL_GOOGLE_OAUTH2_CLIENT_ID': '561542988117-3cv4g8ii9enl2000ra6m02r3ne7bgnth.apps.googleusercontent.com',

'AZUL_ENABLE_MIRRORING': '1',

'azul_slack_integration': json.dumps({
'workspace_id': 'T09P9H91S', # ucsc-gi.slack.com
'channel_id': 'C04K4BQET7G' # #team-boardwalk-anvildev
Expand Down
50 changes: 30 additions & 20 deletions deployments/anvilprod/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,18 @@
)

# Bit flags for the ``flags`` argument of ``mksrc``. Each flag occupies its own
# bit and is tested there with ``&``.
pop = 1 # remove snapshot
no_mirror = 2 # do not mirror files from snapshot (redundant for managed access snapshots)

# Aliases used in the signatures of the catalog helpers below.
# ``mksrc`` builds a SourceSpec as a colon-separated string and a SourceConfig
# as a dictionary that currently holds the ``mirror`` setting.
type ProjectName = str
type SourceSpec = str
type SourceConfig = dict[str, str | int | float | bool | None]
# A source spec paired with the configuration that applies to that source
type SourceItem = tuple[SourceSpec, SourceConfig]


def bqsrc(google_project: str,
snapshot: str,
flags: int = 0,
) -> tuple[ProjectName, SourceSpec | None]:
flags: int = 0
) -> tuple[ProjectName, SourceItem | None]:
assert len(google_project) == 8, google_project
project = 'datarepo-' + google_project
# Some snapshots start with AnVIL instead of ANVIL
Expand All @@ -29,38 +32,45 @@ def bqsrc(google_project: str,
def mksrc(source_type: Literal['bigquery', 'parquet'],
          google_project,
          snapshot,
          flags: int = 0
          ) -> tuple[ProjectName, SourceItem | None]:
    """
    Build one catalog entry for a TDR snapshot.

    :param source_type: the second component of the resulting source spec

    :param google_project: the Google project component of the source spec

    :param snapshot: the snapshot name. It is the last component of the source
                     spec, and the project name is derived from it.

    :param flags: a bitwise combination of ``pop`` and ``no_mirror``

    :return: a tuple of the project name and the source item. The source item
             is None if the ``pop`` flag is set. Otherwise it is a tuple of
             the source spec ``tdr:{source_type}:gcp:{google_project}:{snapshot}``
             and a configuration dictionary whose ``mirror`` entry is False
             if the ``no_mirror`` flag is set, and True otherwise.
    """
    # The project name is the snapshot name without its first and its last
    # three underscore-separated components.
    project = '_'.join(snapshot.split('_')[1:-3])
    # The upper bound must cover every defined flag. Comparing against `pop`
    # alone would reject `no_mirror`, making that flag unusable.
    assert flags <= pop | no_mirror, flags
    if flags & pop:
        source = None
    else:
        spec = ':'.join([
            'tdr',
            source_type,
            'gcp',
            google_project,
            snapshot,
        ])
        config = {
            'mirror': not (flags & no_mirror),
        }
        source = spec, config
    return project, source


def mkdelta(items: list[tuple[ProjectName, SourceItem | None]]
            ) -> dict[ProjectName, SourceItem | None]:
    """
    Convert a list of (project name, source item) pairs to a dictionary.

    :param items: the pairs to convert. Project names must be unique and the
                  list must be sorted by project name.

    :return: a dictionary mapping each project name to its source item, or to
             None where the pair carries no source item

    :raise AssertionError: if two pairs share a project name or if the pairs
                           are not sorted by project name
    """
    result = dict(items)
    # A dictionary silently keeps only the last of several equal keys, so a
    # shorter result means that a project name occurred more than once.
    assert len(items) == len(result), 'collisions detected'
    assert list(result.keys()) == sorted(result.keys()), 'input not sorted'
    return result


def mksrcs(catalog: dict[ProjectName, SourceItem | None]
           ) -> dict[SourceSpec, SourceConfig]:
    """
    Collect the sources of a catalog, keyed by source spec.

    :param catalog: a dictionary mapping project names to source items, or to
                    None for projects without a source

    :return: a dictionary mapping the source spec of each source item in the
             catalog to the configuration of that source. Projects that map
             to None are left out.
    """
    # Each remaining value is a (spec, config) pair, which is exactly the
    # shape that dict() accepts for its entries.
    return dict(filter(None, catalog.values()))


def mkdict(previous_catalog: dict[ProjectName, SourceItem | None],
           num_expected: int,
           delta: dict[ProjectName, SourceItem | None],
           ) -> dict[ProjectName, SourceItem | None]:
    """
    Apply a delta to a catalog and verify the resulting number of sources.

    :param previous_catalog: the catalog to start from

    :param num_expected: the number of sources the resulting catalog is
                         expected to contain

    :param delta: the entries to add or replace. An entry mapping a project
                  name to None yields a project without a source.

    :return: the previous catalog updated with the entries from the delta

    :raise AssertionError: if the number of sources in the resulting catalog
                           differs from the expected number
    """
    # Entries from the delta take precedence over those in the previous catalog
    catalog = previous_catalog | delta
    num_actual = len(mksrcs(catalog))
    assert num_expected == num_actual, (num_expected, num_actual)
    return catalog

Expand Down Expand Up @@ -1232,7 +1242,7 @@ def env() -> Mapping[str, str | None]:
internal=internal,
plugins=dict(metadata=dict(name='anvil'),
repository=dict(name='tdr_anvil')),
sources=list(filter(None, sources.values())))
sources=mksrcs(sources))
for atlas, catalog, sources in [
('anvil', 'anvil11', anvil11_sources),
]
Expand Down
Loading