Skip to content

Commit 89bcb1e

Browse files
committed
fix deploy for dmo package
1 parent 87f5b73 commit 89bcb1e

2 files changed

Lines changed: 199 additions & 23 deletions

File tree

src/datacustomcode/deploy.py

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232
from loguru import logger
3333
import pydantic
34-
from pydantic import BaseModel
34+
from pydantic import BaseModel, model_validator
3535
import requests
3636

3737
from datacustomcode.cmd import cmd_output
@@ -376,13 +376,37 @@ class FunctionConfig(BaseConfig):
376376
pass
377377

378378

379+
class DloPermission(BaseModel):
380+
dlo: list[str]
381+
382+
383+
class DmoPermission(BaseModel):
384+
dmo: list[str]
385+
386+
379387
class Permissions(BaseModel):
380-
read: Union[DloPermission]
381-
write: Union[DloPermission]
388+
read: Union[DloPermission, DmoPermission]
389+
write: Union[DloPermission, DmoPermission]
390+
391+
@model_validator(mode="after")
392+
def _no_mixed_layers(self) -> "Permissions":
393+
read_is_dlo = isinstance(self.read, DloPermission)
394+
write_is_dlo = isinstance(self.write, DloPermission)
395+
if read_is_dlo != write_is_dlo:
396+
raise ValueError(
397+
"permissions.read and permissions.write must both reference "
398+
"DLOs or both reference DMOs (got "
399+
f"read={type(self.read).__name__}, "
400+
f"write={type(self.write).__name__})"
401+
)
402+
return self
382403

383404

384-
class DloPermission(BaseModel):
385-
dlo: list[str]
405+
def _permission_entries(perm: Union[DloPermission, DmoPermission]) -> list[str]:
406+
"""Return the list of object names regardless of layer (DLO or DMO)."""
407+
if isinstance(perm, DloPermission):
408+
return perm.dlo
409+
return perm.dmo
386410

387411

388412
def get_config(directory: str) -> BaseConfig:
@@ -404,10 +428,17 @@ def get_config(directory: str) -> BaseConfig:
404428
except json.JSONDecodeError as err:
405429
raise ValueError(f"config.json at {config_path} is not valid JSON") from err
406430
except pydantic.ValidationError as err:
407-
missing_fields = [str(err["loc"][0]) for err in err.errors()]
431+
errors = err.errors()
432+
missing = [e for e in errors if e.get("type") == "missing"]
433+
if missing and len(missing) == len(errors):
434+
missing_fields = [str(e["loc"][0]) for e in missing]
435+
raise ValueError(
436+
f"config.json at {config_path} is missing required "
437+
f"fields: {', '.join(missing_fields)}"
438+
) from err
439+
messages = [str(e.get("msg", "")) for e in errors]
408440
raise ValueError(
409-
f"config.json at {config_path} is missing required "
410-
f"fields: {', '.join(missing_fields)}"
441+
f"config.json at {config_path} is invalid: {'; '.join(messages)}"
411442
) from err
412443

413444

@@ -421,17 +452,21 @@ def create_data_transform(
421452
script_name = metadata.name
422453
request_hydrated = DATA_TRANSFORM_REQUEST_TEMPLATE.copy()
423454

424-
# Add nodes for each write DLO
425-
for i, dlo in enumerate(data_transform_config.permissions.write.dlo, 1):
455+
# Add nodes for each write entry (DLO or DMO)
456+
for i, name in enumerate(
457+
_permission_entries(data_transform_config.permissions.write), 1
458+
):
426459
request_hydrated["nodes"][f"node{i}"] = {
427-
"relation_name": dlo,
460+
"relation_name": name,
428461
"config": {"materialized": "table"},
429462
"compiled_code": "",
430463
}
431464

432-
# Add sources for each read DLO
433-
for i, dlo in enumerate(data_transform_config.permissions.read.dlo, 1):
434-
request_hydrated["sources"][f"source{i}"] = {"relation_name": dlo}
465+
# Add sources for each read entry (DLO or DMO)
466+
for i, name in enumerate(
467+
_permission_entries(data_transform_config.permissions.read), 1
468+
):
469+
request_hydrated["sources"][f"source{i}"] = {"relation_name": name}
435470

436471
request_hydrated["macros"]["macro.byoc"]["arguments"][0]["name"] = script_name
437472

tests/test_deploy.py

Lines changed: 150 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from datacustomcode.deploy import (
1515
DloPermission,
16+
DmoPermission,
1617
Permissions,
1718
get_config,
1819
)
@@ -934,6 +935,56 @@ def test_verify_data_transform_config_missing_fields(self, mock_file, mock_exist
934935
):
935936
get_config("/test/dir/payload")
936937

938+
@patch(
939+
"builtins.open",
940+
new_callable=mock_open,
941+
read_data=(
942+
'{"sdkVersion": "1.0.0", "entryPoint": "entrypoint.py", '
943+
'"dataspace": "test_dataspace", '
944+
'"permissions": {"read": {"dmo": ["input_dmo__dlm"]}, '
945+
'"write": {"dmo": ["output_dmo__dlm"]}}}'
946+
),
947+
)
948+
def test_get_config_dmo_permissions(self, mock_file):
949+
"""DMO-only config.json parses into DmoPermission on both sides."""
950+
result = get_config("/test/dir")
951+
assert isinstance(result, DataTransformConfig)
952+
assert isinstance(result.permissions.read, DmoPermission)
953+
assert isinstance(result.permissions.write, DmoPermission)
954+
assert result.permissions.read.dmo == ["input_dmo__dlm"]
955+
assert result.permissions.write.dmo == ["output_dmo__dlm"]
956+
957+
@patch(
958+
"builtins.open",
959+
new_callable=mock_open,
960+
read_data=(
961+
'{"sdkVersion": "1.0.0", "entryPoint": "entrypoint.py", '
962+
'"dataspace": "test_dataspace", '
963+
'"permissions": {"read": {"dlo": ["input_dlo"]}, '
964+
'"write": {"dmo": ["output_dmo__dlm"]}}}'
965+
),
966+
)
967+
def test_get_config_mixed_dlo_dmo_raises(self, mock_file):
968+
"""A config that mixes DLO read with DMO write is rejected."""
969+
with pytest.raises(ValueError) as excinfo:
970+
get_config("/test/dir")
971+
msg = str(excinfo.value)
972+
assert "read" in msg and "write" in msg
973+
974+
@patch(
975+
"builtins.open",
976+
new_callable=mock_open,
977+
read_data=(
978+
'{"sdkVersion": "1.0.0", "entryPoint": "entrypoint.py", '
979+
'"dataspace": "test_dataspace", '
980+
'"permissions": {"read": {}, "write": {"dlo": ["output_dlo"]}}}'
981+
),
982+
)
983+
def test_get_config_empty_permission_raises(self, mock_file):
984+
"""A permission block with neither dlo nor dmo is rejected."""
985+
with pytest.raises(ValueError):
986+
get_config("/test/dir")
987+
937988

938989
class TestCreateDataTransform:
939990
@patch("datacustomcode.deploy.get_config")
@@ -972,18 +1023,108 @@ def test_create_data_transform(self, mock_make_api_call, mock_get_config):
9721023
request_body = mock_make_api_call.call_args[1]["json"]
9731024
assert request_body["definition"]["type"] == "DCSQL"
9741025
assert request_body["dataSpaceName"] == "test_dataspace"
975-
assert "nodes" in request_body["definition"]["manifest"]
976-
assert "sources" in request_body["definition"]["manifest"]
977-
assert "macros" in request_body["definition"]["manifest"]
978-
assert (
979-
request_body["definition"]["manifest"]["macros"]["macro.byoc"]["arguments"][
980-
0
981-
]["name"]
982-
== "test_job"
983-
)
1026+
manifest = request_body["definition"]["manifest"]
1027+
assert manifest["nodes"] == {
1028+
"node1": {
1029+
"relation_name": "output_dlo",
1030+
"config": {"materialized": "table"},
1031+
"compiled_code": "",
1032+
}
1033+
}
1034+
assert manifest["sources"] == {"source1": {"relation_name": "input_dlo"}}
1035+
assert manifest["macros"]["macro.byoc"]["arguments"][0]["name"] == "test_job"
9841036

9851037
assert result == {"id": "transform_id"}
9861038

1039+
@patch("datacustomcode.deploy.get_config")
1040+
@patch("datacustomcode.deploy._make_api_call")
1041+
def test_create_data_transform_dmo(self, mock_make_api_call, mock_get_config):
1042+
"""DMO permissions emit nodes/sources with DMO relation names."""
1043+
access_token = AccessTokenResponse(
1044+
access_token="test_token", instance_url="https://instance.example.com"
1045+
)
1046+
metadata = CodeExtensionMetadata(
1047+
name="dmo_job",
1048+
version="1.0.0",
1049+
description="DMO job",
1050+
computeType="CPU_M",
1051+
codeType="script",
1052+
)
1053+
1054+
data_transform_config = DataTransformConfig(
1055+
sdkVersion="1.0.0",
1056+
entryPoint="entrypoint.py",
1057+
dataspace="test_dataspace",
1058+
permissions=Permissions(
1059+
read=DmoPermission(dmo=["input_dmo__dlm"]),
1060+
write=DmoPermission(dmo=["output_dmo__dlm"]),
1061+
),
1062+
)
1063+
mock_make_api_call.return_value = {"id": "transform_id"}
1064+
1065+
create_data_transform(
1066+
"/test/dir", access_token, metadata, data_transform_config
1067+
)
1068+
1069+
request_body = mock_make_api_call.call_args[1]["json"]
1070+
manifest = request_body["definition"]["manifest"]
1071+
assert manifest["nodes"] == {
1072+
"node1": {
1073+
"relation_name": "output_dmo__dlm",
1074+
"config": {"materialized": "table"},
1075+
"compiled_code": "",
1076+
}
1077+
}
1078+
assert manifest["sources"] == {"source1": {"relation_name": "input_dmo__dlm"}}
1079+
assert manifest["macros"]["macro.byoc"]["arguments"][0]["name"] == "dmo_job"
1080+
assert request_body["dataSpaceName"] == "test_dataspace"
1081+
1082+
@patch("datacustomcode.deploy.get_config")
1083+
@patch("datacustomcode.deploy._make_api_call")
1084+
def test_create_data_transform_multiple_dmos(
1085+
self, mock_make_api_call, mock_get_config
1086+
):
1087+
"""Multiple read DMOs become multiple sources; one write DMO is one node."""
1088+
access_token = AccessTokenResponse(
1089+
access_token="test_token", instance_url="https://instance.example.com"
1090+
)
1091+
metadata = CodeExtensionMetadata(
1092+
name="dmo_multi",
1093+
version="1.0.0",
1094+
description="DMO multi",
1095+
computeType="CPU_M",
1096+
codeType="script",
1097+
)
1098+
1099+
data_transform_config = DataTransformConfig(
1100+
sdkVersion="1.0.0",
1101+
entryPoint="entrypoint.py",
1102+
dataspace="test_dataspace",
1103+
permissions=Permissions(
1104+
read=DmoPermission(dmo=["in1__dlm", "in2__dlm"]),
1105+
write=DmoPermission(dmo=["out__dlm"]),
1106+
),
1107+
)
1108+
mock_make_api_call.return_value = {"id": "transform_id"}
1109+
1110+
create_data_transform(
1111+
"/test/dir", access_token, metadata, data_transform_config
1112+
)
1113+
1114+
request_body = mock_make_api_call.call_args[1]["json"]
1115+
manifest = request_body["definition"]["manifest"]
1116+
assert manifest["sources"] == {
1117+
"source1": {"relation_name": "in1__dlm"},
1118+
"source2": {"relation_name": "in2__dlm"},
1119+
}
1120+
assert manifest["nodes"] == {
1121+
"node1": {
1122+
"relation_name": "out__dlm",
1123+
"config": {"materialized": "table"},
1124+
"compiled_code": "",
1125+
}
1126+
}
1127+
9871128

9881129
class TestDeployFull:
9891130
@patch("datacustomcode.deploy.get_config")

0 commit comments

Comments
 (0)