From 847c96e95790cdf7477ac8ab582f18ea0ef2de7b Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 6 May 2025 10:32:12 +0200 Subject: [PATCH 01/27] Actual implementation changes --- pyproject.toml | 10 ++++++++- src/sirocco/core/graph_items.py | 1 + src/sirocco/parsing/yaml_data_models.py | 9 +++++++- src/sirocco/workgraph.py | 30 +++++++++++++++++++++++-- 4 files changed, 46 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 760431dc..df145f73 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,8 @@ dependencies = [ "termcolor", "pygraphviz", "lxml", - "f90nml" + "f90nml", + "aiida-shell @ git+https://github.com/sphuber/aiida-shell.git@fix/105/handle-remote-data-argument-placeholders", ] license = {file = "LICENSE"} @@ -76,6 +77,9 @@ ignore = [ ## Hatch configurations +[tool.hatch.metadata] +allow-direct-references = true + [tool.hatch.build.targets.sdist] include = [ "src/sirocco/", @@ -149,3 +153,7 @@ ignore_missing_imports = true [[tool.mypy.overrides]] module = ["aiida_workgraph.sockets.builtins"] ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["termcolor._types"] +ignore_missing_imports = true diff --git a/src/sirocco/core/graph_items.py b/src/sirocco/core/graph_items.py index c3c19c68..79538648 100644 --- a/src/sirocco/core/graph_items.py +++ b/src/sirocco/core/graph_items.py @@ -49,6 +49,7 @@ def from_config(cls, config: ConfigBaseData, coordinates: dict) -> AvailableData data_class = AvailableData if isinstance(config, ConfigAvailableData) else GeneratedData return data_class( name=config.name, + computer=config.computer, type=config.type, src=config.src, coordinates=coordinates, diff --git a/src/sirocco/parsing/yaml_data_models.py b/src/sirocco/parsing/yaml_data_models.py index 97cd3466..0877c05b 100644 --- a/src/sirocco/parsing/yaml_data_models.py +++ b/src/sirocco/parsing/yaml_data_models.py @@ -279,7 +279,14 @@ class ConfigShellTaskSpecs: plugin: ClassVar[Literal["shell"]] = "shell" port_pattern: ClassVar[re.Pattern] = field(default=re.compile(r"{PORT(\[sep=.+\])?::(.+?)}"), repr=False) sep_pattern: ClassVar[re.Pattern] = field(default=re.compile(r"\[sep=(.+)\]"), repr=False) - src: str | None = None + src: str | None = Field( + default=None, + description=( + "If `src` not absolute, this ends up to be relative to the root directory of the config file." + "This should also be solved by registering `Code`s in AiiDA for the required scripts." + "See issue #127 and #60" + ), + ) command: str env_source_files: list[str] = field(default_factory=list) diff --git a/src/sirocco/workgraph.py b/src/sirocco/workgraph.py index be012520..293ba1e9 100644 --- a/src/sirocco/workgraph.py +++ b/src/sirocco/workgraph.py @@ -56,7 +56,15 @@ def _prepare_for_shell_task(task: dict, inputs: dict) -> dict: # Workaround starts here # This part is part of the workaround. We need to manually add the outputs from the task. 
# Because kwargs are not populated with outputs - default_outputs = {"remote_folder", "remote_stash", "retrieved", "_outputs", "_wait", "stdout", "stderr"} + default_outputs = { + "remote_folder", + "remote_stash", + "retrieved", + "_outputs", + "_wait", + "stdout", + "stderr", + } task_outputs = set(task["outputs"].keys()) task_outputs = task_outputs.union(set(inputs.pop("outputs", []))) missing_outputs = task_outputs.difference(default_outputs) @@ -105,6 +113,7 @@ def __init__(self, core_workflow: core.Workflow): for task in self._core_workflow.tasks: if isinstance(task, core.ShellTask): self._set_shelljob_arguments(task) + self._set_shelljob_filenames(task) # link wait on to workgraph tasks for task in self._core_workflow.tasks: @@ -238,6 +247,8 @@ def _create_shell_task_node(self, task: core.ShellTask): ] prepend_text = "\n".join([f"source {env_source_path}" for env_source_path in env_source_paths]) metadata["options"] = {"prepend_text": prepend_text} + # NOTE: Hardcoded for now, possibly make user-facing option + metadata["options"]["use_symlinks"] = True ## computer if task.computer is not None: @@ -292,7 +303,10 @@ def _link_input_node_to_shelltask(self, task: core.ShellTask, input_: core.Data) socket = getattr(workgraph_task.inputs.nodes, f"{input_label}") socket.value = self.data_from_core(input_) elif isinstance(input_, core.GeneratedData): - self._workgraph.add_link(self.socket_from_core(input_), workgraph_task.inputs[f"nodes.{input_label}"]) + self._workgraph.add_link( + self.socket_from_core(input_), + workgraph_task.inputs[f"nodes.{input_label}"], + ) else: raise TypeError @@ -317,6 +331,18 @@ def _set_shelljob_arguments(self, task: core.ShellTask): _, arguments = self.split_cmd_arg(task.resolve_ports(input_labels)) workgraph_task_arguments.value = arguments + def _set_shelljob_filenames(self, task: core.ShellTask): + """set AiiDA ShellJob filenames for AvailableData entities""" + + filenames = {} + + for input_ in task.input_data_nodes(): + if isinstance(input_, core.AvailableData): + filenames[input_.name] = Path(input_.src).name + + workgraph_task = self.task_from_core(task) + workgraph_task.inputs.filenames.value = filenames + def run( self, inputs: None | dict[str, Any] = None, From dabeed4afeab7b834381b8a22191d6c497d4c5fd Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 6 May 2025 13:15:06 +0200 Subject: [PATCH 02/27] Add specific tests for filenames argument. 
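The tests added below assert that Sirocco fills the ShellJob `filenames` input so each data
node is staged under a predictable name in the job's working directory. For context, a minimal
sketch of the aiida-shell behaviour being exercised (assuming a loaded AiiDA profile and a
configured localhost computer; the labels `my_data` and `foo.txt` are illustrative only and this
stand-alone `launch_shell_job` call is a simplified stand-in for what AiidaWorkGraph builds):

    from aiida import load_profile, orm
    from aiida_shell import launch_shell_job

    load_profile()

    # `nodes` provides the data, `filenames` pins the name it gets in the sandbox,
    # and the `{my_data}` placeholder in `arguments` resolves to that filename.
    results, node = launch_shell_job(
        "cat",
        arguments="{my_data}",
        nodes={"my_data": orm.SinglefileData.from_string("hello", filename="foo.txt")},
        filenames={"my_data": "foo.txt"},
    )
    print(results["stdout"].get_content())  # expected to print "hello"
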
--- tests/test_workgraph.py | 150 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 tests/test_workgraph.py diff --git a/tests/test_workgraph.py b/tests/test_workgraph.py new file mode 100644 index 00000000..bfa022f6 --- /dev/null +++ b/tests/test_workgraph.py @@ -0,0 +1,150 @@ +from sirocco.core import Workflow +from aiida import orm +from sirocco.parsing import yaml_data_models as models +from sirocco.workgraph import AiidaWorkGraph + + +def test_set_shelljob_filenames(tmp_path, aiida_localhost): + file_name = "foo.txt" + file_path = tmp_path / file_name + # Dummy script, as `src` must be specified due to relative command path + script_path = tmp_path / "my_script.sh" + + config_wf = models.ConfigWorkflow( + name="remote", + rootdir=tmp_path, + cycles=[ + models.ConfigCycle( + name="remote", + tasks=[ + models.ConfigCycleTask( + name="task", + inputs=[ + models.ConfigCycleTaskInput(name="my_data", port="unused") + ], + ), + ], + ), + ], + tasks=[ + models.ConfigShellTask( + name="task", command="echo test", src=str(script_path) + ), + ], + data=models.ConfigData( + available=[ + models.ConfigAvailableData( + name="my_data", + type=models.DataType.FILE, + src=str(file_path), + computer="localhost", + ) + ], + ), + parameters={}, + ) + + core_wf = Workflow.from_config_workflow(config_workflow=config_wf) + aiida_wf = AiidaWorkGraph(core_workflow=core_wf) + assert isinstance( + aiida_wf._workgraph.tasks[0].inputs.nodes["my_data"].value, orm.RemoteData + ) + assert aiida_wf._workgraph.tasks[0].inputs.filenames.value == {"my_data": "foo.txt"} + + +def test_multiple_inputs_filenames(tmp_path, aiida_localhost): + file_names = ["foo.txt", "bar.txt", "baz.dat"] + for name in file_names: + (tmp_path / name).touch() + script_path = tmp_path / "my_script.sh" + + # Create configuration with multiple inputs + config_wf = models.ConfigWorkflow( + name="remote", + rootdir=tmp_path, + cycles=[ + models.ConfigCycle( + name="remote", + tasks=[ + models.ConfigCycleTask( + name="task", + inputs=[ + models.ConfigCycleTaskInput( + name=f"data_{i}", port=f"port_{i}" + ) + for i in range(len(file_names)) + ], + ), + ], + ), + ], + tasks=[ + models.ConfigShellTask( + name="task", command="echo test", src=str(script_path) + ), + ], + data=models.ConfigData( + available=[ + models.ConfigAvailableData( + name=f"data_{i}", + type=models.DataType.FILE, + src=name, + computer="localhost", + ) + for i, name in enumerate(file_names) + ], + ), + parameters={}, + ) + + core_wf = Workflow.from_config_workflow(config_workflow=config_wf) + aiida_wf = AiidaWorkGraph(core_workflow=core_wf) + + expected_filenames = {f"data_{i}": name for i, name in enumerate(file_names)} + assert aiida_wf._workgraph.tasks[0].inputs.filenames.value == expected_filenames + + +def test_directory_input_filenames(tmp_path, aiida_localhost): + dir_name = "test_dir" + dir_path = tmp_path / dir_name + dir_path.mkdir() + script_path = tmp_path / "my_script.sh" + + config_wf = models.ConfigWorkflow( + name="remote", + rootdir=tmp_path, + cycles=[ + models.ConfigCycle( + name="remote", + tasks=[ + models.ConfigCycleTask( + name="task", + inputs=[ + models.ConfigCycleTaskInput(name="my_dir", port="unused") + ], + ), + ], + ), + ], + tasks=[ + models.ConfigShellTask( + name="task", command="echo test", src=str(script_path) + ), + ], + data=models.ConfigData( + available=[ + models.ConfigAvailableData( + name="my_dir", + type=models.DataType.DIR, + src=dir_name, + computer="localhost", + ) + ], + ), + parameters={}, 
+ ) + + core_wf = Workflow.from_config_workflow(config_workflow=config_wf) + aiida_wf = AiidaWorkGraph(core_workflow=core_wf) + + assert aiida_wf._workgraph.tasks[0].inputs.filenames.value == {"my_dir": dir_name} From a2ca536853bf60ac2ad3d5b273751e7fc52b9ce6 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 6 May 2025 14:12:22 +0200 Subject: [PATCH 03/27] Add checks for computer existence for filenames Also, fix hatch --- src/sirocco/parsing/yaml_data_models.py | 14 ++++--- src/sirocco/workgraph.py | 3 +- tests/test_workgraph.py | 51 +++++++++++-------------- 3 files changed, 32 insertions(+), 36 deletions(-) diff --git a/src/sirocco/parsing/yaml_data_models.py b/src/sirocco/parsing/yaml_data_models.py index 0877c05b..9eddc5b8 100644 --- a/src/sirocco/parsing/yaml_data_models.py +++ b/src/sirocco/parsing/yaml_data_models.py @@ -279,13 +279,15 @@ class ConfigShellTaskSpecs: plugin: ClassVar[Literal["shell"]] = "shell" port_pattern: ClassVar[re.Pattern] = field(default=re.compile(r"{PORT(\[sep=.+\])?::(.+?)}"), repr=False) sep_pattern: ClassVar[re.Pattern] = field(default=re.compile(r"\[sep=(.+)\]"), repr=False) - src: str | None = Field( + src: str | None = field( default=None, - description=( - "If `src` not absolute, this ends up to be relative to the root directory of the config file." - "This should also be solved by registering `Code`s in AiiDA for the required scripts." - "See issue #127 and #60" - ), + metadata={ + "description": ( + "If `src` not absolute, this ends up to be relative to the root directory of the config file." + "This should also be solved by registering `Code`s in AiiDA for the required scripts." + "See issues #60 and #127." + ) + }, ) command: str env_source_files: list[str] = field(default_factory=list) diff --git a/src/sirocco/workgraph.py b/src/sirocco/workgraph.py index 293ba1e9..12356f18 100644 --- a/src/sirocco/workgraph.py +++ b/src/sirocco/workgraph.py @@ -335,9 +335,8 @@ def _set_shelljob_filenames(self, task: core.ShellTask): """set AiiDA ShellJob filenames for AvailableData entities""" filenames = {} - for input_ in task.input_data_nodes(): - if isinstance(input_, core.AvailableData): + if isinstance(input_, core.AvailableData) and task.computer and input_.computer: filenames[input_.name] = Path(input_.src).name workgraph_task = self.task_from_core(task) diff --git a/tests/test_workgraph.py b/tests/test_workgraph.py index bfa022f6..9922c1a7 100644 --- a/tests/test_workgraph.py +++ b/tests/test_workgraph.py @@ -1,10 +1,13 @@ -from sirocco.core import Workflow +import pytest from aiida import orm + +from sirocco.core import Workflow from sirocco.parsing import yaml_data_models as models from sirocco.workgraph import AiidaWorkGraph -def test_set_shelljob_filenames(tmp_path, aiida_localhost): +@pytest.mark.usefixtures("aiida_localhost") +def test_set_shelljob_filenames(tmp_path): file_name = "foo.txt" file_path = tmp_path / file_name # Dummy script, as `src` must be specified due to relative command path @@ -19,17 +22,13 @@ def test_set_shelljob_filenames(tmp_path, aiida_localhost): tasks=[ models.ConfigCycleTask( name="task", - inputs=[ - models.ConfigCycleTaskInput(name="my_data", port="unused") - ], + inputs=[models.ConfigCycleTaskInput(name="my_data", port="unused")], ), ], ), ], tasks=[ - models.ConfigShellTask( - name="task", command="echo test", src=str(script_path) - ), + models.ConfigShellTask(name="task", command="echo test", src=str(script_path)), ], data=models.ConfigData( available=[ @@ -46,13 +45,14 @@ def 
test_set_shelljob_filenames(tmp_path, aiida_localhost): core_wf = Workflow.from_config_workflow(config_workflow=config_wf) aiida_wf = AiidaWorkGraph(core_workflow=core_wf) - assert isinstance( - aiida_wf._workgraph.tasks[0].inputs.nodes["my_data"].value, orm.RemoteData - ) - assert aiida_wf._workgraph.tasks[0].inputs.filenames.value == {"my_data": "foo.txt"} + remote_data = aiida_wf._workgraph.tasks[0].inputs.nodes["my_data"].value # noqa: SLF001 + assert isinstance(remote_data, orm.RemoteData) + filenames = aiida_wf._workgraph.tasks[0].inputs.filenames.value # noqa: SLF001 + assert filenames == {"my_data": "foo.txt"} -def test_multiple_inputs_filenames(tmp_path, aiida_localhost): +@pytest.mark.usefixtures("aiida_localhost") +def test_multiple_inputs_filenames(tmp_path): file_names = ["foo.txt", "bar.txt", "baz.dat"] for name in file_names: (tmp_path / name).touch() @@ -69,9 +69,7 @@ def test_multiple_inputs_filenames(tmp_path, aiida_localhost): models.ConfigCycleTask( name="task", inputs=[ - models.ConfigCycleTaskInput( - name=f"data_{i}", port=f"port_{i}" - ) + models.ConfigCycleTaskInput(name=f"data_{i}", port=f"port_{i}") for i in range(len(file_names)) ], ), @@ -79,9 +77,7 @@ def test_multiple_inputs_filenames(tmp_path, aiida_localhost): ), ], tasks=[ - models.ConfigShellTask( - name="task", command="echo test", src=str(script_path) - ), + models.ConfigShellTask(name="task", command="echo test", src=str(script_path)), ], data=models.ConfigData( available=[ @@ -101,10 +97,12 @@ def test_multiple_inputs_filenames(tmp_path, aiida_localhost): aiida_wf = AiidaWorkGraph(core_workflow=core_wf) expected_filenames = {f"data_{i}": name for i, name in enumerate(file_names)} - assert aiida_wf._workgraph.tasks[0].inputs.filenames.value == expected_filenames + filenames = aiida_wf._workgraph.tasks[0].inputs.filenames.value # noqa: SLF001 + assert filenames == expected_filenames -def test_directory_input_filenames(tmp_path, aiida_localhost): +@pytest.mark.usefixtures("aiida_localhost") +def test_directory_input_filenames(tmp_path): dir_name = "test_dir" dir_path = tmp_path / dir_name dir_path.mkdir() @@ -119,17 +117,13 @@ def test_directory_input_filenames(tmp_path, aiida_localhost): tasks=[ models.ConfigCycleTask( name="task", - inputs=[ - models.ConfigCycleTaskInput(name="my_dir", port="unused") - ], + inputs=[models.ConfigCycleTaskInput(name="my_dir", port="unused")], ), ], ), ], tasks=[ - models.ConfigShellTask( - name="task", command="echo test", src=str(script_path) - ), + models.ConfigShellTask(name="task", command="echo test", src=str(script_path)), ], data=models.ConfigData( available=[ @@ -147,4 +141,5 @@ def test_directory_input_filenames(tmp_path, aiida_localhost): core_wf = Workflow.from_config_workflow(config_workflow=config_wf) aiida_wf = AiidaWorkGraph(core_workflow=core_wf) - assert aiida_wf._workgraph.tasks[0].inputs.filenames.value == {"my_dir": dir_name} + filenames = aiida_wf._workgraph.tasks[0].inputs.filenames.value # noqa: SLF001 + assert filenames == {"my_dir": dir_name} From ee3cbff30a062544b02b9cf73e7936db2229dcb6 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Thu, 8 May 2025 14:29:25 +0200 Subject: [PATCH 04/27] fix tests --- tests/test_workgraph.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_workgraph.py b/tests/test_workgraph.py index 9922c1a7..293bd4b5 100644 --- a/tests/test_workgraph.py +++ b/tests/test_workgraph.py @@ -28,7 +28,7 @@ def test_set_shelljob_filenames(tmp_path): ), ], tasks=[ - 
models.ConfigShellTask(name="task", command="echo test", src=str(script_path)), + models.ConfigShellTask(name="task", command="echo test", src=str(script_path), computer='localhost'), ], data=models.ConfigData( available=[ @@ -77,7 +77,7 @@ def test_multiple_inputs_filenames(tmp_path): ), ], tasks=[ - models.ConfigShellTask(name="task", command="echo test", src=str(script_path)), + models.ConfigShellTask(name="task", command="echo test", src=str(script_path), computer='localhost'), ], data=models.ConfigData( available=[ @@ -123,7 +123,7 @@ def test_directory_input_filenames(tmp_path): ), ], tasks=[ - models.ConfigShellTask(name="task", command="echo test", src=str(script_path)), + models.ConfigShellTask(name="task", command="echo test", src=str(script_path), computer='localhost'), ], data=models.ConfigData( available=[ From 996ea7badbda8738548a902c731fa0ea2025fdb8 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Fri, 9 May 2025 10:32:41 +0200 Subject: [PATCH 05/27] Replace None key with src --- src/sirocco/workgraph.py | 16 ++++- tests/test_workgraph.py | 149 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 160 insertions(+), 5 deletions(-) diff --git a/src/sirocco/workgraph.py b/src/sirocco/workgraph.py index 12356f18..fab7f1e4 100644 --- a/src/sirocco/workgraph.py +++ b/src/sirocco/workgraph.py @@ -336,8 +336,20 @@ def _set_shelljob_filenames(self, task: core.ShellTask): filenames = {} for input_ in task.input_data_nodes(): - if isinstance(input_, core.AvailableData) and task.computer and input_.computer: - filenames[input_.name] = Path(input_.src).name + if task.computer and input_.computer and isinstance(input_, core.AvailableData): + filename = Path(input_.src).name + filenames[input_.name] = filename + # NOTE: GeneratedData has no explicit Computer attribute + elif task.computer and isinstance(input_, core.GeneratedData): + for input_k, input_v in task.inputs.items(): + if not input_v: + continue + if input_ == input_v[0]: + filename = self.label_placeholder(input_).strip('{').strip('}') + if input_k == "None": + filenames[input_.src] = filename + else: + filenames[input_k] = filename workgraph_task = self.task_from_core(task) workgraph_task.inputs.filenames.value = filenames diff --git a/tests/test_workgraph.py b/tests/test_workgraph.py index 293bd4b5..803cb34a 100644 --- a/tests/test_workgraph.py +++ b/tests/test_workgraph.py @@ -4,6 +4,7 @@ from sirocco.core import Workflow from sirocco.parsing import yaml_data_models as models from sirocco.workgraph import AiidaWorkGraph +import textwrap @pytest.mark.usefixtures("aiida_localhost") @@ -28,7 +29,7 @@ def test_set_shelljob_filenames(tmp_path): ), ], tasks=[ - models.ConfigShellTask(name="task", command="echo test", src=str(script_path), computer='localhost'), + models.ConfigShellTask(name="task", command="echo test", src=str(script_path), computer="localhost"), ], data=models.ConfigData( available=[ @@ -77,7 +78,7 @@ def test_multiple_inputs_filenames(tmp_path): ), ], tasks=[ - models.ConfigShellTask(name="task", command="echo test", src=str(script_path), computer='localhost'), + models.ConfigShellTask(name="task", command="echo test", src=str(script_path), computer="localhost"), ], data=models.ConfigData( available=[ @@ -123,7 +124,7 @@ def test_directory_input_filenames(tmp_path): ), ], tasks=[ - models.ConfigShellTask(name="task", command="echo test", src=str(script_path), computer='localhost'), + models.ConfigShellTask(name="task", command="echo test", src=str(script_path), computer="localhost"), ], 
data=models.ConfigData( available=[ @@ -143,3 +144,145 @@ def test_directory_input_filenames(tmp_path): filenames = aiida_wf._workgraph.tasks[0].inputs.filenames.value # noqa: SLF001 assert filenames == {"my_dir": dir_name} + + +@pytest.mark.usefixtures("aiida_localhost") +def test_set_shelljob_filenames_parametrized(tmp_path): + yaml_str = textwrap.dedent( + """ + start_date: &root_start_date "2026-01-01T00:00" + stop_date: &root_stop_date "2028-01-01T00:00" + cycles: + - bimonthly_tasks: + cycling: + start_date: *root_start_date + stop_date: *root_stop_date + period: P6M + tasks: + - icon: + inputs: + - initial_conditions: + when: + at: *root_start_date + port: init + - icon_restart: + when: + after: *root_start_date + target_cycle: + lag: -P6M + parameters: + foo: single + bar: single + port: restart + - forcing: + port: forcing + outputs: [icon_output, icon_restart] + - statistics_foo: + inputs: + - icon_output: + parameters: + bar: single + port: None + outputs: [analysis_foo] + - statistics_foo_bar: + inputs: + - analysis_foo: + port: None + outputs: [analysis_foo_bar] + - yearly: + cycling: + start_date: *root_start_date + stop_date: *root_stop_date + period: P1Y + tasks: + - merge: + inputs: + - analysis_foo_bar: + target_cycle: + lag: ["P0M", "P6M"] + port: None + outputs: [yearly_analysis] + tasks: + - icon: + plugin: shell + src: scripts/icon.py + command: "icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}" + parameters: [foo, bar] + computer: localhost + - statistics_foo: + plugin: shell + src: scripts/statistics.py + command: "statistics.py {PORT::None}" + parameters: [bar] + computer: localhost + - statistics_foo_bar: + plugin: shell + src: scripts/statistics.py + command: "statistics.py {PORT::None}" + computer: localhost + - merge: + plugin: shell + src: scripts/merge.py + command: "merge.py {PORT::None}" + computer: localhost + data: + available: + - initial_conditions: + type: file + src: data/initial_conditions + computer: localhost + - forcing: + type: file + src: data/forcing + computer: localhost + generated: + - icon_output: + type: file + src: icon_output + parameters: [foo, bar] + - icon_restart: + type: file + src: restart + parameters: [foo, bar] + - analysis_foo: + type: file + src: analysis + parameters: [bar] + - analysis_foo_bar: + type: file + src: analysis + - yearly_analysis: + type: file + src: analysis + parameters: + foo: [0, 1] + bar: [3.0] + """ + ) + yaml_file = tmp_path / "config.yml" + yaml_file.write_text(yaml_str) + + core_wf = Workflow.from_config_file(yaml_file) + aiida_wf = AiidaWorkGraph(core_workflow=core_wf) + filenames_list = [task.inputs.filenames.value for task in aiida_wf._workgraph.tasks] + expected_filenames_list = [ + {"forcing": "forcing", "initial_conditions": "initial_conditions"}, + {"forcing": "forcing", "initial_conditions": "initial_conditions"}, + {"forcing": "forcing", "restart": "icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00"}, + {"forcing": "forcing", "restart": "icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00"}, + {"forcing": "forcing", "restart": "icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00"}, + {"forcing": "forcing", "restart": "icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00"}, + {"forcing": "forcing", "restart": "icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00"}, + {"forcing": "forcing", "restart": "icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00"}, + {"icon_output": "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00"}, + 
{"icon_output": "icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00"}, + {"icon_output": "icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00"}, + {"icon_output": "icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00"}, + {"analysis": "analysis_foo_bar_3_0___date_2026_01_01_00_00_00"}, + {"analysis": "analysis_foo_bar_3_0___date_2026_07_01_00_00_00"}, + {"analysis": "analysis_foo_bar_3_0___date_2027_01_01_00_00_00"}, + {"analysis": "analysis_foo_bar_3_0___date_2027_07_01_00_00_00"}, + {"analysis": "analysis_foo_bar_date_2026_01_01_00_00_00"}, + {"analysis": "analysis_foo_bar_date_2027_01_01_00_00_00"}, + ] + assert filenames_list == expected_filenames_list From 2092627312f856297691ade0005ec89aeae86490 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Fri, 9 May 2025 10:44:42 +0200 Subject: [PATCH 06/27] Add expected arguments list for comparison. --- src/sirocco/workgraph.py | 1 + tests/test_workgraph.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/src/sirocco/workgraph.py b/src/sirocco/workgraph.py index fab7f1e4..2e1e0e3c 100644 --- a/src/sirocco/workgraph.py +++ b/src/sirocco/workgraph.py @@ -353,6 +353,7 @@ def _set_shelljob_filenames(self, task: core.ShellTask): workgraph_task = self.task_from_core(task) workgraph_task.inputs.filenames.value = filenames + # import ipdb; ipdb.set_trace() def run( self, diff --git a/tests/test_workgraph.py b/tests/test_workgraph.py index 803cb34a..734e5065 100644 --- a/tests/test_workgraph.py +++ b/tests/test_workgraph.py @@ -265,6 +265,7 @@ def test_set_shelljob_filenames_parametrized(tmp_path): core_wf = Workflow.from_config_file(yaml_file) aiida_wf = AiidaWorkGraph(core_workflow=core_wf) filenames_list = [task.inputs.filenames.value for task in aiida_wf._workgraph.tasks] + arguments_list = [task.inputs.arguments.value for task in aiida_wf._workgraph.tasks] expected_filenames_list = [ {"forcing": "forcing", "initial_conditions": "initial_conditions"}, {"forcing": "forcing", "initial_conditions": "initial_conditions"}, @@ -285,4 +286,31 @@ def test_set_shelljob_filenames_parametrized(tmp_path): {"analysis": "analysis_foo_bar_date_2026_01_01_00_00_00"}, {"analysis": "analysis_foo_bar_date_2027_01_01_00_00_00"}, ] + expected_arguments_list = [ + "--restart --init {initial_conditions} --forcing {forcing}", + "--restart --init {initial_conditions} --forcing {forcing}", + "--restart {icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00} --init " "--forcing {forcing}", + "--restart {icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00} --init " "--forcing {forcing}", + "--restart {icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00} --init " "--forcing {forcing}", + "--restart {icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00} --init " "--forcing {forcing}", + "--restart {icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00} --init " "--forcing {forcing}", + "--restart {icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00} --init " "--forcing {forcing}", + "{icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00} " + "{icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00}", + "{icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00} " + "{icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00}", + "{icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00} " + "{icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00}", + "{icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00} " + "{icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00}", + 
"{analysis_foo_bar_3_0___date_2026_01_01_00_00_00}", + "{analysis_foo_bar_3_0___date_2026_07_01_00_00_00}", + "{analysis_foo_bar_3_0___date_2027_01_01_00_00_00}", + "{analysis_foo_bar_3_0___date_2027_07_01_00_00_00}", + "{analysis_foo_bar_date_2026_01_01_00_00_00} " "{analysis_foo_bar_date_2026_07_01_00_00_00}", + "{analysis_foo_bar_date_2027_01_01_00_00_00} " "{analysis_foo_bar_date_2027_07_01_00_00_00}", + ] assert filenames_list == expected_filenames_list + import ipdb + + ipdb.set_trace() From 48318a220941d95b76fba4e0766a31277982cfca Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Fri, 9 May 2025 11:33:26 +0200 Subject: [PATCH 07/27] Verify with nodes --- src/sirocco/workgraph.py | 25 ++++++++++++++----------- tests/test_workgraph.py | 2 ++ 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/sirocco/workgraph.py b/src/sirocco/workgraph.py index 2e1e0e3c..673f22fa 100644 --- a/src/sirocco/workgraph.py +++ b/src/sirocco/workgraph.py @@ -335,23 +335,26 @@ def _set_shelljob_filenames(self, task: core.ShellTask): """set AiiDA ShellJob filenames for AvailableData entities""" filenames = {} + workgraph_task = self.task_from_core(task) + if not workgraph_task.inputs.filenames: + return + for input_ in task.input_data_nodes(): if task.computer and input_.computer and isinstance(input_, core.AvailableData): filename = Path(input_.src).name filenames[input_.name] = filename # NOTE: GeneratedData has no explicit Computer attribute - elif task.computer and isinstance(input_, core.GeneratedData): - for input_k, input_v in task.inputs.items(): - if not input_v: - continue - if input_ == input_v[0]: - filename = self.label_placeholder(input_).strip('{').strip('}') - if input_k == "None": - filenames[input_.src] = filename - else: - filenames[input_k] = filename + # elif task.computer and isinstance(input_, core.GeneratedData): + # for input_k, input_v in task.inputs.items(): + # if not input_v: + # continue + # if input_ == input_v[0]: + # filename = self.label_placeholder(input_).strip('{').strip('}') + # if input_k == "None": + # filenames[input_.src] = filename + # else: + # filenames[input_k] = filename - workgraph_task = self.task_from_core(task) workgraph_task.inputs.filenames.value = filenames # import ipdb; ipdb.set_trace() diff --git a/tests/test_workgraph.py b/tests/test_workgraph.py index 734e5065..2f23caa3 100644 --- a/tests/test_workgraph.py +++ b/tests/test_workgraph.py @@ -266,6 +266,8 @@ def test_set_shelljob_filenames_parametrized(tmp_path): aiida_wf = AiidaWorkGraph(core_workflow=core_wf) filenames_list = [task.inputs.filenames.value for task in aiida_wf._workgraph.tasks] arguments_list = [task.inputs.arguments.value for task in aiida_wf._workgraph.tasks] + import ipdb; ipdb.set_trace() + nodes_list = [list(task.inputs.nodes._sockets.keys()) for task in aiida_wf._workgraph.tasks] expected_filenames_list = [ {"forcing": "forcing", "initial_conditions": "initial_conditions"}, {"forcing": "forcing", "initial_conditions": "initial_conditions"}, From 5ac98bffbe437b19b99813e79d635f2ea61b9756 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Fri, 9 May 2025 12:01:26 +0200 Subject: [PATCH 08/27] Add minimal CLI interface using typer and rich --- pyproject.toml | 7 +- src/sirocco/cli.py | 247 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 253 insertions(+), 1 deletion(-) create mode 100644 src/sirocco/cli.py diff --git a/pyproject.toml b/pyproject.toml index 1672f84b..f6412e00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,9 @@ dependencies = 
[ "termcolor", "pygraphviz", "lxml", - "f90nml" + "f90nml", + "rich", + "typer[all]" ] license = {file = "LICENSE"} @@ -44,6 +46,9 @@ Repository = "https://github.com/C2SM/Sirocco.git" Documentation = "https://c2sm.github.io/Sirocco/" Changelog = "https://github.com/C2SM/Sirocco/blob/main/CHANGELOG.md" +[project.scripts] +sirocco = "sirocco.cli:app" + [tool.pytest.ini_options] # Configuration for [pytest](https://docs.pytest.org) addopts = "--pdbcls=IPython.terminal.debugger:TerminalPdb" diff --git a/src/sirocco/cli.py b/src/sirocco/cli.py new file mode 100644 index 00000000..e5f0e507 --- /dev/null +++ b/src/sirocco/cli.py @@ -0,0 +1,247 @@ +import typer +from pathlib import Path +from typing import Optional +from rich.console import Console +from rich.traceback import install as install_rich_traceback + +from sirocco import parsing +from sirocco import core +from sirocco import vizgraph +from sirocco import pretty_print +from sirocco.workgraph import AiidaWorkGraph +import aiida + + +# --- Typer App and Rich Console Setup --- +# Install rich tracebacks for beautiful error reporting +install_rich_traceback(show_locals=False) + +# Create the Typer app instance +app = typer.Typer( + help="Sirocco Weather and Climate Workflow Management Tool.", + add_completion=True, # Optional: disable shell completion installation prompts +) + +# Create a Rich console instance for printing +console = Console() + + +# --- Helper functions --- +def load_aiida_profile(profile: Optional[str] = None): + try: + aiida.load_profile(profile=profile, allow_switch=True) # Allow switch for flexibility + # console.print(f"ℹ️ AiiDA profile [green]'{aiida.get_profile().name}'[/green] loaded.") + except Exception as e: + console.print(f"[bold red]Failed to load AiiDA profile '{profile if profile else 'default'}': {e}[/bold red]") + console.print("Ensure an AiiDA profile exists and the AiiDA daemon is configured if submitting.") + raise typer.Exit(code=1) + + +def _prepare_aiida_workgraph(workflow_file_str: str, aiida_profile_name: Optional[str]) -> AiidaWorkGraph: + """Helper to load profile, config, and prepare AiidaWorkGraph.""" + load_aiida_profile(aiida_profile_name) + try: + config_workflow = parsing.ConfigWorkflow.from_config_file(workflow_file_str) + core_wf = core.Workflow.from_config_workflow(config_workflow) + aiida_wg = AiidaWorkGraph(core_wf) + console.print(f"⚙️ Workflow [magenta]'{core_wf.name}'[/magenta] prepared for AiiDA execution.") + return aiida_wg + except Exception as e: + console.print(f"[bold red]❌ Failed to prepare workflow for AiiDA: {e}[/bold red]") + console.print_exception() + raise typer.Exit(code=1) + + +# --- CLI Commands --- + + +@app.command() +def verify( + workflow_file: Path = typer.Argument( + ..., # Ellipsis indicates a required argument + exists=True, + file_okay=True, + dir_okay=False, + readable=True, + help="Path to the workflow definition YAML file.", + ), +): + """ + Validate the workflow definition file for syntax and basic consistency. 
+ """ + console.print(f"🔍 Verifying workflow file: [cyan]{workflow_file}[/cyan]") + try: + # Attempt to load and validate the configuration + parsing.ConfigWorkflow.from_config_file(str(workflow_file)) + console.print("[green]✅ Workflow definition is valid.[/green]") + except Exception: + console.print("[bold red]❌ Workflow validation failed:[/bold red]") + # Rich traceback handles printing the exception nicely + console.print_exception() + raise typer.Exit(code=1) + + +@app.command() +def visualize( + workflow_file: Path = typer.Argument( + ..., + exists=True, + file_okay=True, + dir_okay=False, + readable=True, + help="Path to the workflow definition YAML file.", + ), + output_file: Optional[Path] = typer.Option( + None, # Default value is None, making it optional + "--output", + "-o", + writable=True, # Check if the path (or its parent dir) is writable + file_okay=True, + dir_okay=False, + help="Optional path to save the output SVG file.", + ), +): + """ + Generate an interactive SVG visualization of the unrolled workflow. + """ + console.print(f"📊 Visualizing workflow from: [cyan]{workflow_file}[/cyan]") + try: + # 1. Load configuration + config_workflow = parsing.ConfigWorkflow.from_config_file(str(workflow_file)) + + # 2. Create the core workflow representation (unrolls parameters/cycles) + core_workflow = core.Workflow.from_config_workflow(config_workflow) + + # 3. Create the visualization graph + viz_graph = vizgraph.VizGraph.from_core_workflow(core_workflow) + + # 4. Determine output path + if output_file is None: + # Default output name based on workflow name in the same directory + output_path = workflow_file.parent / f"{core_workflow.name}.svg" + else: + output_path = output_file + + # Ensure the output directory exists + output_path.parent.mkdir(parents=True, exist_ok=True) + + # 5. Draw the graph + viz_graph.draw(file_path=output_path) + + console.print(f"[green]✅ Visualization saved to:[/green] [cyan]{output_path.resolve()}[/cyan]") + + except Exception: + console.print("[bold red]❌ Failed to generate visualization:[/bold red]") + console.print_exception() + raise typer.Exit(code=1) + + +@app.command() +def represent( + workflow_file: Path = typer.Argument( + ..., + exists=True, + file_okay=True, + dir_okay=False, + readable=True, + help="Path to the workflow definition YAML file.", + ), +): + """ + Display the text representation of the unrolled workflow graph. + """ + console.print(f"📄 Representing workflow from: [cyan]{workflow_file}[/cyan]") + try: + config_workflow = parsing.ConfigWorkflow.from_config_file(str(workflow_file)) + core_workflow = core.Workflow.from_config_workflow(config_workflow) + + printer = pretty_print.PrettyPrinter(colors=False) + output_from_printer = printer.format(core_workflow) + + console.print(output_from_printer) + + except Exception: + console.print("[bold red]❌ Failed to represent workflow:[/bold red]") + console.print_exception() + raise typer.Exit(code=1) + + +@app.command(help="Run the workflow in a blocking fashion.") +def run( + workflow_file: Path = typer.Argument( + ..., + exists=True, + file_okay=True, + dir_okay=False, + readable=True, + help="Path to the workflow definition YAML file.", + ), + aiida_profile: Optional[str] = typer.Option( + None, "--aiida-profile", "-P", help="AiiDA profile to use (defaults to current active)." 
+ ), +): + aiida_wg = _prepare_aiida_workgraph(str(workflow_file), aiida_profile) + try: + console.print(f"▶️ Running workflow [magenta]'{aiida_wg._core_workflow.name}'[/magenta] directly (blocking)...") + results = aiida_wg.run(inputs=None) # No metadata + console.print("[green]✅ Workflow execution finished.[/green]") + console.print("Results:") + if isinstance(results, dict): + for k, v in results.items(): + console.print(f" [bold blue]{k}[/bold blue]: {v}") + else: + console.print(f" {results}") + except Exception as e: + console.print(f"[bold red]❌ Workflow execution failed during run: {e}[/bold red]") + console.print_exception() + raise typer.Exit(code=1) + + +@app.command(help="Submit the workflow to the AiiDA daemon.") +def submit( + workflow_file: Path = typer.Argument( + ..., + exists=True, + file_okay=True, + dir_okay=False, + readable=True, + help="Path to the workflow definition YAML file.", + ), + aiida_profile: Optional[str] = typer.Option( + None, "--aiida-profile", "-P", help="AiiDA profile to use (defaults to current active)." + ), + wait: bool = typer.Option(False, "--wait", "-w", help="Wait for the workflow to complete after submission."), + timeout: int = typer.Option( # Default AiiDA timeout for wait is often very long or infinite + 3600, "--timeout", "-t", help="Timeout in seconds when waiting (if --wait is used)." + ), +): + """Submit the workflow to the AiiDA daemon.""" + + aiida_wg = _prepare_aiida_workgraph(str(workflow_file), aiida_profile) + try: + console.print(f"🚀 Submitting workflow [magenta]'{aiida_wg._core_workflow.name}'[/magenta] to AiiDA daemon...") + # No metadata passed to submit + results_node = aiida_wg.submit(inputs=None, wait=wait, timeout=timeout if wait else None) + + if isinstance(results_node, aiida.orm.WorkChainNode): + console.print(f"[green]✅ Workflow submitted. PK: {results_node.pk}[/green]") + if wait: + console.print( + f"🏁 Workflow completed. Final state: [bold { 'green' if results_node.is_finished_ok else 'red' }]{results_node.process_state.value.upper()}[/bold { 'green' if results_node.is_finished_ok else 'red' }]" + ) + if not results_node.is_finished_ok: + console.print( + "[yellow]Inspect the workchain for more details (e.g., `verdi process report PK`).[/yellow]" + ) + else: # Should typically be a WorkChainNode + console.print(f"[green]✅ Submission initiated. 
Result: {results_node}[/green]") + + except Exception as e: + console.print(f"[bold red]❌ Workflow submission failed: {e}[/bold red]") + console.print_exception() + raise typer.Exit(code=1) + + +# --- Main entry point for the script --- +if __name__ == "__main__": + app() From 5febd4f4d006c6decc2a0e82bbf2db72448f7532 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Mon, 2 Jun 2025 14:34:06 +0200 Subject: [PATCH 09/27] Uncomment out previous implementation and duplicate test --- src/sirocco/parsing/yaml_data_models.py | 30 +++ src/sirocco/workgraph.py | 52 ++-- tests/cases/parameters/config/config.yml | 3 + .../config/ICON/NAMELIST_exclaim_ape_R02B04 | 206 ++++++++++++++ .../remote/config/ICON/icon_master.namelist | 23 ++ tests/cases/remote/config/config.yml | 109 ++++++++ tests/cases/remote/config/data/forcing | 0 .../remote/config/data/initial_conditions | 0 tests/cases/remote/config/scripts/icon.py | 70 +++++ tests/cases/remote/config/scripts/merge.py | 15 ++ .../cases/remote/config/scripts/statistics.py | 15 ++ tests/cases/remote/data/config.txt | 251 ++++++++++++++++++ tests/cases/remote/svg/.gitkeep | 0 tests/test_workgraph.py | 61 ++++- 14 files changed, 806 insertions(+), 29 deletions(-) create mode 100644 tests/cases/remote/config/ICON/NAMELIST_exclaim_ape_R02B04 create mode 100644 tests/cases/remote/config/ICON/icon_master.namelist create mode 100644 tests/cases/remote/config/config.yml create mode 100644 tests/cases/remote/config/data/forcing create mode 100644 tests/cases/remote/config/data/initial_conditions create mode 100755 tests/cases/remote/config/scripts/icon.py create mode 100755 tests/cases/remote/config/scripts/merge.py create mode 100755 tests/cases/remote/config/scripts/statistics.py create mode 100644 tests/cases/remote/data/config.txt create mode 100644 tests/cases/remote/svg/.gitkeep diff --git a/src/sirocco/parsing/yaml_data_models.py b/src/sirocco/parsing/yaml_data_models.py index b00ad31c..62b08cde 100644 --- a/src/sirocco/parsing/yaml_data_models.py +++ b/src/sirocco/parsing/yaml_data_models.py @@ -713,6 +713,36 @@ def from_config_file(cls, config_path: str) -> Self: object_["rootdir"] = config_resolved_path.parent adapter = TypeAdapter(cls) return adapter.validate_python(object_) + + @classmethod + def from_yaml_str(cls, yaml_content: str, name: str = None, rootdir: Path = None) -> Self: + """Creates a Workflow instance from a YAML string. + + Args: + yaml_content (str): The YAML content as a string. + name (str, optional): The name for the workflow. If not provided, defaults to "workflow". + rootdir (Path, optional): The root directory for the workflow. Defaults to current directory. + + Returns: + OBJECT_T: An instance of the specified class type with data parsed and + validated from the YAML content. + """ + if yaml_content.strip() == "": + msg = "YAML content is empty." 
+ raise ValueError(msg) + + reader = YAML(typ="safe", pure=True) + object_ = reader.load(StringIO(yaml_content)) + + # Set name if not specified in YAML + if "name" not in object_: + object_["name"] = name or "workflow" + + # Set rootdir if not specified + object_["rootdir"] = rootdir or Path.cwd() + + adapter = TypeAdapter(cls) + return adapter.validate_python(object_) OBJECT_T = typing.TypeVar("OBJECT_T") diff --git a/src/sirocco/workgraph.py b/src/sirocco/workgraph.py index 61d7e657..3551ac30 100644 --- a/src/sirocco/workgraph.py +++ b/src/sirocco/workgraph.py @@ -27,6 +27,16 @@ def _execute(self, engine_process, args=None, kwargs=None, var_kwargs=None): # from aiida_shell import ShellJob from aiida_workgraph.utils import create_and_pause_process # type: ignore[import-untyped] + # Check here if possible to fix the command, currently looks like: + # { + # 'arguments': '--restart --init {initial_conditions} --forcing {forcing}', + # 'command': '/home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/icon.py', + # 'metadata': {'computer': , + # 'options': {'prepend_text': '', 'use_symlinks': True}}, + # 'nodes': {'forcing': , + # 'initial_conditions': }, + # 'outputs': [] + # } inputs = aiida_workgraph.tasks.factory.shelljob_task.prepare_for_shell_task(kwargs) # Workaround starts here @@ -39,6 +49,11 @@ def _execute(self, engine_process, args=None, kwargs=None, var_kwargs=None): # inputs["outputs"] = list(missing_outputs) # Workaround ends here + # `code_label` is constructed via: + # code_label = f'{command}@{computer.label}' + # In aiida-shell + # ShellCode inherits from InstalledCode... (what about PortableCode??) + inputs["metadata"].update({"call_link_label": self.name}) if self.action == "PAUSE": engine_process.report(f"Task {self.name} is created and paused.") @@ -185,7 +200,10 @@ def _add_aiida_input_data_node(self, data: core.Data): except NotExistent as err: msg = f"Could not find computer {data.computer!r} for input {data}." 
raise ValueError(msg) from err - self._aiida_data_nodes[label] = aiida.orm.RemoteData(remote_path=data.src, label=label, computer=computer) + # `remote_path` must be str not PosixPath to be JSON-serializable + self._aiida_data_nodes[label] = aiida.orm.RemoteData( + remote_path=str(data.src), label=label, computer=computer + ) elif data.type == "file": self._aiida_data_nodes[label] = aiida.orm.SinglefileData(label=label, file=data_full_path) elif data.type == "dir": @@ -315,37 +333,41 @@ def _set_shelljob_arguments(self, task: core.ShellTask): workgraph_task_arguments.value = arguments def _set_shelljob_filenames(self, task: core.ShellTask): - """set AiiDA ShellJob filenames for AvailableData entities""" + """set AiiDA ShellJob filenames for Data entities""" filenames = {} workgraph_task = self.task_from_core(task) if not workgraph_task.inputs.filenames: + import ipdb; ipdb.set_trace() return for input_ in task.input_data_nodes(): if task.computer and input_.computer and isinstance(input_, core.AvailableData): filename = Path(input_.src).name filenames[input_.name] = filename - # NOTE: GeneratedData has no explicit Computer attribute - # elif task.computer and isinstance(input_, core.GeneratedData): - # for input_k, input_v in task.inputs.items(): - # if not input_v: - # continue - # if input_ == input_v[0]: - # filename = self.label_placeholder(input_).strip('{').strip('}') - # if input_k == "None": - # filenames[input_.src] = filename - # else: - # filenames[input_k] = filename - + # PRCOMMENT: GeneratedData has no explicit Computer attribute. + # This is explicit from task, or should we add that, as well? (probably not necessary) + elif task.computer and isinstance(input_, core.GeneratedData): + for input_k, input_v in task.inputs.items(): + if not input_v: + continue + if input_ == input_v[0]: + filename = self.label_placeholder(input_).strip('{').strip('}') + if input_k == "None": + filenames[str(input_.src)] = filename + else: + filenames[input_k] = filename + + if not filenames: + import ipdb; ipdb.set_trace() workgraph_task.inputs.filenames.value = filenames - # import ipdb; ipdb.set_trace() def run( self, inputs: None | dict[str, Any] = None, metadata: None | dict[str, Any] = None, ) -> aiida.orm.Node: + # import ipdb; ipdb.set_trace() self._workgraph.run(inputs=inputs, metadata=metadata) if (output_node := self._workgraph.process) is None: # The node should not be None after a run, it should contain exit code and message so if the node is None something internal went wrong diff --git a/tests/cases/parameters/config/config.yml b/tests/cases/parameters/config/config.yml index 25fa32d6..00b08425 100644 --- a/tests/cases/parameters/config/config.yml +++ b/tests/cases/parameters/config/config.yml @@ -56,6 +56,9 @@ cycles: tasks: - icon: plugin: shell + # For relative path execpt if this cannot be resolved to a registered code + # Probably shouldn't enforce to absolute path; or provide a code argument instead, this should be + # unambigous src: scripts/icon.py command: "icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}" parameters: [foo, bar] diff --git a/tests/cases/remote/config/ICON/NAMELIST_exclaim_ape_R02B04 b/tests/cases/remote/config/ICON/NAMELIST_exclaim_ape_R02B04 new file mode 100644 index 00000000..449bbb7e --- /dev/null +++ b/tests/cases/remote/config/ICON/NAMELIST_exclaim_ape_R02B04 @@ -0,0 +1,206 @@ +! parallel_nml: MPI parallelization ------------------------------------------ +¶llel_nml + nproma = 48 ! loop chunk length + nblocks_c = 0 ! 
loop number of cell blocks + nproma_sub = 48 ! loop chunk length for radiation scheme + p_test_run = .FALSE. ! .TRUE. means verification run for MPI parallelization + num_io_procs = 0 ! number of I/O processors + num_restart_procs = 0 ! number of restart processors + iorder_sendrecv = 3 ! sequence of MPI send/receive calls +/ + +! grid_nml: horizontal grid -------------------------------------------------- +&grid_nml + dynamics_grid_filename = " ./icon_grid_0013_R02B04_R.nc" ! array of the grid filenames for the dycore + lredgrid_phys = .FALSE. ! .true.=radiation is calculated on a reduced grid +/ + +! run_nml: general switches --------------------------------------------------- +&run_nml + num_lev = 60 ! number of full levels of vertical grid + dtime = 2 ! [s] timestep in seconds + ldynamics = .TRUE. ! dynamics --> dynamics_ctl, diffusion_ctl + ltransport = .TRUE. ! switch on tracer transport --> transport_ctl + iforcing = 3 ! 0: dynamical core only, 1: Held-Suarez, 2: ECHAM, 3: NWP + ltestcase = .TRUE. ! run testcase --> testcase_ctl + ntracer = 5 ! number of tracers + msg_level = 12 ! detailed report during integration + ltimer = .TRUE. ! timer for monitoring the runtime of specific routines + timers_level = 10 ! performance timer granularity + check_uuid_gracefully = .TRUE. ! give only warnings for non-matching uuids + output = "nml" ! main switch for enabling/disabling components of the model output +/ + +! nwp_phy_nml: switches for the physics schemes ------------------------------ +&nwp_phy_nml + inwp_gscp = 2 ! cloud microphysics and precipitation 0: none, 1: hydci, 2: hydci_gr + mu_rain = 0.5 ! shap parameter in gamma distribution for rain + rain_n0_factor = 0.1 ! tuning factor for intercept parameter of raindrop size distr. + inwp_convection = 1 ! convection + lshallowconv_only = .FALSE. ! only shallow convection + inwp_radiation = 4 ! 1: RRTM, 2: Ritter-Geleyn 4: ecRad: + latm_above_top = .TRUE. ! take into account atmosphere above model top for radiation computation + inwp_cldcover = 1 ! cloud cover scheme for radiation 5: all or nothing + inwp_turb = 1 ! 1: Raschendorfer, 5: Smagorinsky diffusion (Dipankar et al.) + inwp_satad = 1 ! saturation adjustment + inwp_sso = 0 ! subgrid scale orographic drag + inwp_gwd = 1 ! non-orographic gravity wave drag + inwp_surface = 0 ! surface scheme (0: None, 1: TERRA) + dt_rad = 12 ! time step for radiation in s + dt_ccov = 4 + dt_gwd = 8 + dt_conv = 4 +/ + +! turbdiff_nml: turbulent diffusion ------------------------------------------- +&turbdiff_nml + tkhmin = 0.01 ! minimum limiter for Ks (needed for stability, but unphysical) + tkhmin_strat = 0.01 + tkmmin = 0.01 + tkmmin_strat = 0.01 + rlam_heat = 1.0 ! Scaling factor of the laminar boundary layer for heat (scalars) + pat_len = 100. ! effective length scale of thermal surface patterns + rat_sea = 9.0 ! Ratio of laminar scaling factors for scalars over sea and land. + q_crit = 1.6 ! critical value for normalized super-saturation + tur_len = 150 ! Asymptotic maximal turbulent distance +/ + +! radiation_nml: radiation scheme --------------------------------------------- +&radiation_nml +ecrad_isolver = 2 +izenith = 3 ! zenith angle formula for the radiative transfer computation 3: perpetual equinox + albedo_type = 3 ! type of surface albedo + albedo_fixed = 0.2 ! DL: Discuss value! + irad_o3 = 0 ! ozone climatology 7: GEMS ozone climatology !DL: Want 4 for Aquaplanet + irad_aero = 2 ! 
aerosol climatology 0: no aerosols 2: global constant + irad_h2o = 1 + irad_co2 = 0 + irad_ch4 = 0 + irad_n2o = 0 + irad_o2 = 0 + irad_cfc11 = 0 + irad_cfc12 = 0 + icld_overlap = 2 ! Generalized Random + ecrad_data_path = './ecrad_data' ! Optical property files path ecRad (link files as path is truncated inside ecrad) +/ + +! nonhydrostatic_nml: nonhydrostatic model ----------------------------------- +&nonhydrostatic_nml + iadv_rhotheta = 2 ! advection method for rho and rhotheta 2: Miura 2nd order + ivctype = 2 ! type of vertical coordinate 1: gal-chen hybrid 2:sleve + itime_scheme = 4 ! time integration scheme + ndyn_substeps = 5 ! number of dynamics steps per fast-physics step + damp_height = 50000.0 ! height at which Rayleigh damping of vertical wind starts + rayleigh_coeff = 0.1 ! Rayleigh damping coefficient + divdamp_order = 4 ! order of divergence damping + divdamp_type = 3 ! type of divergence damping + divdamp_fac = 0.0025 ! scaling factor for divergence damping + l_zdiffu_t = .FALSE. ! specifies computation of Smagorinsky temperature diffusion + htop_moist_proc = 22500.0 ! max. height for moist physics + hbot_qvsubstep = 22500.0 ! height above which QV is advected with substepping scheme +/ + +! sleve_nml: vertical level specification ------------------------------------- +&sleve_nml ! vertical grid standard output for message level >= 15 + min_lay_thckn = 50. ! lowest level thickness (between half-levels) + top_height = 75000. ! Height of model Top + stretch_fac = 0.9 ! stretching towards model top + decay_scale_1 = 4000. ! decay scales for topography + decay_scale_2 = 2500. + decay_exp = 1.2 + flat_height = 16000. ! Height above which the coordinate surfaces are flat DL: set to 20, since no topo? +/ + +! dynamics_nml: dynamical core ----------------------------------------------- +&dynamics_nml + lcoriolis = .TRUE. ! Coriolis Force +/ + +! nh_testcase_nml: testcase namelist ------------------------------------------ +&nh_testcase_nml + nh_test_name = 'APE_nwp' ! test case identifier + ape_sst_case = 'sst_qobs' ! sst distribution + zp_ape = 101325 ! surface pressure [Pa] + ztmc_ape = 50.006 ! total moisture content [kg/m^2] +/ + +! transport_nml: tracer transport --------------------------------------------- +&transport_nml + ihadv_tracer = 52,2,2,2,2,2 ! gdm: 52 combination of hybrid FFSL/Miura3 with subcycling + itype_hlimit = 3,4,4,4,4,4 ! type of limiter for horizontal transport + ivadv_tracer = 3,3,3,3,3,3 ! tracer specific method to compute vertical advection + itype_vlimit = 1,2,1,2,3,1 ! Type of limiter for vertical transport + llsq_svd = .TRUE. ! use SV decomposition for least squares design matrix +/ + +! diffusion_nml: horizontal (numerical) diffusion ---------------------------- +&diffusion_nml + lhdiff_vn = .TRUE. ! diffusion on the horizontal wind field + lhdiff_temp = .TRUE. ! diffusion on the temperature field + lhdiff_w = .TRUE. ! diffusion on the vertical wind field + hdiff_order = 5 ! order of nabla operator for diffusion + itype_vn_diffu = 1 ! reconstruction method used for Smagorinsky diffusion + itype_t_diffu = 2 ! discretization of temperature diffusion + hdiff_efdt_ratio = 24.0 ! ratio of e-folding time to time step + hdiff_smag_fac = 0.025 ! scaling factor for Smagorinsky diffusion +/ + +! io_nml: general switches for model I/O ------------------------------------- +&io_nml + lnetcdf_flt64_output = .TRUE. ! Needed for probtest + write_last_restart = .TRUE. + itype_pres_msl = 4 ! 4: IFS method + restart_file_type = 5 ! 
5: NetCDF4 + restart_write_mode = "joint procs multifile" + itype_rh = 1 ! RH w.r.t. water +/ + +! initicon_nml: specify read-in of initial state ------------------------------ +! Needed for probtest +&initicon_nml + pinit_seed = -1 ! seed for perturbation of initial model state. no perturbation by default + pinit_amplitude = 0. ! amplitude of perturbation +/ + +! output namelist: specify output of 2D fields ------------------------------ +&output_nml + output_filename = './atm_2d/' ! file name base + filetype = 5 ! output format 5:NetCDFv4 + filename_format = "exclaim_ape_R02B04_atm_2d_" ! Output filename format + output_start = "2000-01-01T00:00:03Z" + output_end = "2000-01-01T00:00:30Z" + output_interval = "PT2S" + file_interval = "P1D" + include_last = .TRUE. ! flag whether to include the last time step + remap = 0 ! 0: no remapping 1: lat-lon grid + reg_lat_def = -90,0.5,90.0 + reg_lon_def = -180,0.5,179.5 + output_grid = .TRUE. + ml_varlist = 'pres_sfc', + 'tqv' , 'tqc' , 'tqi' , + 'tqr' , 'tqs' , + 'clch' , 'clcm' , 'clcl' , 'clct' , + 'tot_prec','prec_gsp', + 't_2m' , 'qv_2m' , 'u_10m' , 'v_10m' , + 't_g' , 'rh_2m' , 'qv_s', 'tcm','tch', + 'lhfl_s' , 'shfl_s' , 'umfl_s', 'vmfl_s' + 'sob_s' , 'sob_t' , 'sod_t', 'sou_s', + 'thb_s' , 'thb_t' ,'thu_s','pres_sfc' +/ + +&output_nml + output_filename = './atm_3d_pl/'! file name base + filetype = 5 ! output format: 2=GRIB2, 4=NetCDFv2, 5=NetCDFv4 + filename_format = "exclaim_ape_R02B04_atm_3d_pl_" + output_start = "2000-01-01T00:00:00Z" + output_end = "2000-01-01T00:00:30Z" + output_interval = "PT2S" + file_interval = "P1D" + include_last = .TRUE. ! flag whether to include the last time step + remap = 0 ! 0: no remapping 1: lat-lon grid + reg_lat_def = -90,0.5,90.0 + reg_lon_def = -180,0.5,179.5 + output_grid = .TRUE. + p_levels = 10000,20000,25000,30000,50000,70000,85000,90000,95000,100000 + pl_varlist = 'geopot', 'temp','u', 'v', 'w', 'qv', 'qc', 'qi' +/ diff --git a/tests/cases/remote/config/ICON/icon_master.namelist b/tests/cases/remote/config/ICON/icon_master.namelist new file mode 100644 index 00000000..af88fc57 --- /dev/null +++ b/tests/cases/remote/config/ICON/icon_master.namelist @@ -0,0 +1,23 @@ +&master_nml + lrestart = .false. + read_restart_namelists = .true. +/ +&master_time_control_nml + calendar = 'proleptic gregorian' + experimentStartDate = '2000-01-01T00:00:00Z' + restartTimeIntval = 'PT1M' + checkpointTimeIntval = 'PT1M' + experimentStopDate = '2000-01-01T00:02:00Z' +/ +&time_nml + is_relative_time = .false. 
+/ +&master_model_nml + model_name="atm" + model_namelist_filename="NAMELIST_exclaim_ape_R02B04" + model_type=1 + model_min_rank=0 + model_max_rank=65535 + model_inc_rank=1 + model_rank_group_size=1 +/ diff --git a/tests/cases/remote/config/config.yml b/tests/cases/remote/config/config.yml new file mode 100644 index 00000000..00b08425 --- /dev/null +++ b/tests/cases/remote/config/config.yml @@ -0,0 +1,109 @@ +--- +start_date: &root_start_date '2026-01-01T00:00' +stop_date: &root_stop_date '2028-01-01T00:00' + +cycles: + - bimonthly_tasks: + cycling: + start_date: *root_start_date + stop_date: *root_stop_date + period: P6M + tasks: + - icon: + inputs: + - initial_conditions: + when: + at: *root_start_date + port: init + - icon_restart: + when: + after: *root_start_date + target_cycle: + lag: -P6M + parameters: + foo: single + bar: single + port: restart + - forcing: + port: forcing + outputs: [icon_output, icon_restart] + - statistics_foo: + inputs: + - icon_output: + parameters: + bar: single + port: None + outputs: [analysis_foo] + - statistics_foo_bar: + inputs: + - analysis_foo: + port: None + outputs: [analysis_foo_bar] + - yearly: + cycling: + start_date: *root_start_date + stop_date: *root_stop_date + period: P1Y + tasks: + - merge: + inputs: + - analysis_foo_bar: + target_cycle: + lag: ['P0M', 'P6M'] + port: None + outputs: [yearly_analysis] + +tasks: + - icon: + plugin: shell + # For relative path execpt if this cannot be resolved to a registered code + # Probably shouldn't enforce to absolute path; or provide a code argument instead, this should be + # unambigous + src: scripts/icon.py + command: "icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}" + parameters: [foo, bar] + - statistics_foo: + plugin: shell + src: scripts/statistics.py + command: "statistics.py {PORT::None}" + parameters: [bar] + - statistics_foo_bar: + plugin: shell + src: scripts/statistics.py + command: "statistics.py {PORT::None}" + - merge: + plugin: shell + src: scripts/merge.py + command: "merge.py {PORT::None}" + +data: + available: + - initial_conditions: + type: file + src: data/initial_conditions + - forcing: + type: file + src: data/forcing + generated: + - icon_output: + type: file + src: icon_output + parameters: [foo, bar] + - icon_restart: + type: file + src: restart + parameters: [foo, bar] + - analysis_foo: + type: file + src: analysis + parameters: [bar] + - analysis_foo_bar: + type: file + src: analysis + - yearly_analysis: + type: file + src: analysis + +parameters: + foo: [0, 1] + bar: [3.0] diff --git a/tests/cases/remote/config/data/forcing b/tests/cases/remote/config/data/forcing new file mode 100644 index 00000000..e69de29b diff --git a/tests/cases/remote/config/data/initial_conditions b/tests/cases/remote/config/data/initial_conditions new file mode 100644 index 00000000..e69de29b diff --git a/tests/cases/remote/config/scripts/icon.py b/tests/cases/remote/config/scripts/icon.py new file mode 100755 index 00000000..32f71ed6 --- /dev/null +++ b/tests/cases/remote/config/scripts/icon.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +"""usage: icon.py [-h] [--init [INIT]] [--restart [RESTART]] [--forcing [FORCING]] [namelist] + +A script mocking parts of icon in a form of a shell script + +options: + -h, --help show this help message and exit + --init [INIT] The icon init file. + --restart [RESTART] The icon restart file. + --forcing [FORCING] The icon forcing file. 
+""" + +import argparse +from pathlib import Path + +LOG_FILE = Path("icon.log") + + +def log(text: str): + print(text) + with LOG_FILE.open("a") as f: + f.write(text) + + +def main(): + parser = argparse.ArgumentParser(description="A script mocking parts of icon in a form of a shell script.") + parser.add_argument("--init", nargs="?", type=str, help="The icon init file.") + parser.add_argument("namelist", nargs="?", default=None) + parser.add_argument("--restart", nargs="?", type=str, help="The icon restart file.") + parser.add_argument("--forcing", nargs="?", type=str, help="The icon forcing file.") + + args = parser.parse_args() + + output = Path("icon_output") + output.write_text("") + + if args.restart: + if args.init: + msg = "Cannot use '--init' and '--restart' option at the same time." + raise ValueError(msg) + if not Path(args.restart).exists(): + msg = f"The icon restart file {args.restart!r} was not found." + raise FileNotFoundError(msg) + restart = Path(args.restart) + + log(f"Restarting from file {args.restart!r}.") + elif args.init: + if not Path(args.init).exists(): + msg = f"The icon init file {args.init!r} was not found." + raise FileNotFoundError(msg) + + log(f"Starting from init file {args.init!r}.") + else: + msg = "Please provide a restart or init file with the corresponding option." + raise ValueError(msg) + + if args.namelist: + log(f"Namelist {args.namelist} provided. Continue with it.") + else: + log("No namelist provided. Continue with default one.") + + # Main script execution continues here + log("Script finished running calculations") + + restart = Path("restart") + restart.write_text("") + + +if __name__ == "__main__": + main() diff --git a/tests/cases/remote/config/scripts/merge.py b/tests/cases/remote/config/scripts/merge.py new file mode 100755 index 00000000..2fa94152 --- /dev/null +++ b/tests/cases/remote/config/scripts/merge.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python + +import argparse +from pathlib import Path + + +def main(): + parser = argparse.ArgumentParser(description="A script mocking parts of icon in a form of a shell script.") + parser.add_argument("file", nargs="+", type=str, help="The files to analyse.") + args = parser.parse_args() + Path("analysis").write_text(f"analysis for file {args.file}") + + +if __name__ == "__main__": + main() diff --git a/tests/cases/remote/config/scripts/statistics.py b/tests/cases/remote/config/scripts/statistics.py new file mode 100755 index 00000000..2fa94152 --- /dev/null +++ b/tests/cases/remote/config/scripts/statistics.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python + +import argparse +from pathlib import Path + + +def main(): + parser = argparse.ArgumentParser(description="A script mocking parts of icon in a form of a shell script.") + parser.add_argument("file", nargs="+", type=str, help="The files to analyse.") + args = parser.parse_args() + Path("analysis").write_text(f"analysis for file {args.file}") + + +if __name__ == "__main__": + main() diff --git a/tests/cases/remote/data/config.txt b/tests/cases/remote/data/config.txt new file mode 100644 index 00000000..52ca5c4b --- /dev/null +++ b/tests/cases/remote/data/config.txt @@ -0,0 +1,251 @@ +cycles: + - bimonthly_tasks [date: 2026-01-01 00:00:00]: + tasks: + - icon [foo: 0, bar: 3.0, date: 2026-01-01 00:00:00]: + input: + - initial_conditions + - forcing + output: + - icon_output [foo: 0, bar: 3.0, date: 2026-01-01 00:00:00] + - icon_restart [foo: 0, bar: 3.0, date: 2026-01-01 00:00:00] + name: 'icon' + coordinates: {'foo': 0, 'bar': 3.0, 'date': 
datetime.datetime(2026, 1, 1, 0, 0)} + cycle point: [2026-01-01 00:00:00 -- 2026-07-01 00:00:00] + plugin: 'shell' + src: scripts/icon.py + command: 'icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}' + env source files: [] + - icon [foo: 1, bar: 3.0, date: 2026-01-01 00:00:00]: + input: + - initial_conditions + - forcing + output: + - icon_output [foo: 1, bar: 3.0, date: 2026-01-01 00:00:00] + - icon_restart [foo: 1, bar: 3.0, date: 2026-01-01 00:00:00] + name: 'icon' + coordinates: {'foo': 1, 'bar': 3.0, 'date': datetime.datetime(2026, 1, 1, 0, 0)} + cycle point: [2026-01-01 00:00:00 -- 2026-07-01 00:00:00] + plugin: 'shell' + src: scripts/icon.py + command: 'icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}' + env source files: [] + - statistics_foo [bar: 3.0, date: 2026-01-01 00:00:00]: + input: + - icon_output [foo: 0, bar: 3.0, date: 2026-01-01 00:00:00] + - icon_output [foo: 1, bar: 3.0, date: 2026-01-01 00:00:00] + output: + - analysis_foo [bar: 3.0, date: 2026-01-01 00:00:00] + name: 'statistics_foo' + coordinates: {'bar': 3.0, 'date': datetime.datetime(2026, 1, 1, 0, 0)} + cycle point: [2026-01-01 00:00:00 -- 2026-07-01 00:00:00] + plugin: 'shell' + src: scripts/statistics.py + command: 'statistics.py {PORT::None}' + env source files: [] + - statistics_foo_bar [date: 2026-01-01 00:00:00]: + input: + - analysis_foo [bar: 3.0, date: 2026-01-01 00:00:00] + output: + - analysis_foo_bar [date: 2026-01-01 00:00:00] + name: 'statistics_foo_bar' + coordinates: {'date': datetime.datetime(2026, 1, 1, 0, 0)} + cycle point: [2026-01-01 00:00:00 -- 2026-07-01 00:00:00] + plugin: 'shell' + src: scripts/statistics.py + command: 'statistics.py {PORT::None}' + env source files: [] + - bimonthly_tasks [date: 2026-07-01 00:00:00]: + tasks: + - icon [foo: 0, bar: 3.0, date: 2026-07-01 00:00:00]: + input: + - icon_restart [foo: 0, bar: 3.0, date: 2026-01-01 00:00:00] + - forcing + output: + - icon_output [foo: 0, bar: 3.0, date: 2026-07-01 00:00:00] + - icon_restart [foo: 0, bar: 3.0, date: 2026-07-01 00:00:00] + name: 'icon' + coordinates: {'foo': 0, 'bar': 3.0, 'date': datetime.datetime(2026, 7, 1, 0, 0)} + cycle point: [2026-07-01 00:00:00 -- 2027-01-01 00:00:00] + plugin: 'shell' + src: scripts/icon.py + command: 'icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}' + env source files: [] + - icon [foo: 1, bar: 3.0, date: 2026-07-01 00:00:00]: + input: + - icon_restart [foo: 1, bar: 3.0, date: 2026-01-01 00:00:00] + - forcing + output: + - icon_output [foo: 1, bar: 3.0, date: 2026-07-01 00:00:00] + - icon_restart [foo: 1, bar: 3.0, date: 2026-07-01 00:00:00] + name: 'icon' + coordinates: {'foo': 1, 'bar': 3.0, 'date': datetime.datetime(2026, 7, 1, 0, 0)} + cycle point: [2026-07-01 00:00:00 -- 2027-01-01 00:00:00] + plugin: 'shell' + src: scripts/icon.py + command: 'icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}' + env source files: [] + - statistics_foo [bar: 3.0, date: 2026-07-01 00:00:00]: + input: + - icon_output [foo: 0, bar: 3.0, date: 2026-07-01 00:00:00] + - icon_output [foo: 1, bar: 3.0, date: 2026-07-01 00:00:00] + output: + - analysis_foo [bar: 3.0, date: 2026-07-01 00:00:00] + name: 'statistics_foo' + coordinates: {'bar': 3.0, 'date': datetime.datetime(2026, 7, 1, 0, 0)} + cycle point: [2026-07-01 00:00:00 -- 2027-01-01 00:00:00] + plugin: 'shell' + src: scripts/statistics.py + command: 'statistics.py {PORT::None}' + env source files: [] + - statistics_foo_bar [date: 
2026-07-01 00:00:00]: + input: + - analysis_foo [bar: 3.0, date: 2026-07-01 00:00:00] + output: + - analysis_foo_bar [date: 2026-07-01 00:00:00] + name: 'statistics_foo_bar' + coordinates: {'date': datetime.datetime(2026, 7, 1, 0, 0)} + cycle point: [2026-07-01 00:00:00 -- 2027-01-01 00:00:00] + plugin: 'shell' + src: scripts/statistics.py + command: 'statistics.py {PORT::None}' + env source files: [] + - bimonthly_tasks [date: 2027-01-01 00:00:00]: + tasks: + - icon [foo: 0, bar: 3.0, date: 2027-01-01 00:00:00]: + input: + - icon_restart [foo: 0, bar: 3.0, date: 2026-07-01 00:00:00] + - forcing + output: + - icon_output [foo: 0, bar: 3.0, date: 2027-01-01 00:00:00] + - icon_restart [foo: 0, bar: 3.0, date: 2027-01-01 00:00:00] + name: 'icon' + coordinates: {'foo': 0, 'bar': 3.0, 'date': datetime.datetime(2027, 1, 1, 0, 0)} + cycle point: [2027-01-01 00:00:00 -- 2027-07-01 00:00:00] + plugin: 'shell' + src: scripts/icon.py + command: 'icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}' + env source files: [] + - icon [foo: 1, bar: 3.0, date: 2027-01-01 00:00:00]: + input: + - icon_restart [foo: 1, bar: 3.0, date: 2026-07-01 00:00:00] + - forcing + output: + - icon_output [foo: 1, bar: 3.0, date: 2027-01-01 00:00:00] + - icon_restart [foo: 1, bar: 3.0, date: 2027-01-01 00:00:00] + name: 'icon' + coordinates: {'foo': 1, 'bar': 3.0, 'date': datetime.datetime(2027, 1, 1, 0, 0)} + cycle point: [2027-01-01 00:00:00 -- 2027-07-01 00:00:00] + plugin: 'shell' + src: scripts/icon.py + command: 'icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}' + env source files: [] + - statistics_foo [bar: 3.0, date: 2027-01-01 00:00:00]: + input: + - icon_output [foo: 0, bar: 3.0, date: 2027-01-01 00:00:00] + - icon_output [foo: 1, bar: 3.0, date: 2027-01-01 00:00:00] + output: + - analysis_foo [bar: 3.0, date: 2027-01-01 00:00:00] + name: 'statistics_foo' + coordinates: {'bar': 3.0, 'date': datetime.datetime(2027, 1, 1, 0, 0)} + cycle point: [2027-01-01 00:00:00 -- 2027-07-01 00:00:00] + plugin: 'shell' + src: scripts/statistics.py + command: 'statistics.py {PORT::None}' + env source files: [] + - statistics_foo_bar [date: 2027-01-01 00:00:00]: + input: + - analysis_foo [bar: 3.0, date: 2027-01-01 00:00:00] + output: + - analysis_foo_bar [date: 2027-01-01 00:00:00] + name: 'statistics_foo_bar' + coordinates: {'date': datetime.datetime(2027, 1, 1, 0, 0)} + cycle point: [2027-01-01 00:00:00 -- 2027-07-01 00:00:00] + plugin: 'shell' + src: scripts/statistics.py + command: 'statistics.py {PORT::None}' + env source files: [] + - bimonthly_tasks [date: 2027-07-01 00:00:00]: + tasks: + - icon [foo: 0, bar: 3.0, date: 2027-07-01 00:00:00]: + input: + - icon_restart [foo: 0, bar: 3.0, date: 2027-01-01 00:00:00] + - forcing + output: + - icon_output [foo: 0, bar: 3.0, date: 2027-07-01 00:00:00] + - icon_restart [foo: 0, bar: 3.0, date: 2027-07-01 00:00:00] + name: 'icon' + coordinates: {'foo': 0, 'bar': 3.0, 'date': datetime.datetime(2027, 7, 1, 0, 0)} + cycle point: [2027-07-01 00:00:00 -- 2028-01-01 00:00:00] + plugin: 'shell' + src: scripts/icon.py + command: 'icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}' + env source files: [] + - icon [foo: 1, bar: 3.0, date: 2027-07-01 00:00:00]: + input: + - icon_restart [foo: 1, bar: 3.0, date: 2027-01-01 00:00:00] + - forcing + output: + - icon_output [foo: 1, bar: 3.0, date: 2027-07-01 00:00:00] + - icon_restart [foo: 1, bar: 3.0, date: 2027-07-01 00:00:00] + name: 'icon' + coordinates: 
{'foo': 1, 'bar': 3.0, 'date': datetime.datetime(2027, 7, 1, 0, 0)} + cycle point: [2027-07-01 00:00:00 -- 2028-01-01 00:00:00] + plugin: 'shell' + src: scripts/icon.py + command: 'icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}' + env source files: [] + - statistics_foo [bar: 3.0, date: 2027-07-01 00:00:00]: + input: + - icon_output [foo: 0, bar: 3.0, date: 2027-07-01 00:00:00] + - icon_output [foo: 1, bar: 3.0, date: 2027-07-01 00:00:00] + output: + - analysis_foo [bar: 3.0, date: 2027-07-01 00:00:00] + name: 'statistics_foo' + coordinates: {'bar': 3.0, 'date': datetime.datetime(2027, 7, 1, 0, 0)} + cycle point: [2027-07-01 00:00:00 -- 2028-01-01 00:00:00] + plugin: 'shell' + src: scripts/statistics.py + command: 'statistics.py {PORT::None}' + env source files: [] + - statistics_foo_bar [date: 2027-07-01 00:00:00]: + input: + - analysis_foo [bar: 3.0, date: 2027-07-01 00:00:00] + output: + - analysis_foo_bar [date: 2027-07-01 00:00:00] + name: 'statistics_foo_bar' + coordinates: {'date': datetime.datetime(2027, 7, 1, 0, 0)} + cycle point: [2027-07-01 00:00:00 -- 2028-01-01 00:00:00] + plugin: 'shell' + src: scripts/statistics.py + command: 'statistics.py {PORT::None}' + env source files: [] + - yearly [date: 2026-01-01 00:00:00]: + tasks: + - merge [date: 2026-01-01 00:00:00]: + input: + - analysis_foo_bar [date: 2026-01-01 00:00:00] + - analysis_foo_bar [date: 2026-07-01 00:00:00] + output: + - yearly_analysis [date: 2026-01-01 00:00:00] + name: 'merge' + coordinates: {'date': datetime.datetime(2026, 1, 1, 0, 0)} + cycle point: [2026-01-01 00:00:00 -- 2027-01-01 00:00:00] + plugin: 'shell' + src: scripts/merge.py + command: 'merge.py {PORT::None}' + env source files: [] + - yearly [date: 2027-01-01 00:00:00]: + tasks: + - merge [date: 2027-01-01 00:00:00]: + input: + - analysis_foo_bar [date: 2027-01-01 00:00:00] + - analysis_foo_bar [date: 2027-07-01 00:00:00] + output: + - yearly_analysis [date: 2027-01-01 00:00:00] + name: 'merge' + coordinates: {'date': datetime.datetime(2027, 1, 1, 0, 0)} + cycle point: [2027-01-01 00:00:00 -- 2028-01-01 00:00:00] + plugin: 'shell' + src: scripts/merge.py + command: 'merge.py {PORT::None}' + env source files: [] \ No newline at end of file diff --git a/tests/cases/remote/svg/.gitkeep b/tests/cases/remote/svg/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_workgraph.py b/tests/test_workgraph.py index 2f23caa3..2da6ae40 100644 --- a/tests/test_workgraph.py +++ b/tests/test_workgraph.py @@ -97,9 +97,9 @@ def test_multiple_inputs_filenames(tmp_path): core_wf = Workflow.from_config_workflow(config_workflow=config_wf) aiida_wf = AiidaWorkGraph(core_workflow=core_wf) - expected_filenames = {f"data_{i}": name for i, name in enumerate(file_names)} + obtained_filenames = {f"data_{i}": name for i, name in enumerate(file_names)} filenames = aiida_wf._workgraph.tasks[0].inputs.filenames.value # noqa: SLF001 - assert filenames == expected_filenames + assert filenames == obtained_filenames @pytest.mark.usefixtures("aiida_localhost") @@ -205,35 +205,35 @@ def test_set_shelljob_filenames_parametrized(tmp_path): tasks: - icon: plugin: shell - src: scripts/icon.py + src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/icon.py command: "icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}" parameters: [foo, bar] computer: localhost - statistics_foo: plugin: shell - src: scripts/statistics.py + src: 
/home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/statistics.py command: "statistics.py {PORT::None}" parameters: [bar] computer: localhost - statistics_foo_bar: plugin: shell - src: scripts/statistics.py + src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/statistics.py command: "statistics.py {PORT::None}" computer: localhost - merge: plugin: shell - src: scripts/merge.py + src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/merge.py command: "merge.py {PORT::None}" computer: localhost data: available: - initial_conditions: type: file - src: data/initial_conditions + src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/small/config/data/initial_conditions computer: localhost - forcing: type: file - src: data/forcing + src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/data/forcing computer: localhost generated: - icon_output: @@ -266,9 +266,9 @@ def test_set_shelljob_filenames_parametrized(tmp_path): aiida_wf = AiidaWorkGraph(core_workflow=core_wf) filenames_list = [task.inputs.filenames.value for task in aiida_wf._workgraph.tasks] arguments_list = [task.inputs.arguments.value for task in aiida_wf._workgraph.tasks] - import ipdb; ipdb.set_trace() nodes_list = [list(task.inputs.nodes._sockets.keys()) for task in aiida_wf._workgraph.tasks] - expected_filenames_list = [ + + obtained_filenames_list = [ {"forcing": "forcing", "initial_conditions": "initial_conditions"}, {"forcing": "forcing", "initial_conditions": "initial_conditions"}, {"forcing": "forcing", "restart": "icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00"}, @@ -288,7 +288,7 @@ def test_set_shelljob_filenames_parametrized(tmp_path): {"analysis": "analysis_foo_bar_date_2026_01_01_00_00_00"}, {"analysis": "analysis_foo_bar_date_2027_01_01_00_00_00"}, ] - expected_arguments_list = [ + obtained_arguments_list = [ "--restart --init {initial_conditions} --forcing {forcing}", "--restart --init {initial_conditions} --forcing {forcing}", "--restart {icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00} --init " "--forcing {forcing}", @@ -312,7 +312,40 @@ def test_set_shelljob_filenames_parametrized(tmp_path): "{analysis_foo_bar_date_2026_01_01_00_00_00} " "{analysis_foo_bar_date_2026_07_01_00_00_00}", "{analysis_foo_bar_date_2027_01_01_00_00_00} " "{analysis_foo_bar_date_2027_07_01_00_00_00}", ] - assert filenames_list == expected_filenames_list - import ipdb + obtained_nodes_list = [ + ["initial_conditions", "forcing"], + ["initial_conditions", "forcing"], + ["icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00", "forcing"], + ["icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00", "forcing"], + ["icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00", "forcing"], + ["icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00", "forcing"], + ["icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00", "forcing"], + ["icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00", "forcing"], + [ + "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00", + "icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00", + ], + [ + "icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00", + "icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00", + ], + [ + "icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00", + "icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00", + ], + [ + 
"icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00", + "icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00", + ], + ["analysis_foo_bar_3_0___date_2026_01_01_00_00_00"], + ["analysis_foo_bar_3_0___date_2026_07_01_00_00_00"], + ["analysis_foo_bar_3_0___date_2027_01_01_00_00_00"], + ["analysis_foo_bar_3_0___date_2027_07_01_00_00_00"], + ["analysis_foo_bar_date_2026_01_01_00_00_00", "analysis_foo_bar_date_2026_07_01_00_00_00"], + ["analysis_foo_bar_date_2027_01_01_00_00_00", "analysis_foo_bar_date_2027_07_01_00_00_00"], + ] + + # import ipdb; ipdb.set_trace() + # assert filenames_list == obtained_filenames_list - ipdb.set_trace() + result = aiida_wf.run() From 4be222fdd6143d40f550b3cc08994dfa41761f4a Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Mon, 2 Jun 2025 15:27:58 +0200 Subject: [PATCH 10/27] Implementation seems to work now --- src/sirocco/workgraph.py | 78 ++++++------ tests/cases/parameters/config/config.yml | 9 +- tests/cases/remote/config/config.yml | 24 ++-- tests/test_workgraph.py | 154 +++++++++++++++++------ 4 files changed, 169 insertions(+), 96 deletions(-) diff --git a/src/sirocco/workgraph.py b/src/sirocco/workgraph.py index 3551ac30..95f9f4a9 100644 --- a/src/sirocco/workgraph.py +++ b/src/sirocco/workgraph.py @@ -9,12 +9,12 @@ import aiida_workgraph # type: ignore[import-untyped] # does not have proper typing and stubs import aiida_workgraph.tasks.factory.shelljob_task # type: ignore[import-untyped] # is only for a workaround from aiida.common.exceptions import NotExistent +from rich.pretty import pprint from sirocco import core if TYPE_CHECKING: from aiida_workgraph.socket import TaskSocket # type: ignore[import-untyped] - from aiida_workgraph.sockets.builtins import SocketAny WorkgraphDataNode: TypeAlias = aiida.orm.RemoteData | aiida.orm.SinglefileData | aiida.orm.FolderData @@ -27,16 +27,6 @@ def _execute(self, engine_process, args=None, kwargs=None, var_kwargs=None): # from aiida_shell import ShellJob from aiida_workgraph.utils import create_and_pause_process # type: ignore[import-untyped] - # Check here if possible to fix the command, currently looks like: - # { - # 'arguments': '--restart --init {initial_conditions} --forcing {forcing}', - # 'command': '/home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/icon.py', - # 'metadata': {'computer': , - # 'options': {'prepend_text': '', 'use_symlinks': True}}, - # 'nodes': {'forcing': , - # 'initial_conditions': }, - # 'outputs': [] - # } inputs = aiida_workgraph.tasks.factory.shelljob_task.prepare_for_shell_task(kwargs) # Workaround starts here @@ -49,11 +39,6 @@ def _execute(self, engine_process, args=None, kwargs=None, var_kwargs=None): # inputs["outputs"] = list(missing_outputs) # Workaround ends here - # `code_label` is constructed via: - # code_label = f'{command}@{computer.label}' - # In aiida-shell - # ShellCode inherits from InstalledCode... (what about PortableCode??) 
- inputs["metadata"].update({"call_link_label": self.name}) if self.action == "PAUSE": engine_process.report(f"Task {self.name} is created and paused.") @@ -317,10 +302,10 @@ def _link_wait_on_to_task(self, task: core.Task): self.task_from_core(task).wait = [self.task_from_core(wt) for wt in task.wait_on] def _set_shelljob_arguments(self, task: core.ShellTask): - """set AiiDA ShellJob arguments by replacing port placeholders by aiida labels""" - + """Set AiiDA ShellJob arguments by replacing port placeholders with AiiDA labels.""" workgraph_task = self.task_from_core(task) - workgraph_task_arguments: SocketAny = workgraph_task.inputs.arguments + workgraph_task_arguments = workgraph_task.inputs.arguments + if workgraph_task_arguments is None: msg = ( f"Workgraph task {workgraph_task.name!r} did not initialize arguments nodes in the workgraph " @@ -328,40 +313,57 @@ def _set_shelljob_arguments(self, task: core.ShellTask): ) raise ValueError(msg) - input_labels = {port: list(map(self.label_placeholder, task.inputs[port])) for port in task.inputs} + # Build input_labels dictionary for port resolution + input_labels = {} + for port_name, input_list in task.inputs.items(): + input_labels[port_name] = [] + for input_ in input_list: + # Use the full AiiDA label as the placeholder content + input_label = self.get_aiida_label_from_graph_item(input_) + input_labels[port_name].append(f"{{{input_label}}}") + + # Resolve the command with port placeholders replaced by input labels _, arguments = self.split_cmd_arg(task.resolve_ports(input_labels)) workgraph_task_arguments.value = arguments - def _set_shelljob_filenames(self, task: core.ShellTask): - """set AiiDA ShellJob filenames for Data entities""" + def _set_shelljob_filenames(self, task: core.ShellTask): + """Set AiiDA ShellJob filenames for data entities, including parameterized data.""" filenames = {} workgraph_task = self.task_from_core(task) + if not workgraph_task.inputs.filenames: - import ipdb; ipdb.set_trace() return for input_ in task.input_data_nodes(): + input_label = self.get_aiida_label_from_graph_item(input_) + if task.computer and input_.computer and isinstance(input_, core.AvailableData): + # For RemoteData on the same computer, use just the filename filename = Path(input_.src).name filenames[input_.name] = filename - # PRCOMMENT: GeneratedData has no explicit Computer attribute. - # This is explicit from task, or should we add that, as well? 
(probably not necessary) - elif task.computer and isinstance(input_, core.GeneratedData): - for input_k, input_v in task.inputs.items(): - if not input_v: - continue - if input_ == input_v[0]: - filename = self.label_placeholder(input_).strip('{').strip('}') - if input_k == "None": - filenames[str(input_.src)] = filename - else: - filenames[input_k] = filename - - if not filenames: - import ipdb; ipdb.set_trace() + else: + # For other cases (including GeneratedData), we need to handle parameterized data + # Importantly, multiple data nodes with the same base name but different + # coordinates need unique filenames to avoid conflicts in the working directory + + # Count how many inputs have the same base name + same_name_count = sum(1 for inp in task.input_data_nodes() if inp.name == input_.name) + + if same_name_count > 1: + # Multiple data nodes with same base name - use full label as filename + # to ensure uniqueness in working directory + filename = input_label + else: + # Single data node with this name - can use simple filename + filename = Path(input_.src).name if hasattr(input_, 'src') else input_.name + + # The key in filenames dict should be the input label (what's used in nodes dict) + filenames[input_label] = filename + workgraph_task.inputs.filenames.value = filenames + def run( self, inputs: None | dict[str, Any] = None, diff --git a/tests/cases/parameters/config/config.yml b/tests/cases/parameters/config/config.yml index 00b08425..477b02e0 100644 --- a/tests/cases/parameters/config/config.yml +++ b/tests/cases/parameters/config/config.yml @@ -1,6 +1,5 @@ ---- -start_date: &root_start_date '2026-01-01T00:00' -stop_date: &root_stop_date '2028-01-01T00:00' +start_date: &root_start_date "2026-01-01T00:00" +stop_date: &root_stop_date "2028-01-01T00:00" cycles: - bimonthly_tasks: @@ -49,7 +48,7 @@ cycles: inputs: - analysis_foo_bar: target_cycle: - lag: ['P0M', 'P6M'] + lag: ["P0M", "P6M"] port: None outputs: [yearly_analysis] @@ -58,7 +57,7 @@ tasks: plugin: shell # For relative path execpt if this cannot be resolved to a registered code # Probably shouldn't enforce to absolute path; or provide a code argument instead, this should be - # unambigous + # unambigous src: scripts/icon.py command: "icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}" parameters: [foo, bar] diff --git a/tests/cases/remote/config/config.yml b/tests/cases/remote/config/config.yml index 00b08425..c0b1023f 100644 --- a/tests/cases/remote/config/config.yml +++ b/tests/cases/remote/config/config.yml @@ -1,7 +1,5 @@ ---- -start_date: &root_start_date '2026-01-01T00:00' -stop_date: &root_stop_date '2028-01-01T00:00' - +start_date: &root_start_date "2026-01-01T00:00" +stop_date: &root_stop_date "2027-01-01T00:00" cycles: - bimonthly_tasks: cycling: @@ -49,41 +47,42 @@ cycles: inputs: - analysis_foo_bar: target_cycle: - lag: ['P0M', 'P6M'] + lag: ["P0M", "P6M"] port: None outputs: [yearly_analysis] - tasks: - icon: plugin: shell - # For relative path execpt if this cannot be resolved to a registered code - # Probably shouldn't enforce to absolute path; or provide a code argument instead, this should be - # unambigous src: scripts/icon.py command: "icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}" parameters: [foo, bar] + computer: localhost - statistics_foo: plugin: shell src: scripts/statistics.py command: "statistics.py {PORT::None}" parameters: [bar] + computer: localhost - statistics_foo_bar: plugin: shell src: scripts/statistics.py command: 
"statistics.py {PORT::None}" + computer: localhost - merge: plugin: shell src: scripts/merge.py command: "merge.py {PORT::None}" - + computer: localhost data: available: - initial_conditions: type: file - src: data/initial_conditions + src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/remote/config/data/initial_conditions + computer: localhost - forcing: type: file - src: data/forcing + src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/remote/config/data/forcing + computer: localhost generated: - icon_output: type: file @@ -103,7 +102,6 @@ data: - yearly_analysis: type: file src: analysis - parameters: foo: [0, 1] bar: [3.0] diff --git a/tests/test_workgraph.py b/tests/test_workgraph.py index 2da6ae40..d67fbb2d 100644 --- a/tests/test_workgraph.py +++ b/tests/test_workgraph.py @@ -23,13 +23,20 @@ def test_set_shelljob_filenames(tmp_path): tasks=[ models.ConfigCycleTask( name="task", - inputs=[models.ConfigCycleTaskInput(name="my_data", port="unused")], + inputs=[ + models.ConfigCycleTaskInput(name="my_data", port="unused") + ], ), ], ), ], tasks=[ - models.ConfigShellTask(name="task", command="echo test", src=str(script_path), computer="localhost"), + models.ConfigShellTask( + name="task", + command="echo test", + src=str(script_path), + computer="localhost", + ), ], data=models.ConfigData( available=[ @@ -70,7 +77,9 @@ def test_multiple_inputs_filenames(tmp_path): models.ConfigCycleTask( name="task", inputs=[ - models.ConfigCycleTaskInput(name=f"data_{i}", port=f"port_{i}") + models.ConfigCycleTaskInput( + name=f"data_{i}", port=f"port_{i}" + ) for i in range(len(file_names)) ], ), @@ -78,7 +87,12 @@ def test_multiple_inputs_filenames(tmp_path): ), ], tasks=[ - models.ConfigShellTask(name="task", command="echo test", src=str(script_path), computer="localhost"), + models.ConfigShellTask( + name="task", + command="echo test", + src=str(script_path), + computer="localhost", + ), ], data=models.ConfigData( available=[ @@ -118,13 +132,20 @@ def test_directory_input_filenames(tmp_path): tasks=[ models.ConfigCycleTask( name="task", - inputs=[models.ConfigCycleTaskInput(name="my_dir", port="unused")], + inputs=[ + models.ConfigCycleTaskInput(name="my_dir", port="unused") + ], ), ], ), ], tasks=[ - models.ConfigShellTask(name="task", command="echo test", src=str(script_path), computer="localhost"), + models.ConfigShellTask( + name="task", + command="echo test", + src=str(script_path), + computer="localhost", + ), ], data=models.ConfigData( available=[ @@ -266,37 +287,82 @@ def test_set_shelljob_filenames_parametrized(tmp_path): aiida_wf = AiidaWorkGraph(core_workflow=core_wf) filenames_list = [task.inputs.filenames.value for task in aiida_wf._workgraph.tasks] arguments_list = [task.inputs.arguments.value for task in aiida_wf._workgraph.tasks] - nodes_list = [list(task.inputs.nodes._sockets.keys()) for task in aiida_wf._workgraph.tasks] + nodes_list = [ + list(task.inputs.nodes._sockets.keys()) for task in aiida_wf._workgraph.tasks + ] - obtained_filenames_list = [ + expected_filenames_list = [ {"forcing": "forcing", "initial_conditions": "initial_conditions"}, {"forcing": "forcing", "initial_conditions": "initial_conditions"}, - {"forcing": "forcing", "restart": "icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00"}, - {"forcing": "forcing", "restart": "icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00"}, - {"forcing": "forcing", "restart": "icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00"}, - {"forcing": "forcing", "restart": 
"icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00"}, - {"forcing": "forcing", "restart": "icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00"}, - {"forcing": "forcing", "restart": "icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00"}, - {"icon_output": "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00"}, - {"icon_output": "icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00"}, - {"icon_output": "icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00"}, - {"icon_output": "icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00"}, - {"analysis": "analysis_foo_bar_3_0___date_2026_01_01_00_00_00"}, - {"analysis": "analysis_foo_bar_3_0___date_2026_07_01_00_00_00"}, - {"analysis": "analysis_foo_bar_3_0___date_2027_01_01_00_00_00"}, - {"analysis": "analysis_foo_bar_3_0___date_2027_07_01_00_00_00"}, - {"analysis": "analysis_foo_bar_date_2026_01_01_00_00_00"}, - {"analysis": "analysis_foo_bar_date_2027_01_01_00_00_00"}, + { + "forcing": "forcing", + "icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00": "restart", + }, + { + "forcing": "forcing", + "icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00": "restart", + }, + { + "forcing": "forcing", + "icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00": "restart", + }, + { + "forcing": "forcing", + "icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00": "restart", + }, + { + "forcing": "forcing", + "icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00": "restart", + }, + { + "forcing": "forcing", + "icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00": "restart", + }, + { + "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00", + "icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00", + }, + { + "icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00", + "icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00", + }, + { + "icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00", + "icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00", + }, + { + "icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00", + "icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00", + }, + {"analysis_foo_bar_3_0___date_2026_01_01_00_00_00": "analysis"}, + {"analysis_foo_bar_3_0___date_2026_07_01_00_00_00": "analysis"}, + {"analysis_foo_bar_3_0___date_2027_01_01_00_00_00": "analysis"}, + {"analysis_foo_bar_3_0___date_2027_07_01_00_00_00": "analysis"}, + { + "analysis_foo_bar_date_2026_01_01_00_00_00": "analysis_foo_bar_date_2026_01_01_00_00_00", + "analysis_foo_bar_date_2026_07_01_00_00_00": "analysis_foo_bar_date_2026_07_01_00_00_00", + }, + { + "analysis_foo_bar_date_2027_01_01_00_00_00": "analysis_foo_bar_date_2027_01_01_00_00_00", + "analysis_foo_bar_date_2027_07_01_00_00_00": "analysis_foo_bar_date_2027_07_01_00_00_00", + }, ] - obtained_arguments_list = [ + + expected_arguments_list = [ "--restart --init {initial_conditions} --forcing {forcing}", "--restart --init {initial_conditions} --forcing {forcing}", - "--restart {icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00} --init " "--forcing {forcing}", - "--restart 
{icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00} --init " "--forcing {forcing}", - "--restart {icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00} --init " "--forcing {forcing}", - "--restart {icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00} --init " "--forcing {forcing}", - "--restart {icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00} --init " "--forcing {forcing}", - "--restart {icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00} --init " "--forcing {forcing}", + "--restart {icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00} --init " + "--forcing {forcing}", + "--restart {icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00} --init " + "--forcing {forcing}", + "--restart {icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00} --init " + "--forcing {forcing}", + "--restart {icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00} --init " + "--forcing {forcing}", + "--restart {icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00} --init " + "--forcing {forcing}", + "--restart {icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00} --init " + "--forcing {forcing}", "{icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00} " "{icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00}", "{icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00} " @@ -309,10 +375,13 @@ def test_set_shelljob_filenames_parametrized(tmp_path): "{analysis_foo_bar_3_0___date_2026_07_01_00_00_00}", "{analysis_foo_bar_3_0___date_2027_01_01_00_00_00}", "{analysis_foo_bar_3_0___date_2027_07_01_00_00_00}", - "{analysis_foo_bar_date_2026_01_01_00_00_00} " "{analysis_foo_bar_date_2026_07_01_00_00_00}", - "{analysis_foo_bar_date_2027_01_01_00_00_00} " "{analysis_foo_bar_date_2027_07_01_00_00_00}", + "{analysis_foo_bar_date_2026_01_01_00_00_00} " + "{analysis_foo_bar_date_2026_07_01_00_00_00}", + "{analysis_foo_bar_date_2027_01_01_00_00_00} " + "{analysis_foo_bar_date_2027_07_01_00_00_00}", ] - obtained_nodes_list = [ + + expected_nodes_list = [ ["initial_conditions", "forcing"], ["initial_conditions", "forcing"], ["icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00", "forcing"], @@ -341,11 +410,16 @@ def test_set_shelljob_filenames_parametrized(tmp_path): ["analysis_foo_bar_3_0___date_2026_07_01_00_00_00"], ["analysis_foo_bar_3_0___date_2027_01_01_00_00_00"], ["analysis_foo_bar_3_0___date_2027_07_01_00_00_00"], - ["analysis_foo_bar_date_2026_01_01_00_00_00", "analysis_foo_bar_date_2026_07_01_00_00_00"], - ["analysis_foo_bar_date_2027_01_01_00_00_00", "analysis_foo_bar_date_2027_07_01_00_00_00"], + [ + "analysis_foo_bar_date_2026_01_01_00_00_00", + "analysis_foo_bar_date_2026_07_01_00_00_00", + ], + [ + "analysis_foo_bar_date_2027_01_01_00_00_00", + "analysis_foo_bar_date_2027_07_01_00_00_00", + ], ] - # import ipdb; ipdb.set_trace() - # assert filenames_list == obtained_filenames_list - - result = aiida_wf.run() + assert arguments_list == expected_arguments_list + assert filenames_list == expected_filenames_list + assert nodes_list == expected_nodes_list From 84677c7fa124c26f51624d1b78aff24aecb74e19 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Mon, 2 Jun 2025 15:56:09 +0200 Subject: [PATCH 11/27] . 
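Reformat src/sirocco/workgraph.py (mostly line wrapping), add VS Code
pytest settings, and restructure tests/test_workgraph.py into smaller
unit tests for AiiDA label generation and for the filename-conflict
handling of parameterized data.

The conflict rule that the new tests exercise can be sketched as the
self-contained Python snippet below. It is illustrative only:
resolve_filenames and its (base_name, label, src) input triples are
invented for this sketch and are not part of the Sirocco API. The idea
is that an input whose base name collides with another input of the
same task keeps its full AiiDA label as the filename in the job's
working directory, while a unique input keeps the plain file name
derived from its src.

    from pathlib import Path

    def resolve_filenames(inputs: list[tuple[str, str, str]]) -> dict[str, str]:
        """inputs are (base_name, full_aiida_label, src_path) triples."""
        # count how many inputs share each base name
        counts: dict[str, int] = {}
        for name, _, _ in inputs:
            counts[name] = counts.get(name, 0) + 1
        filenames: dict[str, str] = {}
        for name, label, src in inputs:
            # colliding base names: use the full label so files stay unique
            # in the working directory; otherwise keep the plain file name
            filenames[label] = label if counts[name] > 1 else Path(src).name
        return filenames

    print(resolve_filenames([
        ("icon_output", "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00", "icon_output"),
        ("icon_output", "icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00", "icon_output"),
        ("forcing", "forcing", "data/forcing"),
    ]))
    # both icon_output entries map to their full labels; "forcing" maps to "forcing"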
--- .vscode/settings.json | 7 + src/sirocco/workgraph.py | 71 ++- tests/test_workgraph.py | 1130 ++++++++++++++++++++++++++------------ 3 files changed, 827 insertions(+), 381 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..3e99ede3 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} \ No newline at end of file diff --git a/src/sirocco/workgraph.py b/src/sirocco/workgraph.py index 95f9f4a9..54cdb29f 100644 --- a/src/sirocco/workgraph.py +++ b/src/sirocco/workgraph.py @@ -16,7 +16,9 @@ if TYPE_CHECKING: from aiida_workgraph.socket import TaskSocket # type: ignore[import-untyped] - WorkgraphDataNode: TypeAlias = aiida.orm.RemoteData | aiida.orm.SinglefileData | aiida.orm.FolderData + WorkgraphDataNode: TypeAlias = ( + aiida.orm.RemoteData | aiida.orm.SinglefileData | aiida.orm.FolderData + ) # This is a workaround required when splitting the initialization of the task and its linked nodes Merging this into @@ -32,7 +34,15 @@ def _execute(self, engine_process, args=None, kwargs=None, var_kwargs=None): # # Workaround starts here # This part is part of the workaround. We need to manually add the outputs from the task. # Because kwargs are not populated with outputs - default_outputs = {"remote_folder", "remote_stash", "retrieved", "_outputs", "_wait", "stdout", "stderr"} + default_outputs = { + "remote_folder", + "remote_stash", + "retrieved", + "_outputs", + "_wait", + "stdout", + "stderr", + } task_outputs = set(self.outputs._sockets.keys()) # noqa SLF001 # there so public accessor task_outputs = task_outputs.union(set(inputs.pop("outputs", []))) missing_outputs = task_outputs.difference(default_outputs) @@ -143,7 +153,8 @@ def get_aiida_label_from_graph_item(cls, obj: core.GraphItem) -> str: through the replacement of invalid chars in the coordinates duplication can happen but it is unlikely. """ return cls.replace_invalid_chars_in_label( - f"{obj.name}" + "__".join(f"_{key}_{value}" for key, value in obj.coordinates.items()) + f"{obj.name}" + + "__".join(f"_{key}_{value}" for key, value in obj.coordinates.items()) ) @staticmethod @@ -157,11 +168,17 @@ def split_cmd_arg(command_line: str) -> tuple[str, str]: def label_placeholder(cls, data: core.Data) -> str: return f"{{{cls.get_aiida_label_from_graph_item(data)}}}" - def data_from_core(self, core_available_data: core.AvailableData) -> WorkgraphDataNode: - return self._aiida_data_nodes[self.get_aiida_label_from_graph_item(core_available_data)] + def data_from_core( + self, core_available_data: core.AvailableData + ) -> WorkgraphDataNode: + return self._aiida_data_nodes[ + self.get_aiida_label_from_graph_item(core_available_data) + ] def socket_from_core(self, core_generated_data: core.GeneratedData) -> TaskSocket: - return self._aiida_socket_nodes[self.get_aiida_label_from_graph_item(core_generated_data)] + return self._aiida_socket_nodes[ + self.get_aiida_label_from_graph_item(core_generated_data) + ] def task_from_core(self, core_task: core.Task) -> aiida_workgraph.Task: return self._aiida_task_nodes[self.get_aiida_label_from_graph_item(core_task)] @@ -177,7 +194,11 @@ def _add_aiida_input_data_node(self, data: core.Data): Create an `aiida.orm.Data` instance from the provided graph item. 
""" label = self.get_aiida_label_from_graph_item(data) - data_full_path = data.src if data.src.is_absolute() else self._core_workflow.config_rootdir / data.src + data_full_path = ( + data.src + if data.src.is_absolute() + else self._core_workflow.config_rootdir / data.src + ) if data.computer is not None: try: @@ -190,9 +211,13 @@ def _add_aiida_input_data_node(self, data: core.Data): remote_path=str(data.src), label=label, computer=computer ) elif data.type == "file": - self._aiida_data_nodes[label] = aiida.orm.SinglefileData(label=label, file=data_full_path) + self._aiida_data_nodes[label] = aiida.orm.SinglefileData( + label=label, file=data_full_path + ) elif data.type == "dir": - self._aiida_data_nodes[label] = aiida.orm.FolderData(label=label, tree=data_full_path) + self._aiida_data_nodes[label] = aiida.orm.FolderData( + label=label, tree=data_full_path + ) else: msg = f"Data type {data.type!r} not supported. Please use 'file' or 'dir'." raise ValueError(msg) @@ -231,7 +256,9 @@ def _create_shell_task_node(self, task: core.ShellTask): else (task.config_rootdir / env_source_path) for env_source_file in task.env_source_files ] - prepend_text = "\n".join([f"source {env_source_path}" for env_source_path in env_source_paths]) + prepend_text = "\n".join( + [f"source {env_source_path}" for env_source_path in env_source_paths] + ) metadata["options"] = {"prepend_text": prepend_text} # NOTE: Hardcoded for now, possibly make user-facing option metadata["options"]["use_symlinks"] = True @@ -299,7 +326,9 @@ def _link_input_node_to_shelltask(self, task: core.ShellTask, input_: core.Data) def _link_wait_on_to_task(self, task: core.Task): """link wait on tasks to workgraph task""" - self.task_from_core(task).wait = [self.task_from_core(wt) for wt in task.wait_on] + self.task_from_core(task).wait = [ + self.task_from_core(wt) for wt in task.wait_on + ] def _set_shelljob_arguments(self, task: core.ShellTask): """Set AiiDA ShellJob arguments by replacing port placeholders with AiiDA labels.""" @@ -326,7 +355,6 @@ def _set_shelljob_arguments(self, task: core.ShellTask): _, arguments = self.split_cmd_arg(task.resolve_ports(input_labels)) workgraph_task_arguments.value = arguments - def _set_shelljob_filenames(self, task: core.ShellTask): """Set AiiDA ShellJob filenames for data entities, including parameterized data.""" filenames = {} @@ -338,7 +366,11 @@ def _set_shelljob_filenames(self, task: core.ShellTask): for input_ in task.input_data_nodes(): input_label = self.get_aiida_label_from_graph_item(input_) - if task.computer and input_.computer and isinstance(input_, core.AvailableData): + if ( + task.computer + and input_.computer + and isinstance(input_, core.AvailableData) + ): # For RemoteData on the same computer, use just the filename filename = Path(input_.src).name filenames[input_.name] = filename @@ -348,7 +380,9 @@ def _set_shelljob_filenames(self, task: core.ShellTask): # coordinates need unique filenames to avoid conflicts in the working directory # Count how many inputs have the same base name - same_name_count = sum(1 for inp in task.input_data_nodes() if inp.name == input_.name) + same_name_count = sum( + 1 for inp in task.input_data_nodes() if inp.name == input_.name + ) if same_name_count > 1: # Multiple data nodes with same base name - use full label as filename @@ -356,14 +390,15 @@ def _set_shelljob_filenames(self, task: core.ShellTask): filename = input_label else: # Single data node with this name - can use simple filename - filename = Path(input_.src).name if hasattr(input_, 
'src') else input_.name + filename = ( + Path(input_.src).name if hasattr(input_, "src") else input_.name + ) # The key in filenames dict should be the input label (what's used in nodes dict) filenames[input_label] = filename workgraph_task.inputs.filenames.value = filenames - def run( self, inputs: None | dict[str, Any] = None, @@ -385,7 +420,9 @@ def submit( timeout: int = 60, metadata: None | dict[str, Any] = None, ) -> aiida.orm.Node: - self._workgraph.submit(inputs=inputs, wait=wait, timeout=timeout, metadata=metadata) + self._workgraph.submit( + inputs=inputs, wait=wait, timeout=timeout, metadata=metadata + ) if (output_node := self._workgraph.process) is None: # The node should not be None after a run, it should contain exit code and message so if the node is None something internal went wrong msg = "Something went wrong when running workgraph. Please contact a developer." diff --git a/tests/test_workgraph.py b/tests/test_workgraph.py index d67fbb2d..49e9c572 100644 --- a/tests/test_workgraph.py +++ b/tests/test_workgraph.py @@ -1,148 +1,112 @@ import pytest +from pathlib import Path from aiida import orm +import textwrap -from sirocco.core import Workflow +from sirocco.core import Workflow, AvailableData, GeneratedData from sirocco.parsing import yaml_data_models as models from sirocco.workgraph import AiidaWorkGraph -import textwrap -@pytest.mark.usefixtures("aiida_localhost") -def test_set_shelljob_filenames(tmp_path): - file_name = "foo.txt" - file_path = tmp_path / file_name - # Dummy script, as `src` must be specified due to relative command path - script_path = tmp_path / "my_script.sh" +def test_get_aiida_label_from_graph_item(tmp_path): + """Test that AiiDA labels are generated correctly.""" - config_wf = models.ConfigWorkflow( - name="remote", - rootdir=tmp_path, - cycles=[ - models.ConfigCycle( - name="remote", - tasks=[ - models.ConfigCycleTask( - name="task", - inputs=[ - models.ConfigCycleTaskInput(name="my_data", port="unused") - ], - ), - ], - ), - ], - tasks=[ - models.ConfigShellTask( - name="task", - command="echo test", - src=str(script_path), - computer="localhost", - ), - ], - data=models.ConfigData( - available=[ - models.ConfigAvailableData( - name="my_data", - type=models.DataType.FILE, - src=str(file_path), - computer="localhost", - ) - ], - ), - parameters={}, + # Mock data nodes with different coordinate combinations + output_path = tmp_path / "output" + data_simple = GeneratedData( + name="output", type=models.DataType.FILE, src=output_path, coordinates={} ) - core_wf = Workflow.from_config_workflow(config_workflow=config_wf) - aiida_wf = AiidaWorkGraph(core_workflow=core_wf) - remote_data = aiida_wf._workgraph.tasks[0].inputs.nodes["my_data"].value # noqa: SLF001 - assert isinstance(remote_data, orm.RemoteData) - filenames = aiida_wf._workgraph.tasks[0].inputs.filenames.value # noqa: SLF001 - assert filenames == {"my_data": "foo.txt"} + data_with_date = GeneratedData( + name="output", + type=models.DataType.FILE, + src=output_path, + coordinates={"date": "2026-01-01-00:00:00"}, + ) + data_with_params = GeneratedData( + name="output", + type=models.DataType.FILE, + src=output_path, + coordinates={"foo": 0, "bar": 3.0, "date": "2026-01-01-00:00:00"}, + ) -@pytest.mark.usefixtures("aiida_localhost") -def test_multiple_inputs_filenames(tmp_path): - file_names = ["foo.txt", "bar.txt", "baz.dat"] - for name in file_names: - (tmp_path / name).touch() - script_path = tmp_path / "my_script.sh" + # Test label generation + assert 
AiidaWorkGraph.get_aiida_label_from_graph_item(data_simple) == "output" + assert ( + AiidaWorkGraph.get_aiida_label_from_graph_item(data_with_date) + == "output_date_2026_01_01_00_00_00" + ) + assert ( + AiidaWorkGraph.get_aiida_label_from_graph_item(data_with_params) + == "output_foo_0___bar_3_0___date_2026_01_01_00_00_00" + ) - # Create configuration with multiple inputs - config_wf = models.ConfigWorkflow( - name="remote", - rootdir=tmp_path, - cycles=[ - models.ConfigCycle( - name="remote", - tasks=[ - models.ConfigCycleTask( - name="task", - inputs=[ - models.ConfigCycleTaskInput( - name=f"data_{i}", port=f"port_{i}" - ) - for i in range(len(file_names)) - ], - ), - ], - ), - ], - tasks=[ - models.ConfigShellTask( - name="task", - command="echo test", - src=str(script_path), - computer="localhost", - ), - ], - data=models.ConfigData( - available=[ - models.ConfigAvailableData( - name=f"data_{i}", - type=models.DataType.FILE, - src=name, - computer="localhost", - ) - for i, name in enumerate(file_names) - ], + +def test_filename_conflict_detection(tmp_path): + """Test logic for detecting when unique filenames are needed.""" + + output_path = tmp_path / "output" + other_path = tmp_path / "other" + + inputs = [ + GeneratedData( + name="output", + type=models.DataType.FILE, + src=output_path, + coordinates={"foo": 0}, ), - parameters={}, - ) + GeneratedData( + name="output", + type=models.DataType.FILE, + src=output_path, + coordinates={"foo": 1}, + ), + GeneratedData( + name="other", + type=models.DataType.FILE, + src=other_path, + coordinates={}, + ), + ] - core_wf = Workflow.from_config_workflow(config_workflow=config_wf) - aiida_wf = AiidaWorkGraph(core_workflow=core_wf) + # Test that conflict detection works + output_conflicts = [inp for inp in inputs if inp.name == "output"] + other_conflicts = [inp for inp in inputs if inp.name == "other"] - obtained_filenames = {f"data_{i}": name for i, name in enumerate(file_names)} - filenames = aiida_wf._workgraph.tasks[0].inputs.filenames.value # noqa: SLF001 - assert filenames == obtained_filenames + assert len(output_conflicts) == 2 # Should need unique filenames + assert len(other_conflicts) == 1 # Should use simple filename -@pytest.mark.usefixtures("aiida_localhost") -def test_directory_input_filenames(tmp_path): - dir_name = "test_dir" - dir_path = tmp_path / dir_name - dir_path.mkdir() - script_path = tmp_path / "my_script.sh" + +@pytest.mark.usefixtures('aiida_localhost') +def test_basic_remote_data_filename(tmp_path): + """Test basic RemoteData filename handling.""" + file_path = tmp_path / "foo.txt" + file_path.touch() + script_path = tmp_path / "script.sh" + script_path.touch() config_wf = models.ConfigWorkflow( - name="remote", + name="basic", rootdir=tmp_path, cycles=[ models.ConfigCycle( - name="remote", + name="cycle", tasks=[ models.ConfigCycleTask( name="task", inputs=[ - models.ConfigCycleTaskInput(name="my_dir", port="unused") + models.ConfigCycleTaskInput(name="data", port="input") ], - ), + ) ], ), ], tasks=[ models.ConfigShellTask( name="task", - command="echo test", + command="echo {PORT::input}", src=str(script_path), computer="localhost", ), @@ -150,276 +114,714 @@ def test_directory_input_filenames(tmp_path): data=models.ConfigData( available=[ models.ConfigAvailableData( - name="my_dir", - type=models.DataType.DIR, - src=dir_name, + name="data", + type=models.DataType.FILE, + src=str(file_path), computer="localhost", ) ], ), - parameters={}, ) - core_wf = Workflow.from_config_workflow(config_workflow=config_wf) - 
aiida_wf = AiidaWorkGraph(core_workflow=core_wf) + core_wf = Workflow.from_config_workflow(config_wf) + aiida_wf = AiidaWorkGraph(core_wf) - filenames = aiida_wf._workgraph.tasks[0].inputs.filenames.value # noqa: SLF001 - assert filenames == {"my_dir": dir_name} + # Check that RemoteData was created and filename is correct + task = aiida_wf._workgraph.tasks[0] + assert isinstance(task.inputs.nodes["data"].value, orm.RemoteData) + assert task.inputs.filenames.value == {"data": "foo.txt"} + assert task.inputs.arguments.value == "{data}" + +def test_parameterized_filename_conflicts(tmp_path): + """Test that parameterized data gets unique filenames when conflicts occur.""" + yaml_content = textwrap.dedent(""" + cycles: + - test_cycle: + tasks: + - process_data: + inputs: + - simulation_output: + parameters: + param: all + port: files + outputs: [processed_output] + tasks: + - process_data: + plugin: shell + command: "process.py {PORT::files}" + parameters: [param] + computer: localhost + data: + available: + - input_file: + type: file + src: input.txt + computer: localhost + generated: + - simulation_output: + type: file + src: output.dat + parameters: [param] + - processed_output: + type: file + src: processed.dat + parameters: [param] + parameters: + param: [1, 2] + """) + + config_file = tmp_path / "config.yml" + config_file.write_text(yaml_content) + + # Create required files + (tmp_path / "input.txt").touch() + (tmp_path / "process.py").touch() + + core_wf = Workflow.from_config_file(str(config_file)) + aiida_wf = AiidaWorkGraph(core_wf) + + # Find the task that processes multiple parameterized inputs + process_tasks = [ + task + for task in aiida_wf._workgraph.tasks + if task.name.startswith("process_data") + ] + + # Should have 2 tasks (one for each parameter value) + assert len(process_tasks) == 2 + + # Check that each task has unique node keys and appropriate filenames + for task in process_tasks: + nodes_keys = list(task.inputs.nodes._sockets.keys()) + filenames = task.inputs.filenames.value + arguments = task.inputs.arguments.value + + # Each task should have exactly one simulation_output input + sim_output_keys = [ + k for k in nodes_keys if k.startswith("simulation_output") + ] + assert len(sim_output_keys) == 1 + + # The filename should be the full label (since there are conflicts) + key = sim_output_keys[0] + assert filenames[key] == key # Full label used as filename + assert key in arguments # Key appears in arguments + +def test_mixed_conflict_and_no_conflict(tmp_path): + """Test workflow with both conflicting and non-conflicting data.""" + yaml_content = textwrap.dedent(""" + cycles: + - test_cycle: + tasks: + - analyze: + inputs: + - shared_config: # Single file, no conflict + port: config + - simulation_data: # Multiple files, conflict expected + parameters: + run: all + port: data + outputs: [analysis_result] + tasks: + - analyze: + plugin: shell + command: "analyze.py --config {PORT::config} --data {PORT::data}" + parameters: [run] + computer: localhost + data: + available: + - shared_config: + type: file + src: config.json + computer: localhost + generated: + - simulation_data: + type: file + src: sim_output.nc + parameters: [run] + - analysis_result: + type: file + src: result.txt + parameters: [run] + parameters: + run: [1, 2] + """) + + config_file = tmp_path / "config.yml" + config_file.write_text(yaml_content) + + # Create files + (tmp_path / "config.json").touch() + (tmp_path / "analyze.py").touch() + + core_wf = Workflow.from_config_file(str(config_file)) + 
aiida_wf = AiidaWorkGraph(core_wf) + + analyze_tasks = [ + task + for task in aiida_wf._workgraph.tasks + if task.name.startswith("analyze") + ] + + for task in analyze_tasks: + filenames = task.inputs.filenames.value + + # shared_config should use simple filename (no conflict) + assert filenames["shared_config"] == "config.json" + + # simulation_data should use full label (conflict with other tasks) + sim_data_key = [ + k for k in filenames.keys() if k.startswith("simulation_data") + ][0] + assert filenames[sim_data_key] == sim_data_key # Full label as filename + + +# ============================================================================ +# COMPREHENSIVE TEST - Keep one comprehensive test for the full scenario +# ============================================================================ @pytest.mark.usefixtures("aiida_localhost") -def test_set_shelljob_filenames_parametrized(tmp_path): - yaml_str = textwrap.dedent( - """ - start_date: &root_start_date "2026-01-01T00:00" - stop_date: &root_stop_date "2028-01-01T00:00" +def test_comprehensive_parameterized_workflow(tmp_path): + """Comprehensive test covering the full parameterized workflow scenario. + + This test validates the complete integration including: + - Multiple parameters (foo, bar) and dates + - Mixed conflict/no-conflict scenarios + - Correct argument resolution + - Proper filename mapping + """ + yaml_str = textwrap.dedent(""" + start_date: &start "2026-01-01T00:00" + stop_date: &stop "2026-07-01T00:00" cycles: - - bimonthly_tasks: - cycling: - start_date: *root_start_date - stop_date: *root_stop_date - period: P6M - tasks: - - icon: - inputs: - - initial_conditions: - when: - at: *root_start_date - port: init - - icon_restart: - when: - after: *root_start_date - target_cycle: - lag: -P6M - parameters: - foo: single - bar: single - port: restart - - forcing: - port: forcing - outputs: [icon_output, icon_restart] - - statistics_foo: - inputs: - - icon_output: - parameters: - bar: single - port: None - outputs: [analysis_foo] - - statistics_foo_bar: - inputs: - - analysis_foo: - port: None - outputs: [analysis_foo_bar] - - yearly: - cycling: - start_date: *root_start_date - stop_date: *root_stop_date - period: P1Y - tasks: - - merge: - inputs: - - analysis_foo_bar: - target_cycle: - lag: ["P0M", "P6M"] - port: None - outputs: [yearly_analysis] + - main: + cycling: + start_date: *start + stop_date: *stop + period: P6M + tasks: + - simulate: + inputs: + - config: + port: cfg + outputs: [sim_output] + - analyze: + inputs: + - sim_output: + parameters: {foo: all, bar: single} + port: data + outputs: [analysis] tasks: - - icon: - plugin: shell - src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/icon.py - command: "icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}" - parameters: [foo, bar] + - simulate: + plugin: shell + command: "sim.py {PORT::cfg}" + parameters: [foo, bar] + computer: localhost + - analyze: + plugin: shell + command: "analyze.py {PORT::data}" + parameters: [bar] + computer: localhost + data: + available: + - config: + type: file + src: config.txt computer: localhost - - statistics_foo: - plugin: shell - src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/statistics.py - command: "statistics.py {PORT::None}" + generated: + - sim_output: + type: file + src: output.dat + parameters: [foo, bar] + - analysis: + type: file + src: analysis.txt parameters: [bar] - computer: localhost - 
- statistics_foo_bar: - plugin: shell - src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/statistics.py - command: "statistics.py {PORT::None}" - computer: localhost - - merge: - plugin: shell - src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/merge.py - command: "merge.py {PORT::None}" - computer: localhost - data: - available: - - initial_conditions: - type: file - src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/small/config/data/initial_conditions - computer: localhost - - forcing: - type: file - src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/data/forcing - computer: localhost - generated: - - icon_output: - type: file - src: icon_output - parameters: [foo, bar] - - icon_restart: - type: file - src: restart - parameters: [foo, bar] - - analysis_foo: - type: file - src: analysis - parameters: [bar] - - analysis_foo_bar: - type: file - src: analysis - - yearly_analysis: - type: file - src: analysis parameters: - foo: [0, 1] - bar: [3.0] - """ - ) - yaml_file = tmp_path / "config.yml" - yaml_file.write_text(yaml_str) - - core_wf = Workflow.from_config_file(yaml_file) - aiida_wf = AiidaWorkGraph(core_workflow=core_wf) - filenames_list = [task.inputs.filenames.value for task in aiida_wf._workgraph.tasks] - arguments_list = [task.inputs.arguments.value for task in aiida_wf._workgraph.tasks] - nodes_list = [ - list(task.inputs.nodes._sockets.keys()) for task in aiida_wf._workgraph.tasks - ] + foo: [0, 1] + bar: [3.0] + """) - expected_filenames_list = [ - {"forcing": "forcing", "initial_conditions": "initial_conditions"}, - {"forcing": "forcing", "initial_conditions": "initial_conditions"}, - { - "forcing": "forcing", - "icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00": "restart", - }, - { - "forcing": "forcing", - "icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00": "restart", - }, - { - "forcing": "forcing", - "icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00": "restart", - }, - { - "forcing": "forcing", - "icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00": "restart", - }, - { - "forcing": "forcing", - "icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00": "restart", - }, - { - "forcing": "forcing", - "icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00": "restart", - }, - { - "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00", - "icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00", - }, - { - "icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00", - "icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00", - }, - { - "icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00", - "icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00", - }, - { - "icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00", - "icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00", - }, - {"analysis_foo_bar_3_0___date_2026_01_01_00_00_00": "analysis"}, - {"analysis_foo_bar_3_0___date_2026_07_01_00_00_00": "analysis"}, - 
{"analysis_foo_bar_3_0___date_2027_01_01_00_00_00": "analysis"}, - {"analysis_foo_bar_3_0___date_2027_07_01_00_00_00": "analysis"}, - { - "analysis_foo_bar_date_2026_01_01_00_00_00": "analysis_foo_bar_date_2026_01_01_00_00_00", - "analysis_foo_bar_date_2026_07_01_00_00_00": "analysis_foo_bar_date_2026_07_01_00_00_00", - }, - { - "analysis_foo_bar_date_2027_01_01_00_00_00": "analysis_foo_bar_date_2027_01_01_00_00_00", - "analysis_foo_bar_date_2027_07_01_00_00_00": "analysis_foo_bar_date_2027_07_01_00_00_00", - }, - ] + config_file = tmp_path / "config.yml" + config_file.write_text(yaml_str) - expected_arguments_list = [ - "--restart --init {initial_conditions} --forcing {forcing}", - "--restart --init {initial_conditions} --forcing {forcing}", - "--restart {icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00} --init " - "--forcing {forcing}", - "--restart {icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00} --init " - "--forcing {forcing}", - "--restart {icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00} --init " - "--forcing {forcing}", - "--restart {icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00} --init " - "--forcing {forcing}", - "--restart {icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00} --init " - "--forcing {forcing}", - "--restart {icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00} --init " - "--forcing {forcing}", - "{icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00} " - "{icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00}", - "{icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00} " - "{icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00}", - "{icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00} " - "{icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00}", - "{icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00} " - "{icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00}", - "{analysis_foo_bar_3_0___date_2026_01_01_00_00_00}", - "{analysis_foo_bar_3_0___date_2026_07_01_00_00_00}", - "{analysis_foo_bar_3_0___date_2027_01_01_00_00_00}", - "{analysis_foo_bar_3_0___date_2027_07_01_00_00_00}", - "{analysis_foo_bar_date_2026_01_01_00_00_00} " - "{analysis_foo_bar_date_2026_07_01_00_00_00}", - "{analysis_foo_bar_date_2027_01_01_00_00_00} " - "{analysis_foo_bar_date_2027_07_01_00_00_00}", - ] + # Create files + (tmp_path / "config.txt").touch() + (tmp_path / "sim.py").touch() + (tmp_path / "analyze.py").touch() - expected_nodes_list = [ - ["initial_conditions", "forcing"], - ["initial_conditions", "forcing"], - ["icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00", "forcing"], - ["icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00", "forcing"], - ["icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00", "forcing"], - ["icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00", "forcing"], - ["icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00", "forcing"], - ["icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00", "forcing"], - [ - "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00", - "icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00", - ], - [ - "icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00", - "icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00", - ], - [ - "icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00", - "icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00", - ], - [ - "icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00", - "icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00", - ], - ["analysis_foo_bar_3_0___date_2026_01_01_00_00_00"], - 
["analysis_foo_bar_3_0___date_2026_07_01_00_00_00"], - ["analysis_foo_bar_3_0___date_2027_01_01_00_00_00"], - ["analysis_foo_bar_3_0___date_2027_07_01_00_00_00"], - [ - "analysis_foo_bar_date_2026_01_01_00_00_00", - "analysis_foo_bar_date_2026_07_01_00_00_00", - ], - [ - "analysis_foo_bar_date_2027_01_01_00_00_00", - "analysis_foo_bar_date_2027_07_01_00_00_00", - ], + core_wf = Workflow.from_config_file(str(config_file)) + aiida_wf = AiidaWorkGraph(core_wf) + + # Verify task structure + sim_tasks = [t for t in aiida_wf._workgraph.tasks if t.name.startswith("simulate")] + analyze_tasks = [ + t for t in aiida_wf._workgraph.tasks if t.name.startswith("analyze") ] - assert arguments_list == expected_arguments_list - assert filenames_list == expected_filenames_list - assert nodes_list == expected_nodes_list + assert len(sim_tasks) == 2 # 2 foo values × 1 bar value = 2 tasks + assert len(analyze_tasks) == 1 # 1 bar value = 1 task + + # Check simulate tasks (should have simple config filename) + for task in sim_tasks: + filenames = task.inputs.filenames.value + assert filenames["config"] == "config.txt" # No conflict, simple name + + # Check analyze task (should have complex filenames due to conflicts) + analyze_task = analyze_tasks[0] + filenames = analyze_task.inputs.filenames.value + + # Should have 2 sim_output inputs with full labels as filenames + sim_output_keys = [k for k in filenames.keys() if k.startswith("sim_output")] + assert len(sim_output_keys) == 2 + + for key in sim_output_keys: + assert filenames[key] == key # Full label used as filename + assert "foo_" in key and "bar_3_0" in key # Contains parameter info + + +# ============================================================================ +# FIXTURES +# ============================================================================ + + +@pytest.fixture +def sample_workflow_config(tmp_path): + """Fixture providing a reusable workflow configuration for testing.""" + config = { + "name": "test_workflow", + "rootdir": tmp_path, + "cycles": [], + "tasks": [], + "data": models.ConfigData(), + "parameters": {}, + } + return config + + +# import pytest +# from aiida import orm + +# from sirocco.core import Workflow +# from sirocco.parsing import yaml_data_models as models +# from sirocco.workgraph import AiidaWorkGraph +# import textwrap + + +# @pytest.mark.usefixtures("aiida_localhost") +# def test_set_shelljob_filenames(tmp_path): +# file_name = "foo.txt" +# file_path = tmp_path / file_name +# # Dummy script, as `src` must be specified due to relative command path +# script_path = tmp_path / "my_script.sh" + +# config_wf = models.ConfigWorkflow( +# name="remote", +# rootdir=tmp_path, +# cycles=[ +# models.ConfigCycle( +# name="remote", +# tasks=[ +# models.ConfigCycleTask( +# name="task", +# inputs=[ +# models.ConfigCycleTaskInput(name="my_data", port="unused") +# ], +# ), +# ], +# ), +# ], +# tasks=[ +# models.ConfigShellTask( +# name="task", +# command="echo test", +# src=str(script_path), +# computer="localhost", +# ), +# ], +# data=models.ConfigData( +# available=[ +# models.ConfigAvailableData( +# name="my_data", +# type=models.DataType.FILE, +# src=str(file_path), +# computer="localhost", +# ) +# ], +# ), +# parameters={}, +# ) + +# core_wf = Workflow.from_config_workflow(config_workflow=config_wf) +# aiida_wf = AiidaWorkGraph(core_workflow=core_wf) +# remote_data = aiida_wf._workgraph.tasks[0].inputs.nodes["my_data"].value # noqa: SLF001 +# assert isinstance(remote_data, orm.RemoteData) +# filenames = 
aiida_wf._workgraph.tasks[0].inputs.filenames.value # noqa: SLF001 +# assert filenames == {"my_data": "foo.txt"} + + +# @pytest.mark.usefixtures("aiida_localhost") +# def test_multiple_inputs_filenames(tmp_path): +# file_names = ["foo.txt", "bar.txt", "baz.dat"] +# for name in file_names: +# (tmp_path / name).touch() +# script_path = tmp_path / "my_script.sh" + +# # Create configuration with multiple inputs +# config_wf = models.ConfigWorkflow( +# name="remote", +# rootdir=tmp_path, +# cycles=[ +# models.ConfigCycle( +# name="remote", +# tasks=[ +# models.ConfigCycleTask( +# name="task", +# inputs=[ +# models.ConfigCycleTaskInput( +# name=f"data_{i}", port=f"port_{i}" +# ) +# for i in range(len(file_names)) +# ], +# ), +# ], +# ), +# ], +# tasks=[ +# models.ConfigShellTask( +# name="task", +# command="echo test", +# src=str(script_path), +# computer="localhost", +# ), +# ], +# data=models.ConfigData( +# available=[ +# models.ConfigAvailableData( +# name=f"data_{i}", +# type=models.DataType.FILE, +# src=name, +# computer="localhost", +# ) +# for i, name in enumerate(file_names) +# ], +# ), +# parameters={}, +# ) + +# core_wf = Workflow.from_config_workflow(config_workflow=config_wf) +# aiida_wf = AiidaWorkGraph(core_workflow=core_wf) + +# obtained_filenames = {f"data_{i}": name for i, name in enumerate(file_names)} +# filenames = aiida_wf._workgraph.tasks[0].inputs.filenames.value # noqa: SLF001 +# assert filenames == obtained_filenames + + +# @pytest.mark.usefixtures("aiida_localhost") +# def test_directory_input_filenames(tmp_path): +# dir_name = "test_dir" +# dir_path = tmp_path / dir_name +# dir_path.mkdir() +# script_path = tmp_path / "my_script.sh" + +# config_wf = models.ConfigWorkflow( +# name="remote", +# rootdir=tmp_path, +# cycles=[ +# models.ConfigCycle( +# name="remote", +# tasks=[ +# models.ConfigCycleTask( +# name="task", +# inputs=[ +# models.ConfigCycleTaskInput(name="my_dir", port="unused") +# ], +# ), +# ], +# ), +# ], +# tasks=[ +# models.ConfigShellTask( +# name="task", +# command="echo test", +# src=str(script_path), +# computer="localhost", +# ), +# ], +# data=models.ConfigData( +# available=[ +# models.ConfigAvailableData( +# name="my_dir", +# type=models.DataType.DIR, +# src=dir_name, +# computer="localhost", +# ) +# ], +# ), +# parameters={}, +# ) + +# core_wf = Workflow.from_config_workflow(config_workflow=config_wf) +# aiida_wf = AiidaWorkGraph(core_workflow=core_wf) + +# filenames = aiida_wf._workgraph.tasks[0].inputs.filenames.value # noqa: SLF001 +# assert filenames == {"my_dir": dir_name} + + +# @pytest.mark.usefixtures("aiida_localhost") +# def test_linking_complex_dates_and_parameters(tmp_path): +# yaml_str = textwrap.dedent( +# """ +# start_date: &root_start_date "2026-01-01T00:00" +# stop_date: &root_stop_date "2028-01-01T00:00" +# cycles: +# - bimonthly_tasks: +# cycling: +# start_date: *root_start_date +# stop_date: *root_stop_date +# period: P6M +# tasks: +# - icon: +# inputs: +# - initial_conditions: +# when: +# at: *root_start_date +# port: init +# - icon_restart: +# when: +# after: *root_start_date +# target_cycle: +# lag: -P6M +# parameters: +# foo: single +# bar: single +# port: restart +# - forcing: +# port: forcing +# outputs: [icon_output, icon_restart] +# - statistics_foo: +# inputs: +# - icon_output: +# parameters: +# bar: single +# port: None +# outputs: [analysis_foo] +# - statistics_foo_bar: +# inputs: +# - analysis_foo: +# port: None +# outputs: [analysis_foo_bar] +# - yearly: +# cycling: +# start_date: *root_start_date +# stop_date: 
*root_stop_date +# period: P1Y +# tasks: +# - merge: +# inputs: +# - analysis_foo_bar: +# target_cycle: +# lag: ["P0M", "P6M"] +# port: None +# outputs: [yearly_analysis] +# tasks: +# - icon: +# plugin: shell +# src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/icon.py +# command: "icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}" +# parameters: [foo, bar] +# computer: localhost +# - statistics_foo: +# plugin: shell +# src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/statistics.py +# command: "statistics.py {PORT::None}" +# parameters: [bar] +# computer: localhost +# - statistics_foo_bar: +# plugin: shell +# src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/statistics.py +# command: "statistics.py {PORT::None}" +# computer: localhost +# - merge: +# plugin: shell +# src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/merge.py +# command: "merge.py {PORT::None}" +# computer: localhost +# data: +# available: +# - initial_conditions: +# type: file +# src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/small/config/data/initial_conditions +# computer: localhost +# - forcing: +# type: file +# src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/data/forcing +# computer: localhost +# generated: +# - icon_output: +# type: file +# src: icon_output +# parameters: [foo, bar] +# - icon_restart: +# type: file +# src: restart +# parameters: [foo, bar] +# - analysis_foo: +# type: file +# src: analysis +# parameters: [bar] +# - analysis_foo_bar: +# type: file +# src: analysis +# - yearly_analysis: +# type: file +# src: analysis +# parameters: +# foo: [0, 1] +# bar: [3.0] +# """ +# ) +# yaml_file = tmp_path / "config.yml" +# yaml_file.write_text(yaml_str) + +# core_wf = Workflow.from_config_file(yaml_file) +# aiida_wf = AiidaWorkGraph(core_workflow=core_wf) +# filenames_list = [task.inputs.filenames.value for task in aiida_wf._workgraph.tasks] +# arguments_list = [task.inputs.arguments.value for task in aiida_wf._workgraph.tasks] +# nodes_list = [ +# list(task.inputs.nodes._sockets.keys()) for task in aiida_wf._workgraph.tasks +# ] + +# expected_filenames_list = [ +# {"forcing": "forcing", "initial_conditions": "initial_conditions"}, +# {"forcing": "forcing", "initial_conditions": "initial_conditions"}, +# { +# "forcing": "forcing", +# "icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00": "restart", +# }, +# { +# "forcing": "forcing", +# "icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00": "restart", +# }, +# { +# "forcing": "forcing", +# "icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00": "restart", +# }, +# { +# "forcing": "forcing", +# "icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00": "restart", +# }, +# { +# "forcing": "forcing", +# "icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00": "restart", +# }, +# { +# "forcing": "forcing", +# "icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00": "restart", +# }, +# { +# "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00", +# "icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00", +# }, +# { +# "icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00": 
"icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00", +# "icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00", +# }, +# { +# "icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00", +# "icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00", +# }, +# { +# "icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00", +# "icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00", +# }, +# {"analysis_foo_bar_3_0___date_2026_01_01_00_00_00": "analysis"}, +# {"analysis_foo_bar_3_0___date_2026_07_01_00_00_00": "analysis"}, +# {"analysis_foo_bar_3_0___date_2027_01_01_00_00_00": "analysis"}, +# {"analysis_foo_bar_3_0___date_2027_07_01_00_00_00": "analysis"}, +# { +# "analysis_foo_bar_date_2026_01_01_00_00_00": "analysis_foo_bar_date_2026_01_01_00_00_00", +# "analysis_foo_bar_date_2026_07_01_00_00_00": "analysis_foo_bar_date_2026_07_01_00_00_00", +# }, +# { +# "analysis_foo_bar_date_2027_01_01_00_00_00": "analysis_foo_bar_date_2027_01_01_00_00_00", +# "analysis_foo_bar_date_2027_07_01_00_00_00": "analysis_foo_bar_date_2027_07_01_00_00_00", +# }, +# ] + +# expected_arguments_list = [ +# "--restart --init {initial_conditions} --forcing {forcing}", +# "--restart --init {initial_conditions} --forcing {forcing}", +# "--restart {icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00} --init " +# "--forcing {forcing}", +# "--restart {icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00} --init " +# "--forcing {forcing}", +# "--restart {icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00} --init " +# "--forcing {forcing}", +# "--restart {icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00} --init " +# "--forcing {forcing}", +# "--restart {icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00} --init " +# "--forcing {forcing}", +# "--restart {icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00} --init " +# "--forcing {forcing}", +# "{icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00} " +# "{icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00}", +# "{icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00} " +# "{icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00}", +# "{icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00} " +# "{icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00}", +# "{icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00} " +# "{icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00}", +# "{analysis_foo_bar_3_0___date_2026_01_01_00_00_00}", +# "{analysis_foo_bar_3_0___date_2026_07_01_00_00_00}", +# "{analysis_foo_bar_3_0___date_2027_01_01_00_00_00}", +# "{analysis_foo_bar_3_0___date_2027_07_01_00_00_00}", +# "{analysis_foo_bar_date_2026_01_01_00_00_00} " +# "{analysis_foo_bar_date_2026_07_01_00_00_00}", +# "{analysis_foo_bar_date_2027_01_01_00_00_00} " +# "{analysis_foo_bar_date_2027_07_01_00_00_00}", +# ] + +# expected_nodes_list = [ +# ["initial_conditions", "forcing"], +# ["initial_conditions", "forcing"], +# ["icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00", "forcing"], +# ["icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00", "forcing"], +# ["icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00", "forcing"], +# ["icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00", "forcing"], +# 
["icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00", "forcing"], +# ["icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00", "forcing"], +# [ +# "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00", +# "icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00", +# ], +# [ +# "icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00", +# "icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00", +# ], +# [ +# "icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00", +# "icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00", +# ], +# [ +# "icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00", +# "icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00", +# ], +# ["analysis_foo_bar_3_0___date_2026_01_01_00_00_00"], +# ["analysis_foo_bar_3_0___date_2026_07_01_00_00_00"], +# ["analysis_foo_bar_3_0___date_2027_01_01_00_00_00"], +# ["analysis_foo_bar_3_0___date_2027_07_01_00_00_00"], +# [ +# "analysis_foo_bar_date_2026_01_01_00_00_00", +# "analysis_foo_bar_date_2026_07_01_00_00_00", +# ], +# [ +# "analysis_foo_bar_date_2027_01_01_00_00_00", +# "analysis_foo_bar_date_2027_07_01_00_00_00", +# ], +# ] + +# assert arguments_list == expected_arguments_list +# assert filenames_list == expected_filenames_list +# assert nodes_list == expected_nodes_list From 47e361d620c83a3401ee2ca5148cbf566dff43b8 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Mon, 2 Jun 2025 16:10:46 +0200 Subject: [PATCH 12/27] . --- tests/test_workgraph.py | 376 ++++++++++++++++------------------------ 1 file changed, 151 insertions(+), 225 deletions(-) diff --git a/tests/test_workgraph.py b/tests/test_workgraph.py index 49e9c572..56d21f80 100644 --- a/tests/test_workgraph.py +++ b/tests/test_workgraph.py @@ -132,43 +132,57 @@ def test_basic_remote_data_filename(tmp_path): assert task.inputs.filenames.value == {"data": "foo.txt"} assert task.inputs.arguments.value == "{data}" +@pytest.mark.usefixtures('aiida_localhost') def test_parameterized_filename_conflicts(tmp_path): """Test that parameterized data gets unique filenames when conflicts occur.""" yaml_content = textwrap.dedent(""" - cycles: - - test_cycle: - tasks: + name: test_workflow + cycles: + - simulation_cycle: + tasks: + - simulate: + inputs: + - input_file: + port: input + outputs: [simulation_output] + - processing_cycle: + tasks: + - process_data: + inputs: + - simulation_output: + parameters: + param: all + port: files + outputs: [processed_output] + tasks: + - simulate: + plugin: shell + command: "simulate.py {PORT::input}" + parameters: [param] + computer: localhost - process_data: - inputs: - - simulation_output: - parameters: - param: all - port: files - outputs: [processed_output] - tasks: - - process_data: - plugin: shell - command: "process.py {PORT::files}" - parameters: [param] - computer: localhost - data: - available: - - input_file: - type: file - src: input.txt - computer: localhost - generated: - - simulation_output: - type: file - src: output.dat - parameters: [param] - - processed_output: - type: file - src: processed.dat - parameters: [param] - parameters: - param: [1, 2] - """) + plugin: shell + command: "process.py {PORT::files}" + parameters: [param] + computer: localhost + data: + available: + - input_file: + type: file + src: input.txt + computer: localhost + generated: + - simulation_output: + type: file + src: output.dat + parameters: [param] + - processed_output: + type: file + src: processed.dat + parameters: [param] + parameters: + param: [1, 2] + """) config_file = tmp_path / "config.yml" config_file.write_text(yaml_content) 
@@ -207,6 +221,97 @@ def test_parameterized_filename_conflicts(tmp_path): assert filenames[key] == key # Full label used as filename assert key in arguments # Key appears in arguments +@pytest.mark.usefixtures('aiida_localhost') +def test_parameterized_filename_conflicts(tmp_path): + """Test that parameterized data gets unique filenames when conflicts occur.""" + yaml_content = textwrap.dedent(""" + name: test_workflow + cycles: + - simulation_cycle: + tasks: + - simulate: + inputs: + - input_file: + port: input + outputs: [simulation_output] + - processing_cycle: + tasks: + - process_data: + inputs: + - simulation_output: + parameters: + param: all + port: files + outputs: [processed_output] + tasks: + - simulate: + plugin: shell + command: "/tmp/simulate.py {PORT::input}" + parameters: [param] + computer: localhost + - process_data: + plugin: shell + command: "/tmp/process.py {PORT::files}" + parameters: [param] + computer: localhost + data: + available: + - input_file: + type: file + src: input.txt + computer: localhost + generated: + - simulation_output: + type: file + src: output.dat + parameters: [param] + - processed_output: + type: file + src: processed.dat + parameters: [param] + parameters: + param: [1, 2] + """) + + config_file = tmp_path / "config.yml" + config_file.write_text(yaml_content) + + # Create required files + (tmp_path / "input.txt").touch() + (tmp_path / "simulate.py").touch() + (tmp_path / "process.py").touch() + + core_wf = Workflow.from_config_file(str(config_file)) + aiida_wf = AiidaWorkGraph(core_wf) + + # Find the task that processes multiple parameterized inputs + process_tasks = [ + task + for task in aiida_wf._workgraph.tasks + if task.name.startswith("process_data") + ] + + # Should have 2 tasks (one for each parameter value) + assert len(process_tasks) == 2 + + # Check that each task has unique node keys and appropriate filenames + for task in process_tasks: + nodes_keys = list(task.inputs.nodes._sockets.keys()) + filenames = task.inputs.filenames.value + arguments = task.inputs.arguments.value + + # Each task should have two simulation outputs as input + sim_output_keys = [ + k for k in nodes_keys if k.startswith("simulation_output") + ] + import ipdb; ipdb.set_trace() + assert len(sim_output_keys) == 2 + + # The filename should be the full label (since there are conflicts) + key = sim_output_keys[0] + assert filenames[key] == key # Full label used as filename + assert key in arguments # Key appears in arguments + def test_mixed_conflict_and_no_conflict(tmp_path): """Test workflow with both conflicting and non-conflicting data.""" yaml_content = textwrap.dedent(""" @@ -226,6 +331,7 @@ def test_mixed_conflict_and_no_conflict(tmp_path): - analyze: plugin: shell command: "analyze.py --config {PORT::config} --data {PORT::data}" + src: analyze.py parameters: [run] computer: localhost data: @@ -275,12 +381,6 @@ def test_mixed_conflict_and_no_conflict(tmp_path): ][0] assert filenames[sim_data_key] == sim_data_key # Full label as filename - -# ============================================================================ -# COMPREHENSIVE TEST - Keep one comprehensive test for the full scenario -# ============================================================================ - - @pytest.mark.usefixtures("aiida_localhost") def test_comprehensive_parameterized_workflow(tmp_path): """Comprehensive test covering the full parameterized workflow scenario. 
@@ -381,192 +481,18 @@ def test_comprehensive_parameterized_workflow(tmp_path): assert "foo_" in key and "bar_3_0" in key # Contains parameter info -# ============================================================================ -# FIXTURES -# ============================================================================ - - -@pytest.fixture -def sample_workflow_config(tmp_path): - """Fixture providing a reusable workflow configuration for testing.""" - config = { - "name": "test_workflow", - "rootdir": tmp_path, - "cycles": [], - "tasks": [], - "data": models.ConfigData(), - "parameters": {}, - } - return config - - -# import pytest -# from aiida import orm - -# from sirocco.core import Workflow -# from sirocco.parsing import yaml_data_models as models -# from sirocco.workgraph import AiidaWorkGraph -# import textwrap - - -# @pytest.mark.usefixtures("aiida_localhost") -# def test_set_shelljob_filenames(tmp_path): -# file_name = "foo.txt" -# file_path = tmp_path / file_name -# # Dummy script, as `src` must be specified due to relative command path -# script_path = tmp_path / "my_script.sh" - -# config_wf = models.ConfigWorkflow( -# name="remote", -# rootdir=tmp_path, -# cycles=[ -# models.ConfigCycle( -# name="remote", -# tasks=[ -# models.ConfigCycleTask( -# name="task", -# inputs=[ -# models.ConfigCycleTaskInput(name="my_data", port="unused") -# ], -# ), -# ], -# ), -# ], -# tasks=[ -# models.ConfigShellTask( -# name="task", -# command="echo test", -# src=str(script_path), -# computer="localhost", -# ), -# ], -# data=models.ConfigData( -# available=[ -# models.ConfigAvailableData( -# name="my_data", -# type=models.DataType.FILE, -# src=str(file_path), -# computer="localhost", -# ) -# ], -# ), -# parameters={}, -# ) - -# core_wf = Workflow.from_config_workflow(config_workflow=config_wf) -# aiida_wf = AiidaWorkGraph(core_workflow=core_wf) -# remote_data = aiida_wf._workgraph.tasks[0].inputs.nodes["my_data"].value # noqa: SLF001 -# assert isinstance(remote_data, orm.RemoteData) -# filenames = aiida_wf._workgraph.tasks[0].inputs.filenames.value # noqa: SLF001 -# assert filenames == {"my_data": "foo.txt"} - - -# @pytest.mark.usefixtures("aiida_localhost") -# def test_multiple_inputs_filenames(tmp_path): -# file_names = ["foo.txt", "bar.txt", "baz.dat"] -# for name in file_names: -# (tmp_path / name).touch() -# script_path = tmp_path / "my_script.sh" - -# # Create configuration with multiple inputs -# config_wf = models.ConfigWorkflow( -# name="remote", -# rootdir=tmp_path, -# cycles=[ -# models.ConfigCycle( -# name="remote", -# tasks=[ -# models.ConfigCycleTask( -# name="task", -# inputs=[ -# models.ConfigCycleTaskInput( -# name=f"data_{i}", port=f"port_{i}" -# ) -# for i in range(len(file_names)) -# ], -# ), -# ], -# ), -# ], -# tasks=[ -# models.ConfigShellTask( -# name="task", -# command="echo test", -# src=str(script_path), -# computer="localhost", -# ), -# ], -# data=models.ConfigData( -# available=[ -# models.ConfigAvailableData( -# name=f"data_{i}", -# type=models.DataType.FILE, -# src=name, -# computer="localhost", -# ) -# for i, name in enumerate(file_names) -# ], -# ), -# parameters={}, -# ) - -# core_wf = Workflow.from_config_workflow(config_workflow=config_wf) -# aiida_wf = AiidaWorkGraph(core_workflow=core_wf) - -# obtained_filenames = {f"data_{i}": name for i, name in enumerate(file_names)} -# filenames = aiida_wf._workgraph.tasks[0].inputs.filenames.value # noqa: SLF001 -# assert filenames == obtained_filenames - - -# @pytest.mark.usefixtures("aiida_localhost") -# def 
test_directory_input_filenames(tmp_path): -# dir_name = "test_dir" -# dir_path = tmp_path / dir_name -# dir_path.mkdir() -# script_path = tmp_path / "my_script.sh" - -# config_wf = models.ConfigWorkflow( -# name="remote", -# rootdir=tmp_path, -# cycles=[ -# models.ConfigCycle( -# name="remote", -# tasks=[ -# models.ConfigCycleTask( -# name="task", -# inputs=[ -# models.ConfigCycleTaskInput(name="my_dir", port="unused") -# ], -# ), -# ], -# ), -# ], -# tasks=[ -# models.ConfigShellTask( -# name="task", -# command="echo test", -# src=str(script_path), -# computer="localhost", -# ), -# ], -# data=models.ConfigData( -# available=[ -# models.ConfigAvailableData( -# name="my_dir", -# type=models.DataType.DIR, -# src=dir_name, -# computer="localhost", -# ) -# ], -# ), -# parameters={}, -# ) - -# core_wf = Workflow.from_config_workflow(config_workflow=config_wf) -# aiida_wf = AiidaWorkGraph(core_workflow=core_wf) - -# filenames = aiida_wf._workgraph.tasks[0].inputs.filenames.value # noqa: SLF001 -# assert filenames == {"my_dir": dir_name} +# @pytest.fixture +# def sample_workflow_config(tmp_path): +# """Fixture providing a reusable workflow configuration for testing.""" +# config = { +# "name": "test_workflow", +# "rootdir": tmp_path, +# "cycles": [], +# "tasks": [], +# "data": models.ConfigData(), +# "parameters": {}, +# } +# return config # @pytest.mark.usefixtures("aiida_localhost") From 7d84dd4300ad85a45e88e4838871c5799262fbcc Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Mon, 2 Jun 2025 17:25:06 +0200 Subject: [PATCH 13/27] . --- tests/test_workgraph.py | 919 +++++++++++++++++++--------------------- 1 file changed, 442 insertions(+), 477 deletions(-) diff --git a/tests/test_workgraph.py b/tests/test_workgraph.py index 56d21f80..fdd52655 100644 --- a/tests/test_workgraph.py +++ b/tests/test_workgraph.py @@ -1,9 +1,8 @@ import pytest -from pathlib import Path from aiida import orm import textwrap -from sirocco.core import Workflow, AvailableData, GeneratedData +from sirocco.core import Workflow, GeneratedData from sirocco.parsing import yaml_data_models as models from sirocco.workgraph import AiidaWorkGraph @@ -13,9 +12,7 @@ def test_get_aiida_label_from_graph_item(tmp_path): # Mock data nodes with different coordinate combinations output_path = tmp_path / "output" - data_simple = GeneratedData( - name="output", type=models.DataType.FILE, src=output_path, coordinates={} - ) + data_simple = GeneratedData(name="output", type=models.DataType.FILE, src=output_path, coordinates={}) data_with_date = GeneratedData( name="output", @@ -33,10 +30,7 @@ def test_get_aiida_label_from_graph_item(tmp_path): # Test label generation assert AiidaWorkGraph.get_aiida_label_from_graph_item(data_simple) == "output" - assert ( - AiidaWorkGraph.get_aiida_label_from_graph_item(data_with_date) - == "output_date_2026_01_01_00_00_00" - ) + assert AiidaWorkGraph.get_aiida_label_from_graph_item(data_with_date) == "output_date_2026_01_01_00_00_00" assert ( AiidaWorkGraph.get_aiida_label_from_graph_item(data_with_params) == "output_foo_0___bar_3_0___date_2026_01_01_00_00_00" @@ -78,8 +72,7 @@ def test_filename_conflict_detection(tmp_path): assert len(other_conflicts) == 1 # Should use simple filename - -@pytest.mark.usefixtures('aiida_localhost') +@pytest.mark.usefixtures("aiida_localhost") def test_basic_remote_data_filename(tmp_path): """Test basic RemoteData filename handling.""" file_path = tmp_path / "foo.txt" @@ -96,9 +89,7 @@ def test_basic_remote_data_filename(tmp_path): tasks=[ models.ConfigCycleTask( 
name="task", - inputs=[ - models.ConfigCycleTaskInput(name="data", port="input") - ], + inputs=[models.ConfigCycleTaskInput(name="data", port="input")], ) ], ), @@ -132,10 +123,17 @@ def test_basic_remote_data_filename(tmp_path): assert task.inputs.filenames.value == {"data": "foo.txt"} assert task.inputs.arguments.value == "{data}" -@pytest.mark.usefixtures('aiida_localhost') + +@pytest.mark.usefixtures("aiida_localhost") def test_parameterized_filename_conflicts(tmp_path): - """Test that parameterized data gets unique filenames when conflicts occur.""" - yaml_content = textwrap.dedent(""" + """Test parameterized data filename handling in various conflict scenarios. + + This test covers: + 1. Parameterized data with conflicts (multiple files with same base name) + 2. Mixed conflict/no-conflict scenarios (some files conflict, others don't) + 3. Proper filename assignment based on conflict detection + """ + yaml_content = textwrap.dedent(f""" name: test_workflow cycles: - simulation_cycle: @@ -144,6 +142,8 @@ def test_parameterized_filename_conflicts(tmp_path): inputs: - input_file: port: input + - shared_config: + port: config outputs: [simulation_output] - processing_cycle: tasks: @@ -151,126 +151,62 @@ def test_parameterized_filename_conflicts(tmp_path): inputs: - simulation_output: parameters: - param: all + foo: all port: files outputs: [processed_output] - tasks: - - simulate: - plugin: shell - command: "simulate.py {PORT::input}" - parameters: [param] - computer: localhost - - process_data: - plugin: shell - command: "process.py {PORT::files}" - parameters: [param] - computer: localhost - data: - available: - - input_file: - type: file - src: input.txt - computer: localhost - generated: - - simulation_output: - type: file - src: output.dat - parameters: [param] - - processed_output: - type: file - src: processed.dat - parameters: [param] - parameters: - param: [1, 2] - """) - - config_file = tmp_path / "config.yml" - config_file.write_text(yaml_content) - - # Create required files - (tmp_path / "input.txt").touch() - (tmp_path / "process.py").touch() - - core_wf = Workflow.from_config_file(str(config_file)) - aiida_wf = AiidaWorkGraph(core_wf) - - # Find the task that processes multiple parameterized inputs - process_tasks = [ - task - for task in aiida_wf._workgraph.tasks - if task.name.startswith("process_data") - ] - - # Should have 2 tasks (one for each parameter value) - assert len(process_tasks) == 2 - - # Check that each task has unique node keys and appropriate filenames - for task in process_tasks: - nodes_keys = list(task.inputs.nodes._sockets.keys()) - filenames = task.inputs.filenames.value - arguments = task.inputs.arguments.value - - # Each task should have exactly one simulation_output input - sim_output_keys = [ - k for k in nodes_keys if k.startswith("simulation_output") - ] - assert len(sim_output_keys) == 1 - - # The filename should be the full label (since there are conflicts) - key = sim_output_keys[0] - assert filenames[key] == key # Full label used as filename - assert key in arguments # Key appears in arguments - -@pytest.mark.usefixtures('aiida_localhost') -def test_parameterized_filename_conflicts(tmp_path): - """Test that parameterized data gets unique filenames when conflicts occur.""" - yaml_content = textwrap.dedent(""" - name: test_workflow - cycles: - - simulation_cycle: - tasks: - - simulate: - inputs: - - input_file: - port: input - outputs: [simulation_output] - - processing_cycle: - tasks: - - process_data: + - analyze: inputs: + - shared_config: + 
port: config - simulation_output: parameters: - param: all - port: files - outputs: [processed_output] + foo: all + port: data + outputs: [analysis_result] tasks: - simulate: plugin: shell - command: "/tmp/simulate.py {PORT::input}" - parameters: [param] + command: "simulate.py {{PORT::input}} --config {{PORT::config}}" + src: {tmp_path}/simulate.py + parameters: [foo] computer: localhost - process_data: plugin: shell - command: "/tmp/process.py {PORT::files}" - parameters: [param] + command: "process.py {{PORT::files}}" + src: {tmp_path}/process.py + parameters: [foo] + computer: localhost + - analyze: + plugin: shell + command: "analyze.py --config {{PORT::config}} --data {{PORT::data}}" + src: {tmp_path}/analyze.py + parameters: [foo] computer: localhost data: available: - input_file: type: file - src: input.txt + src: {tmp_path}/input.txt + computer: localhost + - shared_config: + type: file + src: {tmp_path}/config.json computer: localhost generated: - simulation_output: type: file src: output.dat - parameters: [param] + parameters: [foo] - processed_output: type: file src: processed.dat - parameters: [param] + parameters: [foo] + - analysis_result: + type: file + src: result.txt + parameters: [foo] parameters: - param: [1, 2] + foo: [1, 2] """) config_file = tmp_path / "config.yml" @@ -278,120 +214,66 @@ def test_parameterized_filename_conflicts(tmp_path): # Create required files (tmp_path / "input.txt").touch() + (tmp_path / "config.json").touch() (tmp_path / "simulate.py").touch() (tmp_path / "process.py").touch() + (tmp_path / "analyze.py").touch() core_wf = Workflow.from_config_file(str(config_file)) aiida_wf = AiidaWorkGraph(core_wf) - # Find the task that processes multiple parameterized inputs - process_tasks = [ - task - for task in aiida_wf._workgraph.tasks - if task.name.startswith("process_data") - ] - - # Should have 2 tasks (one for each parameter value) - assert len(process_tasks) == 2 + # Test 1: process_data tasks (from first test) + process_tasks = [task for task in aiida_wf._workgraph.tasks if task.name.startswith("process_data")] + assert len(process_tasks) == 2 # One for each foo value - # Check that each task has unique node keys and appropriate filenames for task in process_tasks: nodes_keys = list(task.inputs.nodes._sockets.keys()) filenames = task.inputs.filenames.value arguments = task.inputs.arguments.value - # Each task should have two simulation outputs as input - sim_output_keys = [ - k for k in nodes_keys if k.startswith("simulation_output") - ] - import ipdb; ipdb.set_trace() + # Each task should have exactly two simulation_output inputs (foo=1 and foo=2) + sim_output_keys = [k for k in nodes_keys if k.startswith("simulation_output")] assert len(sim_output_keys) == 2 - # The filename should be the full label (since there are conflicts) - key = sim_output_keys[0] - assert filenames[key] == key # Full label used as filename - assert key in arguments # Key appears in arguments - -def test_mixed_conflict_and_no_conflict(tmp_path): - """Test workflow with both conflicting and non-conflicting data.""" - yaml_content = textwrap.dedent(""" - cycles: - - test_cycle: - tasks: - - analyze: - inputs: - - shared_config: # Single file, no conflict - port: config - - simulation_data: # Multiple files, conflict expected - parameters: - run: all - port: data - outputs: [analysis_result] - tasks: - - analyze: - plugin: shell - command: "analyze.py --config {PORT::config} --data {PORT::data}" - src: analyze.py - parameters: [run] - computer: localhost - data: - available: 
- - shared_config: - type: file - src: config.json - computer: localhost - generated: - - simulation_data: - type: file - src: sim_output.nc - parameters: [run] - - analysis_result: - type: file - src: result.txt - parameters: [run] - parameters: - run: [1, 2] - """) + # The filenames should be the full labels (since there are conflicts) + for key in sim_output_keys: + assert filenames[key] == key # Full label used as filename + assert key in arguments # Key appears in arguments - config_file = tmp_path / "config.yml" - config_file.write_text(yaml_content) + # Test 2: analyze tasks (from second test - mixed conflict/no-conflict) + analyze_tasks = [task for task in aiida_wf._workgraph.tasks if task.name.startswith("analyze")] + assert len(analyze_tasks) == 2 # One for each foo value - # Create files - (tmp_path / "config.json").touch() - (tmp_path / "analyze.py").touch() + for task in analyze_tasks: + filenames = task.inputs.filenames.value - core_wf = Workflow.from_config_file(str(config_file)) - aiida_wf = AiidaWorkGraph(core_wf) + # shared_config should use simple filename (no conflict across tasks) + assert filenames["shared_config"] == "config.json" - analyze_tasks = [ - task - for task in aiida_wf._workgraph.tasks - if task.name.startswith("analyze") - ] + # simulation_output should use full labels (conflict with other analyze tasks) + sim_output_keys = [k for k in filenames.keys() if k.startswith("simulation_output")] + assert len(sim_output_keys) == 2 # Should have both foo=1 and foo=2 inputs - for task in analyze_tasks: + for key in sim_output_keys: + assert filenames[key] == key # Full label as filename + assert "foo_" in key # Contains parameter info + + # Test 3: simulate tasks (should have simple filenames for shared_config) + simulate_tasks = [task for task in aiida_wf._workgraph.tasks if task.name.startswith("simulate")] + assert len(simulate_tasks) == 2 # One for each foo value + + for task in simulate_tasks: filenames = task.inputs.filenames.value - # shared_config should use simple filename (no conflict) + # Both input_file and shared_config should use simple names (no conflicts) + assert filenames["input_file"] == "input.txt" assert filenames["shared_config"] == "config.json" - # simulation_data should use full label (conflict with other tasks) - sim_data_key = [ - k for k in filenames.keys() if k.startswith("simulation_data") - ][0] - assert filenames[sim_data_key] == sim_data_key # Full label as filename @pytest.mark.usefixtures("aiida_localhost") def test_comprehensive_parameterized_workflow(tmp_path): - """Comprehensive test covering the full parameterized workflow scenario. 
- - This test validates the complete integration including: - - Multiple parameters (foo, bar) and dates - - Mixed conflict/no-conflict scenarios - - Correct argument resolution - - Proper filename mapping - """ - yaml_str = textwrap.dedent(""" + """Test parameterized workflow behavior and properties.""" + yaml_str = textwrap.dedent(f""" start_date: &start "2026-01-01T00:00" stop_date: &stop "2026-07-01T00:00" cycles: @@ -409,25 +291,27 @@ def test_comprehensive_parameterized_workflow(tmp_path): - analyze: inputs: - sim_output: - parameters: {foo: all, bar: single} + parameters: {{foo: all, bar: single}} port: data outputs: [analysis] tasks: - simulate: plugin: shell - command: "sim.py {PORT::cfg}" + command: "sim.py {{PORT::cfg}}" + src: {tmp_path}/sim.py parameters: [foo, bar] computer: localhost - analyze: plugin: shell - command: "analyze.py {PORT::data}" + command: "analyze.py {{PORT::data}}" + src: {tmp_path}/analyze.py parameters: [bar] computer: localhost data: available: - config: type: file - src: config.txt + src: {tmp_path}/config.txt computer: localhost generated: - sim_output: @@ -456,9 +340,7 @@ def test_comprehensive_parameterized_workflow(tmp_path): # Verify task structure sim_tasks = [t for t in aiida_wf._workgraph.tasks if t.name.startswith("simulate")] - analyze_tasks = [ - t for t in aiida_wf._workgraph.tasks if t.name.startswith("analyze") - ] + analyze_tasks = [t for t in aiida_wf._workgraph.tasks if t.name.startswith("analyze")] assert len(sim_tasks) == 2 # 2 foo values × 1 bar value = 2 tasks assert len(analyze_tasks) == 1 # 1 bar value = 1 task @@ -481,273 +363,356 @@ def test_comprehensive_parameterized_workflow(tmp_path): assert "foo_" in key and "bar_3_0" in key # Contains parameter info -# @pytest.fixture -# def sample_workflow_config(tmp_path): -# """Fixture providing a reusable workflow configuration for testing.""" -# config = { -# "name": "test_workflow", -# "rootdir": tmp_path, -# "cycles": [], -# "tasks": [], -# "data": models.ConfigData(), -# "parameters": {}, -# } -# return config - - -# @pytest.mark.usefixtures("aiida_localhost") -# def test_linking_complex_dates_and_parameters(tmp_path): -# yaml_str = textwrap.dedent( -# """ -# start_date: &root_start_date "2026-01-01T00:00" -# stop_date: &root_stop_date "2028-01-01T00:00" -# cycles: -# - bimonthly_tasks: -# cycling: -# start_date: *root_start_date -# stop_date: *root_stop_date -# period: P6M -# tasks: -# - icon: -# inputs: -# - initial_conditions: -# when: -# at: *root_start_date -# port: init -# - icon_restart: -# when: -# after: *root_start_date -# target_cycle: -# lag: -P6M -# parameters: -# foo: single -# bar: single -# port: restart -# - forcing: -# port: forcing -# outputs: [icon_output, icon_restart] -# - statistics_foo: -# inputs: -# - icon_output: -# parameters: -# bar: single -# port: None -# outputs: [analysis_foo] -# - statistics_foo_bar: -# inputs: -# - analysis_foo: -# port: None -# outputs: [analysis_foo_bar] -# - yearly: -# cycling: -# start_date: *root_start_date -# stop_date: *root_stop_date -# period: P1Y -# tasks: -# - merge: -# inputs: -# - analysis_foo_bar: -# target_cycle: -# lag: ["P0M", "P6M"] -# port: None -# outputs: [yearly_analysis] -# tasks: -# - icon: -# plugin: shell -# src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/icon.py -# command: "icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}" -# parameters: [foo, bar] -# computer: localhost -# - statistics_foo: -# plugin: shell -# src: 
/home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/statistics.py -# command: "statistics.py {PORT::None}" -# parameters: [bar] -# computer: localhost -# - statistics_foo_bar: -# plugin: shell -# src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/statistics.py -# command: "statistics.py {PORT::None}" -# computer: localhost -# - merge: -# plugin: shell -# src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/merge.py -# command: "merge.py {PORT::None}" -# computer: localhost -# data: -# available: -# - initial_conditions: -# type: file -# src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/small/config/data/initial_conditions -# computer: localhost -# - forcing: -# type: file -# src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/data/forcing -# computer: localhost -# generated: -# - icon_output: -# type: file -# src: icon_output -# parameters: [foo, bar] -# - icon_restart: -# type: file -# src: restart -# parameters: [foo, bar] -# - analysis_foo: -# type: file -# src: analysis -# parameters: [bar] -# - analysis_foo_bar: -# type: file -# src: analysis -# - yearly_analysis: -# type: file -# src: analysis -# parameters: -# foo: [0, 1] -# bar: [3.0] -# """ -# ) -# yaml_file = tmp_path / "config.yml" -# yaml_file.write_text(yaml_str) - -# core_wf = Workflow.from_config_file(yaml_file) -# aiida_wf = AiidaWorkGraph(core_workflow=core_wf) -# filenames_list = [task.inputs.filenames.value for task in aiida_wf._workgraph.tasks] -# arguments_list = [task.inputs.arguments.value for task in aiida_wf._workgraph.tasks] -# nodes_list = [ -# list(task.inputs.nodes._sockets.keys()) for task in aiida_wf._workgraph.tasks -# ] - -# expected_filenames_list = [ -# {"forcing": "forcing", "initial_conditions": "initial_conditions"}, -# {"forcing": "forcing", "initial_conditions": "initial_conditions"}, -# { -# "forcing": "forcing", -# "icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00": "restart", -# }, -# { -# "forcing": "forcing", -# "icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00": "restart", -# }, -# { -# "forcing": "forcing", -# "icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00": "restart", -# }, -# { -# "forcing": "forcing", -# "icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00": "restart", -# }, -# { -# "forcing": "forcing", -# "icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00": "restart", -# }, -# { -# "forcing": "forcing", -# "icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00": "restart", -# }, -# { -# "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00", -# "icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00", -# }, -# { -# "icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00", -# "icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00", -# }, -# { -# "icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00", -# "icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00", -# }, -# { -# "icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00", -# 
"icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00", -# }, -# {"analysis_foo_bar_3_0___date_2026_01_01_00_00_00": "analysis"}, -# {"analysis_foo_bar_3_0___date_2026_07_01_00_00_00": "analysis"}, -# {"analysis_foo_bar_3_0___date_2027_01_01_00_00_00": "analysis"}, -# {"analysis_foo_bar_3_0___date_2027_07_01_00_00_00": "analysis"}, -# { -# "analysis_foo_bar_date_2026_01_01_00_00_00": "analysis_foo_bar_date_2026_01_01_00_00_00", -# "analysis_foo_bar_date_2026_07_01_00_00_00": "analysis_foo_bar_date_2026_07_01_00_00_00", -# }, -# { -# "analysis_foo_bar_date_2027_01_01_00_00_00": "analysis_foo_bar_date_2027_01_01_00_00_00", -# "analysis_foo_bar_date_2027_07_01_00_00_00": "analysis_foo_bar_date_2027_07_01_00_00_00", -# }, -# ] - -# expected_arguments_list = [ -# "--restart --init {initial_conditions} --forcing {forcing}", -# "--restart --init {initial_conditions} --forcing {forcing}", -# "--restart {icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00} --init " -# "--forcing {forcing}", -# "--restart {icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00} --init " -# "--forcing {forcing}", -# "--restart {icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00} --init " -# "--forcing {forcing}", -# "--restart {icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00} --init " -# "--forcing {forcing}", -# "--restart {icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00} --init " -# "--forcing {forcing}", -# "--restart {icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00} --init " -# "--forcing {forcing}", -# "{icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00} " -# "{icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00}", -# "{icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00} " -# "{icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00}", -# "{icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00} " -# "{icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00}", -# "{icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00} " -# "{icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00}", -# "{analysis_foo_bar_3_0___date_2026_01_01_00_00_00}", -# "{analysis_foo_bar_3_0___date_2026_07_01_00_00_00}", -# "{analysis_foo_bar_3_0___date_2027_01_01_00_00_00}", -# "{analysis_foo_bar_3_0___date_2027_07_01_00_00_00}", -# "{analysis_foo_bar_date_2026_01_01_00_00_00} " -# "{analysis_foo_bar_date_2026_07_01_00_00_00}", -# "{analysis_foo_bar_date_2027_01_01_00_00_00} " -# "{analysis_foo_bar_date_2027_07_01_00_00_00}", -# ] - -# expected_nodes_list = [ -# ["initial_conditions", "forcing"], -# ["initial_conditions", "forcing"], -# ["icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00", "forcing"], -# ["icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00", "forcing"], -# ["icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00", "forcing"], -# ["icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00", "forcing"], -# ["icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00", "forcing"], -# ["icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00", "forcing"], -# [ -# "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00", -# "icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00", -# ], -# [ -# "icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00", -# "icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00", -# ], -# [ -# "icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00", -# "icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00", -# ], -# [ -# "icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00", -# 
"icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00", -# ], -# ["analysis_foo_bar_3_0___date_2026_01_01_00_00_00"], -# ["analysis_foo_bar_3_0___date_2026_07_01_00_00_00"], -# ["analysis_foo_bar_3_0___date_2027_01_01_00_00_00"], -# ["analysis_foo_bar_3_0___date_2027_07_01_00_00_00"], -# [ -# "analysis_foo_bar_date_2026_01_01_00_00_00", -# "analysis_foo_bar_date_2026_07_01_00_00_00", -# ], -# [ -# "analysis_foo_bar_date_2027_01_01_00_00_00", -# "analysis_foo_bar_date_2027_07_01_00_00_00", -# ], -# ] - -# assert arguments_list == expected_arguments_list -# assert filenames_list == expected_filenames_list -# assert nodes_list == expected_nodes_list +@pytest.mark.usefixtures("aiida_localhost") +def test_parameterized_workflow_regression(tmp_path): + """Regression test for exact parameterized workflow output.""" + yaml_str = textwrap.dedent(f""" + start_date: "2026-01-01T00:00" + stop_date: "2026-07-01T00:00" + cycles: + - simulation: + cycling: + start_date: "2026-01-01T00:00" + stop_date: "2026-07-01T00:00" + period: P6M + tasks: + - simulate: + inputs: + - initial_data: + port: input + outputs: [sim_result] + - analysis: + cycling: + start_date: "2026-01-01T00:00" + stop_date: "2026-07-01T00:00" + period: P6M + tasks: + - analyze: + inputs: + - sim_result: + parameters: {{param: all}} + port: data + outputs: [final_result] + tasks: + - simulate: + plugin: shell + command: "simulate.py {{PORT::input}}" + src: {tmp_path}/simulate.py + parameters: [param] + computer: localhost + - analyze: + plugin: shell + command: "analyze.py {{PORT::data}}" + src: {tmp_path}/analyze.py + parameters: [param] + computer: localhost + data: + available: + - initial_data: + type: file + src: {tmp_path}/input.dat + computer: localhost + generated: + - sim_result: + type: file + src: result.dat + parameters: [param] + - final_result: + type: file + src: final.dat + parameters: [param] + parameters: + param: [1, 2] + """) + + config_file = tmp_path / "config.yml" + config_file.write_text(yaml_str) + + # Create minimal required files + (tmp_path / "input.dat").touch() + (tmp_path / "simulate.py").touch() + (tmp_path / "analyze.py").touch() + + core_wf = Workflow.from_config_file(str(config_file)) + aiida_wf = AiidaWorkGraph(core_wf) + + # Regression testing: verify structure + analyze_tasks = [t for t in aiida_wf._workgraph.tasks if t.name.startswith("analyze")] + assert len(analyze_tasks) == 2 # One for each param value + + task = analyze_tasks[0] # Test one of the analyze tasks + filenames = task.inputs.filenames.value + arguments = task.inputs.arguments.value + nodes_keys = list(task.inputs.nodes._sockets.keys()) + + # Expected values for regression detection + expected_keys = ["sim_result_param_1___date_2026_01_01_00_00_00", "sim_result_param_2___date_2026_01_01_00_00_00"] + expected_filenames = { + "sim_result_param_1___date_2026_01_01_00_00_00": "sim_result_param_1___date_2026_01_01_00_00_00", + "sim_result_param_2___date_2026_01_01_00_00_00": "sim_result_param_2___date_2026_01_01_00_00_00", + } + expected_arguments = ( + "{sim_result_param_1___date_2026_01_01_00_00_00} {sim_result_param_2___date_2026_01_01_00_00_00}" + ) + + assert set(nodes_keys) == set(expected_keys) + assert filenames == expected_filenames + assert arguments == expected_arguments + + +@pytest.mark.usefixtures("aiida_localhost") +def test_comprehensive_parameterized_explicit(tmp_path): + yaml_str = textwrap.dedent( + """ + start_date: &root_start_date "2026-01-01T00:00" + stop_date: &root_stop_date "2028-01-01T00:00" + cycles: + - 
bimonthly_tasks: + cycling: + start_date: *root_start_date + stop_date: *root_stop_date + period: P6M + tasks: + - icon: + inputs: + - initial_conditions: + when: + at: *root_start_date + port: init + - icon_restart: + when: + after: *root_start_date + target_cycle: + lag: -P6M + parameters: + foo: single + bar: single + port: restart + - forcing: + port: forcing + outputs: [icon_output, icon_restart] + - statistics_foo: + inputs: + - icon_output: + parameters: + bar: single + port: None + outputs: [analysis_foo] + - statistics_foo_bar: + inputs: + - analysis_foo: + port: None + outputs: [analysis_foo_bar] + - yearly: + cycling: + start_date: *root_start_date + stop_date: *root_stop_date + period: P1Y + tasks: + - merge: + inputs: + - analysis_foo_bar: + target_cycle: + lag: ["P0M", "P6M"] + port: None + outputs: [yearly_analysis] + tasks: + - icon: + plugin: shell + src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/icon.py + command: "icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}" + parameters: [foo, bar] + computer: localhost + - statistics_foo: + plugin: shell + src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/statistics.py + command: "statistics.py {PORT::None}" + parameters: [bar] + computer: localhost + - statistics_foo_bar: + plugin: shell + src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/statistics.py + command: "statistics.py {PORT::None}" + computer: localhost + - merge: + plugin: shell + src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/merge.py + command: "merge.py {PORT::None}" + computer: localhost + data: + available: + - initial_conditions: + type: file + src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/small/config/data/initial_conditions + computer: localhost + - forcing: + type: file + src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/data/forcing + computer: localhost + generated: + - icon_output: + type: file + src: icon_output + parameters: [foo, bar] + - icon_restart: + type: file + src: restart + parameters: [foo, bar] + - analysis_foo: + type: file + src: analysis + parameters: [bar] + - analysis_foo_bar: + type: file + src: analysis + - yearly_analysis: + type: file + src: analysis + parameters: + foo: [0, 1] + bar: [3.0] + """ + ) + yaml_file = tmp_path / "config.yml" + yaml_file.write_text(yaml_str) + + core_wf = Workflow.from_config_file(yaml_file) + aiida_wf = AiidaWorkGraph(core_workflow=core_wf) + filenames_list = [task.inputs.filenames.value for task in aiida_wf._workgraph.tasks] + arguments_list = [task.inputs.arguments.value for task in aiida_wf._workgraph.tasks] + nodes_list = [ + list(task.inputs.nodes._sockets.keys()) for task in aiida_wf._workgraph.tasks + ] + + expected_filenames_list = [ + {"forcing": "forcing", "initial_conditions": "initial_conditions"}, + {"forcing": "forcing", "initial_conditions": "initial_conditions"}, + { + "forcing": "forcing", + "icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00": "restart", + }, + { + "forcing": "forcing", + "icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00": "restart", + }, + { + "forcing": "forcing", + "icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00": "restart", + }, + { + "forcing": "forcing", + "icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00": "restart", + 
}, + { + "forcing": "forcing", + "icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00": "restart", + }, + { + "forcing": "forcing", + "icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00": "restart", + }, + { + "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00", + "icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00", + }, + { + "icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00", + "icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00", + }, + { + "icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00", + "icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00", + }, + { + "icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00": "icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00", + "icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00": "icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00", + }, + {"analysis_foo_bar_3_0___date_2026_01_01_00_00_00": "analysis"}, + {"analysis_foo_bar_3_0___date_2026_07_01_00_00_00": "analysis"}, + {"analysis_foo_bar_3_0___date_2027_01_01_00_00_00": "analysis"}, + {"analysis_foo_bar_3_0___date_2027_07_01_00_00_00": "analysis"}, + { + "analysis_foo_bar_date_2026_01_01_00_00_00": "analysis_foo_bar_date_2026_01_01_00_00_00", + "analysis_foo_bar_date_2026_07_01_00_00_00": "analysis_foo_bar_date_2026_07_01_00_00_00", + }, + { + "analysis_foo_bar_date_2027_01_01_00_00_00": "analysis_foo_bar_date_2027_01_01_00_00_00", + "analysis_foo_bar_date_2027_07_01_00_00_00": "analysis_foo_bar_date_2027_07_01_00_00_00", + }, + ] + + expected_arguments_list = [ + "--restart --init {initial_conditions} --forcing {forcing}", + "--restart --init {initial_conditions} --forcing {forcing}", + "--restart {icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00} --init " + "--forcing {forcing}", + "--restart {icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00} --init " + "--forcing {forcing}", + "--restart {icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00} --init " + "--forcing {forcing}", + "--restart {icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00} --init " + "--forcing {forcing}", + "--restart {icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00} --init " + "--forcing {forcing}", + "--restart {icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00} --init " + "--forcing {forcing}", + "{icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00} " + "{icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00}", + "{icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00} " + "{icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00}", + "{icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00} " + "{icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00}", + "{icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00} " + "{icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00}", + "{analysis_foo_bar_3_0___date_2026_01_01_00_00_00}", + "{analysis_foo_bar_3_0___date_2026_07_01_00_00_00}", + "{analysis_foo_bar_3_0___date_2027_01_01_00_00_00}", + "{analysis_foo_bar_3_0___date_2027_07_01_00_00_00}", + "{analysis_foo_bar_date_2026_01_01_00_00_00} " + "{analysis_foo_bar_date_2026_07_01_00_00_00}", + "{analysis_foo_bar_date_2027_01_01_00_00_00} " + "{analysis_foo_bar_date_2027_07_01_00_00_00}", + ] 
+ + expected_nodes_list = [ + ["initial_conditions", "forcing"], + ["initial_conditions", "forcing"], + ["icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00", "forcing"], + ["icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00", "forcing"], + ["icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00", "forcing"], + ["icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00", "forcing"], + ["icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00", "forcing"], + ["icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00", "forcing"], + [ + "icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00", + "icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00", + ], + [ + "icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00", + "icon_output_foo_1___bar_3_0___date_2026_07_01_00_00_00", + ], + [ + "icon_output_foo_0___bar_3_0___date_2027_01_01_00_00_00", + "icon_output_foo_1___bar_3_0___date_2027_01_01_00_00_00", + ], + [ + "icon_output_foo_0___bar_3_0___date_2027_07_01_00_00_00", + "icon_output_foo_1___bar_3_0___date_2027_07_01_00_00_00", + ], + ["analysis_foo_bar_3_0___date_2026_01_01_00_00_00"], + ["analysis_foo_bar_3_0___date_2026_07_01_00_00_00"], + ["analysis_foo_bar_3_0___date_2027_01_01_00_00_00"], + ["analysis_foo_bar_3_0___date_2027_07_01_00_00_00"], + [ + "analysis_foo_bar_date_2026_01_01_00_00_00", + "analysis_foo_bar_date_2026_07_01_00_00_00", + ], + [ + "analysis_foo_bar_date_2027_01_01_00_00_00", + "analysis_foo_bar_date_2027_07_01_00_00_00", + ], + ] + + assert arguments_list == expected_arguments_list + assert filenames_list == expected_filenames_list + assert nodes_list == expected_nodes_list From 31268f02c320d91fd58ecda5566d7853d70b6064 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Mon, 2 Jun 2025 17:34:46 +0200 Subject: [PATCH 14/27] . 
--- src/sirocco/cli.py | 247 ---------------------------------------- tests/test_workgraph.py | 145 +++++++++++++++++++++-- 2 files changed, 134 insertions(+), 258 deletions(-) delete mode 100644 src/sirocco/cli.py diff --git a/src/sirocco/cli.py b/src/sirocco/cli.py deleted file mode 100644 index e5f0e507..00000000 --- a/src/sirocco/cli.py +++ /dev/null @@ -1,247 +0,0 @@ -import typer -from pathlib import Path -from typing import Optional -from rich.console import Console -from rich.traceback import install as install_rich_traceback - -from sirocco import parsing -from sirocco import core -from sirocco import vizgraph -from sirocco import pretty_print -from sirocco.workgraph import AiidaWorkGraph -import aiida - - -# --- Typer App and Rich Console Setup --- -# Install rich tracebacks for beautiful error reporting -install_rich_traceback(show_locals=False) - -# Create the Typer app instance -app = typer.Typer( - help="Sirocco Weather and Climate Workflow Management Tool.", - add_completion=True, # Optional: disable shell completion installation prompts -) - -# Create a Rich console instance for printing -console = Console() - - -# --- Helper functions --- -def load_aiida_profile(profile: Optional[str] = None): - try: - aiida.load_profile(profile=profile, allow_switch=True) # Allow switch for flexibility - # console.print(f"ℹ️ AiiDA profile [green]'{aiida.get_profile().name}'[/green] loaded.") - except Exception as e: - console.print(f"[bold red]Failed to load AiiDA profile '{profile if profile else 'default'}': {e}[/bold red]") - console.print("Ensure an AiiDA profile exists and the AiiDA daemon is configured if submitting.") - raise typer.Exit(code=1) - - -def _prepare_aiida_workgraph(workflow_file_str: str, aiida_profile_name: Optional[str]) -> AiidaWorkGraph: - """Helper to load profile, config, and prepare AiidaWorkGraph.""" - load_aiida_profile(aiida_profile_name) - try: - config_workflow = parsing.ConfigWorkflow.from_config_file(workflow_file_str) - core_wf = core.Workflow.from_config_workflow(config_workflow) - aiida_wg = AiidaWorkGraph(core_wf) - console.print(f"⚙️ Workflow [magenta]'{core_wf.name}'[/magenta] prepared for AiiDA execution.") - return aiida_wg - except Exception as e: - console.print(f"[bold red]❌ Failed to prepare workflow for AiiDA: {e}[/bold red]") - console.print_exception() - raise typer.Exit(code=1) - - -# --- CLI Commands --- - - -@app.command() -def verify( - workflow_file: Path = typer.Argument( - ..., # Ellipsis indicates a required argument - exists=True, - file_okay=True, - dir_okay=False, - readable=True, - help="Path to the workflow definition YAML file.", - ), -): - """ - Validate the workflow definition file for syntax and basic consistency. 
- """ - console.print(f"🔍 Verifying workflow file: [cyan]{workflow_file}[/cyan]") - try: - # Attempt to load and validate the configuration - parsing.ConfigWorkflow.from_config_file(str(workflow_file)) - console.print("[green]✅ Workflow definition is valid.[/green]") - except Exception: - console.print("[bold red]❌ Workflow validation failed:[/bold red]") - # Rich traceback handles printing the exception nicely - console.print_exception() - raise typer.Exit(code=1) - - -@app.command() -def visualize( - workflow_file: Path = typer.Argument( - ..., - exists=True, - file_okay=True, - dir_okay=False, - readable=True, - help="Path to the workflow definition YAML file.", - ), - output_file: Optional[Path] = typer.Option( - None, # Default value is None, making it optional - "--output", - "-o", - writable=True, # Check if the path (or its parent dir) is writable - file_okay=True, - dir_okay=False, - help="Optional path to save the output SVG file.", - ), -): - """ - Generate an interactive SVG visualization of the unrolled workflow. - """ - console.print(f"📊 Visualizing workflow from: [cyan]{workflow_file}[/cyan]") - try: - # 1. Load configuration - config_workflow = parsing.ConfigWorkflow.from_config_file(str(workflow_file)) - - # 2. Create the core workflow representation (unrolls parameters/cycles) - core_workflow = core.Workflow.from_config_workflow(config_workflow) - - # 3. Create the visualization graph - viz_graph = vizgraph.VizGraph.from_core_workflow(core_workflow) - - # 4. Determine output path - if output_file is None: - # Default output name based on workflow name in the same directory - output_path = workflow_file.parent / f"{core_workflow.name}.svg" - else: - output_path = output_file - - # Ensure the output directory exists - output_path.parent.mkdir(parents=True, exist_ok=True) - - # 5. Draw the graph - viz_graph.draw(file_path=output_path) - - console.print(f"[green]✅ Visualization saved to:[/green] [cyan]{output_path.resolve()}[/cyan]") - - except Exception: - console.print("[bold red]❌ Failed to generate visualization:[/bold red]") - console.print_exception() - raise typer.Exit(code=1) - - -@app.command() -def represent( - workflow_file: Path = typer.Argument( - ..., - exists=True, - file_okay=True, - dir_okay=False, - readable=True, - help="Path to the workflow definition YAML file.", - ), -): - """ - Display the text representation of the unrolled workflow graph. - """ - console.print(f"📄 Representing workflow from: [cyan]{workflow_file}[/cyan]") - try: - config_workflow = parsing.ConfigWorkflow.from_config_file(str(workflow_file)) - core_workflow = core.Workflow.from_config_workflow(config_workflow) - - printer = pretty_print.PrettyPrinter(colors=False) - output_from_printer = printer.format(core_workflow) - - console.print(output_from_printer) - - except Exception: - console.print("[bold red]❌ Failed to represent workflow:[/bold red]") - console.print_exception() - raise typer.Exit(code=1) - - -@app.command(help="Run the workflow in a blocking fashion.") -def run( - workflow_file: Path = typer.Argument( - ..., - exists=True, - file_okay=True, - dir_okay=False, - readable=True, - help="Path to the workflow definition YAML file.", - ), - aiida_profile: Optional[str] = typer.Option( - None, "--aiida-profile", "-P", help="AiiDA profile to use (defaults to current active)." 
- ), -): - aiida_wg = _prepare_aiida_workgraph(str(workflow_file), aiida_profile) - try: - console.print(f"▶️ Running workflow [magenta]'{aiida_wg._core_workflow.name}'[/magenta] directly (blocking)...") - results = aiida_wg.run(inputs=None) # No metadata - console.print("[green]✅ Workflow execution finished.[/green]") - console.print("Results:") - if isinstance(results, dict): - for k, v in results.items(): - console.print(f" [bold blue]{k}[/bold blue]: {v}") - else: - console.print(f" {results}") - except Exception as e: - console.print(f"[bold red]❌ Workflow execution failed during run: {e}[/bold red]") - console.print_exception() - raise typer.Exit(code=1) - - -@app.command(help="Submit the workflow to the AiiDA daemon.") -def submit( - workflow_file: Path = typer.Argument( - ..., - exists=True, - file_okay=True, - dir_okay=False, - readable=True, - help="Path to the workflow definition YAML file.", - ), - aiida_profile: Optional[str] = typer.Option( - None, "--aiida-profile", "-P", help="AiiDA profile to use (defaults to current active)." - ), - wait: bool = typer.Option(False, "--wait", "-w", help="Wait for the workflow to complete after submission."), - timeout: int = typer.Option( # Default AiiDA timeout for wait is often very long or infinite - 3600, "--timeout", "-t", help="Timeout in seconds when waiting (if --wait is used)." - ), -): - """Submit the workflow to the AiiDA daemon.""" - - aiida_wg = _prepare_aiida_workgraph(str(workflow_file), aiida_profile) - try: - console.print(f"🚀 Submitting workflow [magenta]'{aiida_wg._core_workflow.name}'[/magenta] to AiiDA daemon...") - # No metadata passed to submit - results_node = aiida_wg.submit(inputs=None, wait=wait, timeout=timeout if wait else None) - - if isinstance(results_node, aiida.orm.WorkChainNode): - console.print(f"[green]✅ Workflow submitted. PK: {results_node.pk}[/green]") - if wait: - console.print( - f"🏁 Workflow completed. Final state: [bold { 'green' if results_node.is_finished_ok else 'red' }]{results_node.process_state.value.upper()}[/bold { 'green' if results_node.is_finished_ok else 'red' }]" - ) - if not results_node.is_finished_ok: - console.print( - "[yellow]Inspect the workchain for more details (e.g., `verdi process report PK`).[/yellow]" - ) - else: # Should typically be a WorkChainNode - console.print(f"[green]✅ Submission initiated. 
Result: {results_node}[/green]") - - except Exception as e: - console.print(f"[bold red]❌ Workflow submission failed: {e}[/bold red]") - console.print_exception() - raise typer.Exit(code=1) - - -# --- Main entry point for the script --- -if __name__ == "__main__": - app() diff --git a/tests/test_workgraph.py b/tests/test_workgraph.py index fdd52655..8cbdeceb 100644 --- a/tests/test_workgraph.py +++ b/tests/test_workgraph.py @@ -462,8 +462,13 @@ def test_parameterized_workflow_regression(tmp_path): @pytest.mark.usefixtures("aiida_localhost") def test_comprehensive_parameterized_explicit(tmp_path): + import pathlib + + # Get the test cases directory relative to the test file + test_dir = pathlib.Path(__file__).parent.parent / "cases" + yaml_str = textwrap.dedent( - """ + f""" start_date: &root_start_date "2026-01-01T00:00" stop_date: &root_stop_date "2028-01-01T00:00" cycles: @@ -519,35 +524,35 @@ def test_comprehensive_parameterized_explicit(tmp_path): tasks: - icon: plugin: shell - src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/icon.py - command: "icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}" + src: {test_dir}/parameters/config/scripts/icon.py + command: "icon.py --restart {{PORT::restart}} --init {{PORT::init}} --forcing {{PORT::forcing}}" parameters: [foo, bar] computer: localhost - statistics_foo: plugin: shell - src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/statistics.py - command: "statistics.py {PORT::None}" + src: {test_dir}/parameters/config/scripts/statistics.py + command: "statistics.py {{PORT::None}}" parameters: [bar] computer: localhost - statistics_foo_bar: plugin: shell - src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/statistics.py - command: "statistics.py {PORT::None}" + src: {test_dir}/parameters/config/scripts/statistics.py + command: "statistics.py {{PORT::None}}" computer: localhost - merge: plugin: shell - src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/merge.py - command: "merge.py {PORT::None}" + src: {test_dir}/parameters/config/scripts/merge.py + command: "merge.py {{PORT::None}}" computer: localhost data: available: - initial_conditions: type: file - src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/small/config/data/initial_conditions + src: {test_dir}/small/config/data/initial_conditions computer: localhost - forcing: type: file - src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/data/forcing + src: {test_dir}/parameters/config/data/forcing computer: localhost generated: - icon_output: @@ -573,6 +578,117 @@ def test_comprehensive_parameterized_explicit(tmp_path): bar: [3.0] """ ) + # yaml_str = textwrap.dedent( + # """ + # start_date: &root_start_date "2026-01-01T00:00" + # stop_date: &root_stop_date "2028-01-01T00:00" + # cycles: + # - bimonthly_tasks: + # cycling: + # start_date: *root_start_date + # stop_date: *root_stop_date + # period: P6M + # tasks: + # - icon: + # inputs: + # - initial_conditions: + # when: + # at: *root_start_date + # port: init + # - icon_restart: + # when: + # after: *root_start_date + # target_cycle: + # lag: -P6M + # parameters: + # foo: single + # bar: single + # port: restart + # - forcing: + # port: forcing + # outputs: [icon_output, icon_restart] + # - statistics_foo: + # inputs: + # - icon_output: + # 
parameters: + # bar: single + # port: None + # outputs: [analysis_foo] + # - statistics_foo_bar: + # inputs: + # - analysis_foo: + # port: None + # outputs: [analysis_foo_bar] + # - yearly: + # cycling: + # start_date: *root_start_date + # stop_date: *root_stop_date + # period: P1Y + # tasks: + # - merge: + # inputs: + # - analysis_foo_bar: + # target_cycle: + # lag: ["P0M", "P6M"] + # port: None + # outputs: [yearly_analysis] + # tasks: + # - icon: + # plugin: shell + # src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/icon.py + # command: "icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}" + # parameters: [foo, bar] + # computer: localhost + # - statistics_foo: + # plugin: shell + # src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/statistics.py + # command: "statistics.py {PORT::None}" + # parameters: [bar] + # computer: localhost + # - statistics_foo_bar: + # plugin: shell + # src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/statistics.py + # command: "statistics.py {PORT::None}" + # computer: localhost + # - merge: + # plugin: shell + # src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/merge.py + # command: "merge.py {PORT::None}" + # computer: localhost + # data: + # available: + # - initial_conditions: + # type: file + # src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/small/config/data/initial_conditions + # computer: localhost + # - forcing: + # type: file + # src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/data/forcing + # computer: localhost + # generated: + # - icon_output: + # type: file + # src: icon_output + # parameters: [foo, bar] + # - icon_restart: + # type: file + # src: restart + # parameters: [foo, bar] + # - analysis_foo: + # type: file + # src: analysis + # parameters: [bar] + # - analysis_foo_bar: + # type: file + # src: analysis + # - yearly_analysis: + # type: file + # src: analysis + # parameters: + # foo: [0, 1] + # bar: [3.0] + # """ + # ) yaml_file = tmp_path / "config.yml" yaml_file.write_text(yaml_str) @@ -716,3 +832,10 @@ def test_comprehensive_parameterized_explicit(tmp_path): assert arguments_list == expected_arguments_list assert filenames_list == expected_filenames_list assert nodes_list == expected_nodes_list + + # PRCOMMENT: Introduce this once we can automatically create the codes in a reasonable way. + # Currently, it still fails... + # output_node = aiida_wf.run() + # assert ( + # output_node.is_finished_ok + # ), f"Not successful run. Got exit code {output_node.exit_code} with message {output_node.exit_message}." From 221cfc9b38592a05743ff5c85cec935ff0121f4c Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Mon, 2 Jun 2025 17:36:57 +0200 Subject: [PATCH 15/27] . 
--- pyproject.toml | 5 - .../config/ICON/NAMELIST_exclaim_ape_R02B04 | 206 -------------- .../remote/config/ICON/icon_master.namelist | 23 -- tests/cases/remote/config/config.yml | 107 -------- tests/cases/remote/config/data/forcing | 0 .../remote/config/data/initial_conditions | 0 tests/cases/remote/config/scripts/icon.py | 70 ----- tests/cases/remote/config/scripts/merge.py | 15 -- .../cases/remote/config/scripts/statistics.py | 15 -- tests/cases/remote/data/config.txt | 251 ------------------ tests/cases/remote/svg/.gitkeep | 0 11 files changed, 692 deletions(-) delete mode 100644 tests/cases/remote/config/ICON/NAMELIST_exclaim_ape_R02B04 delete mode 100644 tests/cases/remote/config/ICON/icon_master.namelist delete mode 100644 tests/cases/remote/config/config.yml delete mode 100644 tests/cases/remote/config/data/forcing delete mode 100644 tests/cases/remote/config/data/initial_conditions delete mode 100755 tests/cases/remote/config/scripts/icon.py delete mode 100755 tests/cases/remote/config/scripts/merge.py delete mode 100755 tests/cases/remote/config/scripts/statistics.py delete mode 100644 tests/cases/remote/data/config.txt delete mode 100644 tests/cases/remote/svg/.gitkeep diff --git a/pyproject.toml b/pyproject.toml index 8d656f45..42c55e9a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,8 +36,6 @@ dependencies = [ "pygraphviz", "lxml", "f90nml", - "rich", - "typer[all]", "aiida-shell @ git+https://github.com/sphuber/aiida-shell.git@fix/105/handle-remote-data-argument-placeholders", ] license = {file = "LICENSE"} @@ -47,9 +45,6 @@ Repository = "https://github.com/C2SM/Sirocco.git" Documentation = "https://c2sm.github.io/Sirocco/" Changelog = "https://github.com/C2SM/Sirocco/blob/main/CHANGELOG.md" -[project.scripts] -sirocco = "sirocco.cli:app" - [tool.pytest.ini_options] # Configuration for [pytest](https://docs.pytest.org) addopts = "--pdbcls=IPython.terminal.debugger:TerminalPdb" diff --git a/tests/cases/remote/config/ICON/NAMELIST_exclaim_ape_R02B04 b/tests/cases/remote/config/ICON/NAMELIST_exclaim_ape_R02B04 deleted file mode 100644 index 449bbb7e..00000000 --- a/tests/cases/remote/config/ICON/NAMELIST_exclaim_ape_R02B04 +++ /dev/null @@ -1,206 +0,0 @@ -! parallel_nml: MPI parallelization ------------------------------------------ -¶llel_nml - nproma = 48 ! loop chunk length - nblocks_c = 0 ! loop number of cell blocks - nproma_sub = 48 ! loop chunk length for radiation scheme - p_test_run = .FALSE. ! .TRUE. means verification run for MPI parallelization - num_io_procs = 0 ! number of I/O processors - num_restart_procs = 0 ! number of restart processors - iorder_sendrecv = 3 ! sequence of MPI send/receive calls -/ - -! grid_nml: horizontal grid -------------------------------------------------- -&grid_nml - dynamics_grid_filename = " ./icon_grid_0013_R02B04_R.nc" ! array of the grid filenames for the dycore - lredgrid_phys = .FALSE. ! .true.=radiation is calculated on a reduced grid -/ - -! run_nml: general switches --------------------------------------------------- -&run_nml - num_lev = 60 ! number of full levels of vertical grid - dtime = 2 ! [s] timestep in seconds - ldynamics = .TRUE. ! dynamics --> dynamics_ctl, diffusion_ctl - ltransport = .TRUE. ! switch on tracer transport --> transport_ctl - iforcing = 3 ! 0: dynamical core only, 1: Held-Suarez, 2: ECHAM, 3: NWP - ltestcase = .TRUE. ! run testcase --> testcase_ctl - ntracer = 5 ! number of tracers - msg_level = 12 ! detailed report during integration - ltimer = .TRUE. ! 
timer for monitoring the runtime of specific routines - timers_level = 10 ! performance timer granularity - check_uuid_gracefully = .TRUE. ! give only warnings for non-matching uuids - output = "nml" ! main switch for enabling/disabling components of the model output -/ - -! nwp_phy_nml: switches for the physics schemes ------------------------------ -&nwp_phy_nml - inwp_gscp = 2 ! cloud microphysics and precipitation 0: none, 1: hydci, 2: hydci_gr - mu_rain = 0.5 ! shap parameter in gamma distribution for rain - rain_n0_factor = 0.1 ! tuning factor for intercept parameter of raindrop size distr. - inwp_convection = 1 ! convection - lshallowconv_only = .FALSE. ! only shallow convection - inwp_radiation = 4 ! 1: RRTM, 2: Ritter-Geleyn 4: ecRad: - latm_above_top = .TRUE. ! take into account atmosphere above model top for radiation computation - inwp_cldcover = 1 ! cloud cover scheme for radiation 5: all or nothing - inwp_turb = 1 ! 1: Raschendorfer, 5: Smagorinsky diffusion (Dipankar et al.) - inwp_satad = 1 ! saturation adjustment - inwp_sso = 0 ! subgrid scale orographic drag - inwp_gwd = 1 ! non-orographic gravity wave drag - inwp_surface = 0 ! surface scheme (0: None, 1: TERRA) - dt_rad = 12 ! time step for radiation in s - dt_ccov = 4 - dt_gwd = 8 - dt_conv = 4 -/ - -! turbdiff_nml: turbulent diffusion ------------------------------------------- -&turbdiff_nml - tkhmin = 0.01 ! minimum limiter for Ks (needed for stability, but unphysical) - tkhmin_strat = 0.01 - tkmmin = 0.01 - tkmmin_strat = 0.01 - rlam_heat = 1.0 ! Scaling factor of the laminar boundary layer for heat (scalars) - pat_len = 100. ! effective length scale of thermal surface patterns - rat_sea = 9.0 ! Ratio of laminar scaling factors for scalars over sea and land. - q_crit = 1.6 ! critical value for normalized super-saturation - tur_len = 150 ! Asymptotic maximal turbulent distance -/ - -! radiation_nml: radiation scheme --------------------------------------------- -&radiation_nml -ecrad_isolver = 2 -izenith = 3 ! zenith angle formula for the radiative transfer computation 3: perpetual equinox - albedo_type = 3 ! type of surface albedo - albedo_fixed = 0.2 ! DL: Discuss value! - irad_o3 = 0 ! ozone climatology 7: GEMS ozone climatology !DL: Want 4 for Aquaplanet - irad_aero = 2 ! aerosol climatology 0: no aerosols 2: global constant - irad_h2o = 1 - irad_co2 = 0 - irad_ch4 = 0 - irad_n2o = 0 - irad_o2 = 0 - irad_cfc11 = 0 - irad_cfc12 = 0 - icld_overlap = 2 ! Generalized Random - ecrad_data_path = './ecrad_data' ! Optical property files path ecRad (link files as path is truncated inside ecrad) -/ - -! nonhydrostatic_nml: nonhydrostatic model ----------------------------------- -&nonhydrostatic_nml - iadv_rhotheta = 2 ! advection method for rho and rhotheta 2: Miura 2nd order - ivctype = 2 ! type of vertical coordinate 1: gal-chen hybrid 2:sleve - itime_scheme = 4 ! time integration scheme - ndyn_substeps = 5 ! number of dynamics steps per fast-physics step - damp_height = 50000.0 ! height at which Rayleigh damping of vertical wind starts - rayleigh_coeff = 0.1 ! Rayleigh damping coefficient - divdamp_order = 4 ! order of divergence damping - divdamp_type = 3 ! type of divergence damping - divdamp_fac = 0.0025 ! scaling factor for divergence damping - l_zdiffu_t = .FALSE. ! specifies computation of Smagorinsky temperature diffusion - htop_moist_proc = 22500.0 ! max. height for moist physics - hbot_qvsubstep = 22500.0 ! height above which QV is advected with substepping scheme -/ - -! 
sleve_nml: vertical level specification ------------------------------------- -&sleve_nml ! vertical grid standard output for message level >= 15 - min_lay_thckn = 50. ! lowest level thickness (between half-levels) - top_height = 75000. ! Height of model Top - stretch_fac = 0.9 ! stretching towards model top - decay_scale_1 = 4000. ! decay scales for topography - decay_scale_2 = 2500. - decay_exp = 1.2 - flat_height = 16000. ! Height above which the coordinate surfaces are flat DL: set to 20, since no topo? -/ - -! dynamics_nml: dynamical core ----------------------------------------------- -&dynamics_nml - lcoriolis = .TRUE. ! Coriolis Force -/ - -! nh_testcase_nml: testcase namelist ------------------------------------------ -&nh_testcase_nml - nh_test_name = 'APE_nwp' ! test case identifier - ape_sst_case = 'sst_qobs' ! sst distribution - zp_ape = 101325 ! surface pressure [Pa] - ztmc_ape = 50.006 ! total moisture content [kg/m^2] -/ - -! transport_nml: tracer transport --------------------------------------------- -&transport_nml - ihadv_tracer = 52,2,2,2,2,2 ! gdm: 52 combination of hybrid FFSL/Miura3 with subcycling - itype_hlimit = 3,4,4,4,4,4 ! type of limiter for horizontal transport - ivadv_tracer = 3,3,3,3,3,3 ! tracer specific method to compute vertical advection - itype_vlimit = 1,2,1,2,3,1 ! Type of limiter for vertical transport - llsq_svd = .TRUE. ! use SV decomposition for least squares design matrix -/ - -! diffusion_nml: horizontal (numerical) diffusion ---------------------------- -&diffusion_nml - lhdiff_vn = .TRUE. ! diffusion on the horizontal wind field - lhdiff_temp = .TRUE. ! diffusion on the temperature field - lhdiff_w = .TRUE. ! diffusion on the vertical wind field - hdiff_order = 5 ! order of nabla operator for diffusion - itype_vn_diffu = 1 ! reconstruction method used for Smagorinsky diffusion - itype_t_diffu = 2 ! discretization of temperature diffusion - hdiff_efdt_ratio = 24.0 ! ratio of e-folding time to time step - hdiff_smag_fac = 0.025 ! scaling factor for Smagorinsky diffusion -/ - -! io_nml: general switches for model I/O ------------------------------------- -&io_nml - lnetcdf_flt64_output = .TRUE. ! Needed for probtest - write_last_restart = .TRUE. - itype_pres_msl = 4 ! 4: IFS method - restart_file_type = 5 ! 5: NetCDF4 - restart_write_mode = "joint procs multifile" - itype_rh = 1 ! RH w.r.t. water -/ - -! initicon_nml: specify read-in of initial state ------------------------------ -! Needed for probtest -&initicon_nml - pinit_seed = -1 ! seed for perturbation of initial model state. no perturbation by default - pinit_amplitude = 0. ! amplitude of perturbation -/ - -! output namelist: specify output of 2D fields ------------------------------ -&output_nml - output_filename = './atm_2d/' ! file name base - filetype = 5 ! output format 5:NetCDFv4 - filename_format = "exclaim_ape_R02B04_atm_2d_" ! Output filename format - output_start = "2000-01-01T00:00:03Z" - output_end = "2000-01-01T00:00:30Z" - output_interval = "PT2S" - file_interval = "P1D" - include_last = .TRUE. ! flag whether to include the last time step - remap = 0 ! 0: no remapping 1: lat-lon grid - reg_lat_def = -90,0.5,90.0 - reg_lon_def = -180,0.5,179.5 - output_grid = .TRUE. 
- ml_varlist = 'pres_sfc', - 'tqv' , 'tqc' , 'tqi' , - 'tqr' , 'tqs' , - 'clch' , 'clcm' , 'clcl' , 'clct' , - 'tot_prec','prec_gsp', - 't_2m' , 'qv_2m' , 'u_10m' , 'v_10m' , - 't_g' , 'rh_2m' , 'qv_s', 'tcm','tch', - 'lhfl_s' , 'shfl_s' , 'umfl_s', 'vmfl_s' - 'sob_s' , 'sob_t' , 'sod_t', 'sou_s', - 'thb_s' , 'thb_t' ,'thu_s','pres_sfc' -/ - -&output_nml - output_filename = './atm_3d_pl/'! file name base - filetype = 5 ! output format: 2=GRIB2, 4=NetCDFv2, 5=NetCDFv4 - filename_format = "exclaim_ape_R02B04_atm_3d_pl_" - output_start = "2000-01-01T00:00:00Z" - output_end = "2000-01-01T00:00:30Z" - output_interval = "PT2S" - file_interval = "P1D" - include_last = .TRUE. ! flag whether to include the last time step - remap = 0 ! 0: no remapping 1: lat-lon grid - reg_lat_def = -90,0.5,90.0 - reg_lon_def = -180,0.5,179.5 - output_grid = .TRUE. - p_levels = 10000,20000,25000,30000,50000,70000,85000,90000,95000,100000 - pl_varlist = 'geopot', 'temp','u', 'v', 'w', 'qv', 'qc', 'qi' -/ diff --git a/tests/cases/remote/config/ICON/icon_master.namelist b/tests/cases/remote/config/ICON/icon_master.namelist deleted file mode 100644 index af88fc57..00000000 --- a/tests/cases/remote/config/ICON/icon_master.namelist +++ /dev/null @@ -1,23 +0,0 @@ -&master_nml - lrestart = .false. - read_restart_namelists = .true. -/ -&master_time_control_nml - calendar = 'proleptic gregorian' - experimentStartDate = '2000-01-01T00:00:00Z' - restartTimeIntval = 'PT1M' - checkpointTimeIntval = 'PT1M' - experimentStopDate = '2000-01-01T00:02:00Z' -/ -&time_nml - is_relative_time = .false. -/ -&master_model_nml - model_name="atm" - model_namelist_filename="NAMELIST_exclaim_ape_R02B04" - model_type=1 - model_min_rank=0 - model_max_rank=65535 - model_inc_rank=1 - model_rank_group_size=1 -/ diff --git a/tests/cases/remote/config/config.yml b/tests/cases/remote/config/config.yml deleted file mode 100644 index c0b1023f..00000000 --- a/tests/cases/remote/config/config.yml +++ /dev/null @@ -1,107 +0,0 @@ -start_date: &root_start_date "2026-01-01T00:00" -stop_date: &root_stop_date "2027-01-01T00:00" -cycles: - - bimonthly_tasks: - cycling: - start_date: *root_start_date - stop_date: *root_stop_date - period: P6M - tasks: - - icon: - inputs: - - initial_conditions: - when: - at: *root_start_date - port: init - - icon_restart: - when: - after: *root_start_date - target_cycle: - lag: -P6M - parameters: - foo: single - bar: single - port: restart - - forcing: - port: forcing - outputs: [icon_output, icon_restart] - - statistics_foo: - inputs: - - icon_output: - parameters: - bar: single - port: None - outputs: [analysis_foo] - - statistics_foo_bar: - inputs: - - analysis_foo: - port: None - outputs: [analysis_foo_bar] - - yearly: - cycling: - start_date: *root_start_date - stop_date: *root_stop_date - period: P1Y - tasks: - - merge: - inputs: - - analysis_foo_bar: - target_cycle: - lag: ["P0M", "P6M"] - port: None - outputs: [yearly_analysis] -tasks: - - icon: - plugin: shell - src: scripts/icon.py - command: "icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}" - parameters: [foo, bar] - computer: localhost - - statistics_foo: - plugin: shell - src: scripts/statistics.py - command: "statistics.py {PORT::None}" - parameters: [bar] - computer: localhost - - statistics_foo_bar: - plugin: shell - src: scripts/statistics.py - command: "statistics.py {PORT::None}" - computer: localhost - - merge: - plugin: shell - src: scripts/merge.py - command: "merge.py {PORT::None}" - computer: localhost -data: - available: - - 
initial_conditions: - type: file - src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/remote/config/data/initial_conditions - computer: localhost - - forcing: - type: file - src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/remote/config/data/forcing - computer: localhost - generated: - - icon_output: - type: file - src: icon_output - parameters: [foo, bar] - - icon_restart: - type: file - src: restart - parameters: [foo, bar] - - analysis_foo: - type: file - src: analysis - parameters: [bar] - - analysis_foo_bar: - type: file - src: analysis - - yearly_analysis: - type: file - src: analysis -parameters: - foo: [0, 1] - bar: [3.0] diff --git a/tests/cases/remote/config/data/forcing b/tests/cases/remote/config/data/forcing deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/cases/remote/config/data/initial_conditions b/tests/cases/remote/config/data/initial_conditions deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/cases/remote/config/scripts/icon.py b/tests/cases/remote/config/scripts/icon.py deleted file mode 100755 index 32f71ed6..00000000 --- a/tests/cases/remote/config/scripts/icon.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python -"""usage: icon.py [-h] [--init [INIT]] [--restart [RESTART]] [--forcing [FORCING]] [namelist] - -A script mocking parts of icon in a form of a shell script - -options: - -h, --help show this help message and exit - --init [INIT] The icon init file. - --restart [RESTART] The icon restart file. - --forcing [FORCING] The icon forcing file. -""" - -import argparse -from pathlib import Path - -LOG_FILE = Path("icon.log") - - -def log(text: str): - print(text) - with LOG_FILE.open("a") as f: - f.write(text) - - -def main(): - parser = argparse.ArgumentParser(description="A script mocking parts of icon in a form of a shell script.") - parser.add_argument("--init", nargs="?", type=str, help="The icon init file.") - parser.add_argument("namelist", nargs="?", default=None) - parser.add_argument("--restart", nargs="?", type=str, help="The icon restart file.") - parser.add_argument("--forcing", nargs="?", type=str, help="The icon forcing file.") - - args = parser.parse_args() - - output = Path("icon_output") - output.write_text("") - - if args.restart: - if args.init: - msg = "Cannot use '--init' and '--restart' option at the same time." - raise ValueError(msg) - if not Path(args.restart).exists(): - msg = f"The icon restart file {args.restart!r} was not found." - raise FileNotFoundError(msg) - restart = Path(args.restart) - - log(f"Restarting from file {args.restart!r}.") - elif args.init: - if not Path(args.init).exists(): - msg = f"The icon init file {args.init!r} was not found." - raise FileNotFoundError(msg) - - log(f"Starting from init file {args.init!r}.") - else: - msg = "Please provide a restart or init file with the corresponding option." - raise ValueError(msg) - - if args.namelist: - log(f"Namelist {args.namelist} provided. Continue with it.") - else: - log("No namelist provided. 
Continue with default one.") - - # Main script execution continues here - log("Script finished running calculations") - - restart = Path("restart") - restart.write_text("") - - -if __name__ == "__main__": - main() diff --git a/tests/cases/remote/config/scripts/merge.py b/tests/cases/remote/config/scripts/merge.py deleted file mode 100755 index 2fa94152..00000000 --- a/tests/cases/remote/config/scripts/merge.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env python - -import argparse -from pathlib import Path - - -def main(): - parser = argparse.ArgumentParser(description="A script mocking parts of icon in a form of a shell script.") - parser.add_argument("file", nargs="+", type=str, help="The files to analyse.") - args = parser.parse_args() - Path("analysis").write_text(f"analysis for file {args.file}") - - -if __name__ == "__main__": - main() diff --git a/tests/cases/remote/config/scripts/statistics.py b/tests/cases/remote/config/scripts/statistics.py deleted file mode 100755 index 2fa94152..00000000 --- a/tests/cases/remote/config/scripts/statistics.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env python - -import argparse -from pathlib import Path - - -def main(): - parser = argparse.ArgumentParser(description="A script mocking parts of icon in a form of a shell script.") - parser.add_argument("file", nargs="+", type=str, help="The files to analyse.") - args = parser.parse_args() - Path("analysis").write_text(f"analysis for file {args.file}") - - -if __name__ == "__main__": - main() diff --git a/tests/cases/remote/data/config.txt b/tests/cases/remote/data/config.txt deleted file mode 100644 index 52ca5c4b..00000000 --- a/tests/cases/remote/data/config.txt +++ /dev/null @@ -1,251 +0,0 @@ -cycles: - - bimonthly_tasks [date: 2026-01-01 00:00:00]: - tasks: - - icon [foo: 0, bar: 3.0, date: 2026-01-01 00:00:00]: - input: - - initial_conditions - - forcing - output: - - icon_output [foo: 0, bar: 3.0, date: 2026-01-01 00:00:00] - - icon_restart [foo: 0, bar: 3.0, date: 2026-01-01 00:00:00] - name: 'icon' - coordinates: {'foo': 0, 'bar': 3.0, 'date': datetime.datetime(2026, 1, 1, 0, 0)} - cycle point: [2026-01-01 00:00:00 -- 2026-07-01 00:00:00] - plugin: 'shell' - src: scripts/icon.py - command: 'icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}' - env source files: [] - - icon [foo: 1, bar: 3.0, date: 2026-01-01 00:00:00]: - input: - - initial_conditions - - forcing - output: - - icon_output [foo: 1, bar: 3.0, date: 2026-01-01 00:00:00] - - icon_restart [foo: 1, bar: 3.0, date: 2026-01-01 00:00:00] - name: 'icon' - coordinates: {'foo': 1, 'bar': 3.0, 'date': datetime.datetime(2026, 1, 1, 0, 0)} - cycle point: [2026-01-01 00:00:00 -- 2026-07-01 00:00:00] - plugin: 'shell' - src: scripts/icon.py - command: 'icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}' - env source files: [] - - statistics_foo [bar: 3.0, date: 2026-01-01 00:00:00]: - input: - - icon_output [foo: 0, bar: 3.0, date: 2026-01-01 00:00:00] - - icon_output [foo: 1, bar: 3.0, date: 2026-01-01 00:00:00] - output: - - analysis_foo [bar: 3.0, date: 2026-01-01 00:00:00] - name: 'statistics_foo' - coordinates: {'bar': 3.0, 'date': datetime.datetime(2026, 1, 1, 0, 0)} - cycle point: [2026-01-01 00:00:00 -- 2026-07-01 00:00:00] - plugin: 'shell' - src: scripts/statistics.py - command: 'statistics.py {PORT::None}' - env source files: [] - - statistics_foo_bar [date: 2026-01-01 00:00:00]: - input: - - analysis_foo [bar: 3.0, date: 2026-01-01 00:00:00] - output: - - 
analysis_foo_bar [date: 2026-01-01 00:00:00] - name: 'statistics_foo_bar' - coordinates: {'date': datetime.datetime(2026, 1, 1, 0, 0)} - cycle point: [2026-01-01 00:00:00 -- 2026-07-01 00:00:00] - plugin: 'shell' - src: scripts/statistics.py - command: 'statistics.py {PORT::None}' - env source files: [] - - bimonthly_tasks [date: 2026-07-01 00:00:00]: - tasks: - - icon [foo: 0, bar: 3.0, date: 2026-07-01 00:00:00]: - input: - - icon_restart [foo: 0, bar: 3.0, date: 2026-01-01 00:00:00] - - forcing - output: - - icon_output [foo: 0, bar: 3.0, date: 2026-07-01 00:00:00] - - icon_restart [foo: 0, bar: 3.0, date: 2026-07-01 00:00:00] - name: 'icon' - coordinates: {'foo': 0, 'bar': 3.0, 'date': datetime.datetime(2026, 7, 1, 0, 0)} - cycle point: [2026-07-01 00:00:00 -- 2027-01-01 00:00:00] - plugin: 'shell' - src: scripts/icon.py - command: 'icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}' - env source files: [] - - icon [foo: 1, bar: 3.0, date: 2026-07-01 00:00:00]: - input: - - icon_restart [foo: 1, bar: 3.0, date: 2026-01-01 00:00:00] - - forcing - output: - - icon_output [foo: 1, bar: 3.0, date: 2026-07-01 00:00:00] - - icon_restart [foo: 1, bar: 3.0, date: 2026-07-01 00:00:00] - name: 'icon' - coordinates: {'foo': 1, 'bar': 3.0, 'date': datetime.datetime(2026, 7, 1, 0, 0)} - cycle point: [2026-07-01 00:00:00 -- 2027-01-01 00:00:00] - plugin: 'shell' - src: scripts/icon.py - command: 'icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}' - env source files: [] - - statistics_foo [bar: 3.0, date: 2026-07-01 00:00:00]: - input: - - icon_output [foo: 0, bar: 3.0, date: 2026-07-01 00:00:00] - - icon_output [foo: 1, bar: 3.0, date: 2026-07-01 00:00:00] - output: - - analysis_foo [bar: 3.0, date: 2026-07-01 00:00:00] - name: 'statistics_foo' - coordinates: {'bar': 3.0, 'date': datetime.datetime(2026, 7, 1, 0, 0)} - cycle point: [2026-07-01 00:00:00 -- 2027-01-01 00:00:00] - plugin: 'shell' - src: scripts/statistics.py - command: 'statistics.py {PORT::None}' - env source files: [] - - statistics_foo_bar [date: 2026-07-01 00:00:00]: - input: - - analysis_foo [bar: 3.0, date: 2026-07-01 00:00:00] - output: - - analysis_foo_bar [date: 2026-07-01 00:00:00] - name: 'statistics_foo_bar' - coordinates: {'date': datetime.datetime(2026, 7, 1, 0, 0)} - cycle point: [2026-07-01 00:00:00 -- 2027-01-01 00:00:00] - plugin: 'shell' - src: scripts/statistics.py - command: 'statistics.py {PORT::None}' - env source files: [] - - bimonthly_tasks [date: 2027-01-01 00:00:00]: - tasks: - - icon [foo: 0, bar: 3.0, date: 2027-01-01 00:00:00]: - input: - - icon_restart [foo: 0, bar: 3.0, date: 2026-07-01 00:00:00] - - forcing - output: - - icon_output [foo: 0, bar: 3.0, date: 2027-01-01 00:00:00] - - icon_restart [foo: 0, bar: 3.0, date: 2027-01-01 00:00:00] - name: 'icon' - coordinates: {'foo': 0, 'bar': 3.0, 'date': datetime.datetime(2027, 1, 1, 0, 0)} - cycle point: [2027-01-01 00:00:00 -- 2027-07-01 00:00:00] - plugin: 'shell' - src: scripts/icon.py - command: 'icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}' - env source files: [] - - icon [foo: 1, bar: 3.0, date: 2027-01-01 00:00:00]: - input: - - icon_restart [foo: 1, bar: 3.0, date: 2026-07-01 00:00:00] - - forcing - output: - - icon_output [foo: 1, bar: 3.0, date: 2027-01-01 00:00:00] - - icon_restart [foo: 1, bar: 3.0, date: 2027-01-01 00:00:00] - name: 'icon' - coordinates: {'foo': 1, 'bar': 3.0, 'date': datetime.datetime(2027, 1, 1, 0, 0)} - cycle point: [2027-01-01 
00:00:00 -- 2027-07-01 00:00:00] - plugin: 'shell' - src: scripts/icon.py - command: 'icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}' - env source files: [] - - statistics_foo [bar: 3.0, date: 2027-01-01 00:00:00]: - input: - - icon_output [foo: 0, bar: 3.0, date: 2027-01-01 00:00:00] - - icon_output [foo: 1, bar: 3.0, date: 2027-01-01 00:00:00] - output: - - analysis_foo [bar: 3.0, date: 2027-01-01 00:00:00] - name: 'statistics_foo' - coordinates: {'bar': 3.0, 'date': datetime.datetime(2027, 1, 1, 0, 0)} - cycle point: [2027-01-01 00:00:00 -- 2027-07-01 00:00:00] - plugin: 'shell' - src: scripts/statistics.py - command: 'statistics.py {PORT::None}' - env source files: [] - - statistics_foo_bar [date: 2027-01-01 00:00:00]: - input: - - analysis_foo [bar: 3.0, date: 2027-01-01 00:00:00] - output: - - analysis_foo_bar [date: 2027-01-01 00:00:00] - name: 'statistics_foo_bar' - coordinates: {'date': datetime.datetime(2027, 1, 1, 0, 0)} - cycle point: [2027-01-01 00:00:00 -- 2027-07-01 00:00:00] - plugin: 'shell' - src: scripts/statistics.py - command: 'statistics.py {PORT::None}' - env source files: [] - - bimonthly_tasks [date: 2027-07-01 00:00:00]: - tasks: - - icon [foo: 0, bar: 3.0, date: 2027-07-01 00:00:00]: - input: - - icon_restart [foo: 0, bar: 3.0, date: 2027-01-01 00:00:00] - - forcing - output: - - icon_output [foo: 0, bar: 3.0, date: 2027-07-01 00:00:00] - - icon_restart [foo: 0, bar: 3.0, date: 2027-07-01 00:00:00] - name: 'icon' - coordinates: {'foo': 0, 'bar': 3.0, 'date': datetime.datetime(2027, 7, 1, 0, 0)} - cycle point: [2027-07-01 00:00:00 -- 2028-01-01 00:00:00] - plugin: 'shell' - src: scripts/icon.py - command: 'icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}' - env source files: [] - - icon [foo: 1, bar: 3.0, date: 2027-07-01 00:00:00]: - input: - - icon_restart [foo: 1, bar: 3.0, date: 2027-01-01 00:00:00] - - forcing - output: - - icon_output [foo: 1, bar: 3.0, date: 2027-07-01 00:00:00] - - icon_restart [foo: 1, bar: 3.0, date: 2027-07-01 00:00:00] - name: 'icon' - coordinates: {'foo': 1, 'bar': 3.0, 'date': datetime.datetime(2027, 7, 1, 0, 0)} - cycle point: [2027-07-01 00:00:00 -- 2028-01-01 00:00:00] - plugin: 'shell' - src: scripts/icon.py - command: 'icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}' - env source files: [] - - statistics_foo [bar: 3.0, date: 2027-07-01 00:00:00]: - input: - - icon_output [foo: 0, bar: 3.0, date: 2027-07-01 00:00:00] - - icon_output [foo: 1, bar: 3.0, date: 2027-07-01 00:00:00] - output: - - analysis_foo [bar: 3.0, date: 2027-07-01 00:00:00] - name: 'statistics_foo' - coordinates: {'bar': 3.0, 'date': datetime.datetime(2027, 7, 1, 0, 0)} - cycle point: [2027-07-01 00:00:00 -- 2028-01-01 00:00:00] - plugin: 'shell' - src: scripts/statistics.py - command: 'statistics.py {PORT::None}' - env source files: [] - - statistics_foo_bar [date: 2027-07-01 00:00:00]: - input: - - analysis_foo [bar: 3.0, date: 2027-07-01 00:00:00] - output: - - analysis_foo_bar [date: 2027-07-01 00:00:00] - name: 'statistics_foo_bar' - coordinates: {'date': datetime.datetime(2027, 7, 1, 0, 0)} - cycle point: [2027-07-01 00:00:00 -- 2028-01-01 00:00:00] - plugin: 'shell' - src: scripts/statistics.py - command: 'statistics.py {PORT::None}' - env source files: [] - - yearly [date: 2026-01-01 00:00:00]: - tasks: - - merge [date: 2026-01-01 00:00:00]: - input: - - analysis_foo_bar [date: 2026-01-01 00:00:00] - - analysis_foo_bar [date: 2026-07-01 00:00:00] - output: - - 
yearly_analysis [date: 2026-01-01 00:00:00] - name: 'merge' - coordinates: {'date': datetime.datetime(2026, 1, 1, 0, 0)} - cycle point: [2026-01-01 00:00:00 -- 2027-01-01 00:00:00] - plugin: 'shell' - src: scripts/merge.py - command: 'merge.py {PORT::None}' - env source files: [] - - yearly [date: 2027-01-01 00:00:00]: - tasks: - - merge [date: 2027-01-01 00:00:00]: - input: - - analysis_foo_bar [date: 2027-01-01 00:00:00] - - analysis_foo_bar [date: 2027-07-01 00:00:00] - output: - - yearly_analysis [date: 2027-01-01 00:00:00] - name: 'merge' - coordinates: {'date': datetime.datetime(2027, 1, 1, 0, 0)} - cycle point: [2027-01-01 00:00:00 -- 2028-01-01 00:00:00] - plugin: 'shell' - src: scripts/merge.py - command: 'merge.py {PORT::None}' - env source files: [] \ No newline at end of file diff --git a/tests/cases/remote/svg/.gitkeep b/tests/cases/remote/svg/.gitkeep deleted file mode 100644 index e69de29b..00000000 From a51ab9a61466762ab3c5e7470d9c48010eeca4f4 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Mon, 2 Jun 2025 17:39:09 +0200 Subject: [PATCH 16/27] . --- .vscode/settings.json | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 3e99ede3..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "python.testing.pytestArgs": [ - "." - ], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true -} \ No newline at end of file From c149a1064568010a46f23030fc74a3f25c85ccbf Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 3 Jun 2025 06:27:34 +0200 Subject: [PATCH 17/27] . --- src/sirocco/parsing/yaml_data_models.py | 10 +++++----- src/sirocco/workgraph.py | 1 - tests/test_workgraph.py | 5 +++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/sirocco/parsing/yaml_data_models.py b/src/sirocco/parsing/yaml_data_models.py index 62b08cde..f4c891f7 100644 --- a/src/sirocco/parsing/yaml_data_models.py +++ b/src/sirocco/parsing/yaml_data_models.py @@ -713,7 +713,7 @@ def from_config_file(cls, config_path: str) -> Self: object_["rootdir"] = config_resolved_path.parent adapter = TypeAdapter(cls) return adapter.validate_python(object_) - + @classmethod def from_yaml_str(cls, yaml_content: str, name: str = None, rootdir: Path = None) -> Self: """Creates a Workflow instance from a YAML string. @@ -730,17 +730,17 @@ def from_yaml_str(cls, yaml_content: str, name: str = None, rootdir: Path = None if yaml_content.strip() == "": msg = "YAML content is empty." 
raise ValueError(msg) - + reader = YAML(typ="safe", pure=True) object_ = reader.load(StringIO(yaml_content)) - + # Set name if not specified in YAML if "name" not in object_: object_["name"] = name or "workflow" - + # Set rootdir if not specified object_["rootdir"] = rootdir or Path.cwd() - + adapter = TypeAdapter(cls) return adapter.validate_python(object_) diff --git a/src/sirocco/workgraph.py b/src/sirocco/workgraph.py index 54cdb29f..50a4ded0 100644 --- a/src/sirocco/workgraph.py +++ b/src/sirocco/workgraph.py @@ -9,7 +9,6 @@ import aiida_workgraph # type: ignore[import-untyped] # does not have proper typing and stubs import aiida_workgraph.tasks.factory.shelljob_task # type: ignore[import-untyped] # is only for a workaround from aiida.common.exceptions import NotExistent -from rich.pretty import pprint from sirocco import core diff --git a/tests/test_workgraph.py b/tests/test_workgraph.py index 8cbdeceb..e5d5fc49 100644 --- a/tests/test_workgraph.py +++ b/tests/test_workgraph.py @@ -1,8 +1,9 @@ +import textwrap + import pytest from aiida import orm -import textwrap -from sirocco.core import Workflow, GeneratedData +from sirocco.core import GeneratedData, Workflow from sirocco.parsing import yaml_data_models as models from sirocco.workgraph import AiidaWorkGraph From 80948e6c867a925414b04567ed2763ce47e0661f Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 3 Jun 2025 06:37:22 +0200 Subject: [PATCH 18/27] . --- pyproject.toml | 2 +- src/sirocco/parsing/yaml_data_models.py | 30 ------------------------- 2 files changed, 1 insertion(+), 31 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 42c55e9a..fc8d19e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ "pygraphviz", "lxml", "f90nml", - "aiida-shell @ git+https://github.com/sphuber/aiida-shell.git@fix/105/handle-remote-data-argument-placeholders", + "aiida-shell @ git+https://github.com/sphuber/aiida-shell.git@master", ] license = {file = "LICENSE"} diff --git a/src/sirocco/parsing/yaml_data_models.py b/src/sirocco/parsing/yaml_data_models.py index f4c891f7..b00ad31c 100644 --- a/src/sirocco/parsing/yaml_data_models.py +++ b/src/sirocco/parsing/yaml_data_models.py @@ -714,36 +714,6 @@ def from_config_file(cls, config_path: str) -> Self: adapter = TypeAdapter(cls) return adapter.validate_python(object_) - @classmethod - def from_yaml_str(cls, yaml_content: str, name: str = None, rootdir: Path = None) -> Self: - """Creates a Workflow instance from a YAML string. - - Args: - yaml_content (str): The YAML content as a string. - name (str, optional): The name for the workflow. If not provided, defaults to "workflow". - rootdir (Path, optional): The root directory for the workflow. Defaults to current directory. - - Returns: - OBJECT_T: An instance of the specified class type with data parsed and - validated from the YAML content. - """ - if yaml_content.strip() == "": - msg = "YAML content is empty." 
- raise ValueError(msg) - - reader = YAML(typ="safe", pure=True) - object_ = reader.load(StringIO(yaml_content)) - - # Set name if not specified in YAML - if "name" not in object_: - object_["name"] = name or "workflow" - - # Set rootdir if not specified - object_["rootdir"] = rootdir or Path.cwd() - - adapter = TypeAdapter(cls) - return adapter.validate_python(object_) - OBJECT_T = typing.TypeVar("OBJECT_T") From c2306671d88fdeda1f183878504b695394a54d61 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 3 Jun 2025 07:14:08 +0200 Subject: [PATCH 19/27] hatch fmt and types:check pass --- pyproject.toml | 10 +- src/sirocco/workgraph.py | 61 ++----- tests/conftest.py | 12 +- tests/test_workgraph.py | 335 +++++++++++++-------------------------- 4 files changed, 142 insertions(+), 276 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fc8d19e5..d767ff71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,7 @@ filterwarnings = [ source = ["sirocco"] [tool.ruff] -include = ["src/*py", "tests/*py"] +include = ["src/*py", "tests/*py"] # PRCOMMENT: Do we want to run Ruff via CI on our test files?? target-version = "py310" [tool.ruff.lint] @@ -76,6 +76,14 @@ ignore = [ "TRY003", # write custom error messages for formatting ] +[tool.ruff.lint.per-file-ignores] +"tests/*py" = [ + "SLF001", # Private member accessed + "S101", # Use of assert detected + "T201", # `print` found + "PLR2004", # Magic value used in comparison +] + ## Hatch configurations [tool.hatch.metadata] diff --git a/src/sirocco/workgraph.py b/src/sirocco/workgraph.py index 50a4ded0..bf38cb65 100644 --- a/src/sirocco/workgraph.py +++ b/src/sirocco/workgraph.py @@ -15,9 +15,7 @@ if TYPE_CHECKING: from aiida_workgraph.socket import TaskSocket # type: ignore[import-untyped] - WorkgraphDataNode: TypeAlias = ( - aiida.orm.RemoteData | aiida.orm.SinglefileData | aiida.orm.FolderData - ) + WorkgraphDataNode: TypeAlias = aiida.orm.RemoteData | aiida.orm.SinglefileData | aiida.orm.FolderData # This is a workaround required when splitting the initialization of the task and its linked nodes Merging this into @@ -152,8 +150,7 @@ def get_aiida_label_from_graph_item(cls, obj: core.GraphItem) -> str: through the replacement of invalid chars in the coordinates duplication can happen but it is unlikely. 
""" return cls.replace_invalid_chars_in_label( - f"{obj.name}" - + "__".join(f"_{key}_{value}" for key, value in obj.coordinates.items()) + f"{obj.name}" + "__".join(f"_{key}_{value}" for key, value in obj.coordinates.items()) ) @staticmethod @@ -167,17 +164,11 @@ def split_cmd_arg(command_line: str) -> tuple[str, str]: def label_placeholder(cls, data: core.Data) -> str: return f"{{{cls.get_aiida_label_from_graph_item(data)}}}" - def data_from_core( - self, core_available_data: core.AvailableData - ) -> WorkgraphDataNode: - return self._aiida_data_nodes[ - self.get_aiida_label_from_graph_item(core_available_data) - ] + def data_from_core(self, core_available_data: core.AvailableData) -> WorkgraphDataNode: + return self._aiida_data_nodes[self.get_aiida_label_from_graph_item(core_available_data)] def socket_from_core(self, core_generated_data: core.GeneratedData) -> TaskSocket: - return self._aiida_socket_nodes[ - self.get_aiida_label_from_graph_item(core_generated_data) - ] + return self._aiida_socket_nodes[self.get_aiida_label_from_graph_item(core_generated_data)] def task_from_core(self, core_task: core.Task) -> aiida_workgraph.Task: return self._aiida_task_nodes[self.get_aiida_label_from_graph_item(core_task)] @@ -193,11 +184,7 @@ def _add_aiida_input_data_node(self, data: core.Data): Create an `aiida.orm.Data` instance from the provided graph item. """ label = self.get_aiida_label_from_graph_item(data) - data_full_path = ( - data.src - if data.src.is_absolute() - else self._core_workflow.config_rootdir / data.src - ) + data_full_path = data.src if data.src.is_absolute() else self._core_workflow.config_rootdir / data.src if data.computer is not None: try: @@ -210,13 +197,9 @@ def _add_aiida_input_data_node(self, data: core.Data): remote_path=str(data.src), label=label, computer=computer ) elif data.type == "file": - self._aiida_data_nodes[label] = aiida.orm.SinglefileData( - label=label, file=data_full_path - ) + self._aiida_data_nodes[label] = aiida.orm.SinglefileData(label=label, file=data_full_path) elif data.type == "dir": - self._aiida_data_nodes[label] = aiida.orm.FolderData( - label=label, tree=data_full_path - ) + self._aiida_data_nodes[label] = aiida.orm.FolderData(label=label, tree=data_full_path) else: msg = f"Data type {data.type!r} not supported. Please use 'file' or 'dir'." 
raise ValueError(msg) @@ -255,9 +238,7 @@ def _create_shell_task_node(self, task: core.ShellTask): else (task.config_rootdir / env_source_path) for env_source_file in task.env_source_files ] - prepend_text = "\n".join( - [f"source {env_source_path}" for env_source_path in env_source_paths] - ) + prepend_text = "\n".join([f"source {env_source_path}" for env_source_path in env_source_paths]) metadata["options"] = {"prepend_text": prepend_text} # NOTE: Hardcoded for now, possibly make user-facing option metadata["options"]["use_symlinks"] = True @@ -325,9 +306,7 @@ def _link_input_node_to_shelltask(self, task: core.ShellTask, input_: core.Data) def _link_wait_on_to_task(self, task: core.Task): """link wait on tasks to workgraph task""" - self.task_from_core(task).wait = [ - self.task_from_core(wt) for wt in task.wait_on - ] + self.task_from_core(task).wait = [self.task_from_core(wt) for wt in task.wait_on] def _set_shelljob_arguments(self, task: core.ShellTask): """Set AiiDA ShellJob arguments by replacing port placeholders with AiiDA labels.""" @@ -342,7 +321,7 @@ def _set_shelljob_arguments(self, task: core.ShellTask): raise ValueError(msg) # Build input_labels dictionary for port resolution - input_labels = {} + input_labels: dict[str, list[str]] = {} for port_name, input_list in task.inputs.items(): input_labels[port_name] = [] for input_ in input_list: @@ -365,11 +344,7 @@ def _set_shelljob_filenames(self, task: core.ShellTask): for input_ in task.input_data_nodes(): input_label = self.get_aiida_label_from_graph_item(input_) - if ( - task.computer - and input_.computer - and isinstance(input_, core.AvailableData) - ): + if task.computer and input_.computer and isinstance(input_, core.AvailableData): # For RemoteData on the same computer, use just the filename filename = Path(input_.src).name filenames[input_.name] = filename @@ -379,9 +354,7 @@ def _set_shelljob_filenames(self, task: core.ShellTask): # coordinates need unique filenames to avoid conflicts in the working directory # Count how many inputs have the same base name - same_name_count = sum( - 1 for inp in task.input_data_nodes() if inp.name == input_.name - ) + same_name_count = sum(1 for inp in task.input_data_nodes() if inp.name == input_.name) if same_name_count > 1: # Multiple data nodes with same base name - use full label as filename @@ -389,9 +362,7 @@ def _set_shelljob_filenames(self, task: core.ShellTask): filename = input_label else: # Single data node with this name - can use simple filename - filename = ( - Path(input_.src).name if hasattr(input_, "src") else input_.name - ) + filename = Path(input_.src).name if hasattr(input_, "src") else input_.name # The key in filenames dict should be the input label (what's used in nodes dict) filenames[input_label] = filename @@ -419,9 +390,7 @@ def submit( timeout: int = 60, metadata: None | dict[str, Any] = None, ) -> aiida.orm.Node: - self._workgraph.submit( - inputs=inputs, wait=wait, timeout=timeout, metadata=metadata - ) + self._workgraph.submit(inputs=inputs, wait=wait, timeout=timeout, metadata=metadata) if (output_node := self._workgraph.process) is None: # The node should not be None after a run, it should contain exit code and message so if the node is None something internal went wrong msg = "Something went wrong when running workgraph. Please contact a developer." 
diff --git a/tests/conftest.py b/tests/conftest.py index 67171ce0..b4483f33 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,7 +21,7 @@ def __init__(self, url: str, response: requests.Response): def download_file(url: str, file_path: pathlib.Path): - response = requests.get(url) + response = requests.get(url) # noqa: S113 if not response.ok: raise DownloadError(url, response) @@ -48,7 +48,7 @@ def icon_grid_simple_path(pytestconfig): @pytest.fixture def icon_filepath_executable() -> str: - which_icon = subprocess.run(["which", "icon"], capture_output=True, check=False) + which_icon = subprocess.run(["which", "icon"], capture_output=True, check=False) # noqa: S607 if which_icon.returncode: msg = "Could not find icon executable." raise FileNotFoundError(msg) @@ -99,7 +99,11 @@ def minimal_invert_task_io_config() -> models.ConfigWorkflow: ], data=models.ConfigData( available=[ - models.ConfigAvailableData(name="availalble", type=models.DataType.FILE, src=pathlib.Path("foo.txt")) + models.ConfigAvailableData( + name="availalble", + type=models.DataType.FILE, + src=pathlib.Path("foo.txt"), + ) ], generated=[ models.ConfigGeneratedData(name="output_a", type=models.DataType.DIR, src=pathlib.Path("bar")), @@ -154,7 +158,7 @@ def serialize_nml(config_paths: dict[str, pathlib.Path], workflow: workflow.Work def pytest_configure(config): if config.getoption("reserialize"): - print("Regenerating serialized references") # noqa: T201 # this is actual UX, not a debug print + print("Regenerating serialized references") # this is actual UX, not a debug print for config_case in ALL_CONFIG_CASES: config_paths = generate_config_paths(config_case) wf = workflow.Workflow.from_config_file(str(config_paths["yml"])) diff --git a/tests/test_workgraph.py b/tests/test_workgraph.py index e5d5fc49..4eeea427 100644 --- a/tests/test_workgraph.py +++ b/tests/test_workgraph.py @@ -252,7 +252,7 @@ def test_parameterized_filename_conflicts(tmp_path): assert filenames["shared_config"] == "config.json" # simulation_output should use full labels (conflict with other analyze tasks) - sim_output_keys = [k for k in filenames.keys() if k.startswith("simulation_output")] + sim_output_keys = [k for k in filenames if k.startswith("simulation_output")] assert len(sim_output_keys) == 2 # Should have both foo=1 and foo=2 inputs for key in sim_output_keys: @@ -271,99 +271,6 @@ def test_parameterized_filename_conflicts(tmp_path): assert filenames["shared_config"] == "config.json" -@pytest.mark.usefixtures("aiida_localhost") -def test_comprehensive_parameterized_workflow(tmp_path): - """Test parameterized workflow behavior and properties.""" - yaml_str = textwrap.dedent(f""" - start_date: &start "2026-01-01T00:00" - stop_date: &stop "2026-07-01T00:00" - cycles: - - main: - cycling: - start_date: *start - stop_date: *stop - period: P6M - tasks: - - simulate: - inputs: - - config: - port: cfg - outputs: [sim_output] - - analyze: - inputs: - - sim_output: - parameters: {{foo: all, bar: single}} - port: data - outputs: [analysis] - tasks: - - simulate: - plugin: shell - command: "sim.py {{PORT::cfg}}" - src: {tmp_path}/sim.py - parameters: [foo, bar] - computer: localhost - - analyze: - plugin: shell - command: "analyze.py {{PORT::data}}" - src: {tmp_path}/analyze.py - parameters: [bar] - computer: localhost - data: - available: - - config: - type: file - src: {tmp_path}/config.txt - computer: localhost - generated: - - sim_output: - type: file - src: output.dat - parameters: [foo, bar] - - analysis: - type: file - src: analysis.txt 
- parameters: [bar] - parameters: - foo: [0, 1] - bar: [3.0] - """) - - config_file = tmp_path / "config.yml" - config_file.write_text(yaml_str) - - # Create files - (tmp_path / "config.txt").touch() - (tmp_path / "sim.py").touch() - (tmp_path / "analyze.py").touch() - - core_wf = Workflow.from_config_file(str(config_file)) - aiida_wf = AiidaWorkGraph(core_wf) - - # Verify task structure - sim_tasks = [t for t in aiida_wf._workgraph.tasks if t.name.startswith("simulate")] - analyze_tasks = [t for t in aiida_wf._workgraph.tasks if t.name.startswith("analyze")] - - assert len(sim_tasks) == 2 # 2 foo values × 1 bar value = 2 tasks - assert len(analyze_tasks) == 1 # 1 bar value = 1 task - - # Check simulate tasks (should have simple config filename) - for task in sim_tasks: - filenames = task.inputs.filenames.value - assert filenames["config"] == "config.txt" # No conflict, simple name - - # Check analyze task (should have complex filenames due to conflicts) - analyze_task = analyze_tasks[0] - filenames = analyze_task.inputs.filenames.value - - # Should have 2 sim_output inputs with full labels as filenames - sim_output_keys = [k for k in filenames.keys() if k.startswith("sim_output")] - assert len(sim_output_keys) == 2 - - for key in sim_output_keys: - assert filenames[key] == key # Full label used as filename - assert "foo_" in key and "bar_3_0" in key # Contains parameter info - - @pytest.mark.usefixtures("aiida_localhost") def test_parameterized_workflow_regression(tmp_path): """Regression test for exact parameterized workflow output.""" @@ -447,7 +354,10 @@ def test_parameterized_workflow_regression(tmp_path): nodes_keys = list(task.inputs.nodes._sockets.keys()) # Expected values for regression detection - expected_keys = ["sim_result_param_1___date_2026_01_01_00_00_00", "sim_result_param_2___date_2026_01_01_00_00_00"] + expected_keys = [ + "sim_result_param_1___date_2026_01_01_00_00_00", + "sim_result_param_2___date_2026_01_01_00_00_00", + ] expected_filenames = { "sim_result_param_1___date_2026_01_01_00_00_00": "sim_result_param_1___date_2026_01_01_00_00_00", "sim_result_param_2___date_2026_01_01_00_00_00": "sim_result_param_2___date_2026_01_01_00_00_00", @@ -461,6 +371,102 @@ def test_parameterized_workflow_regression(tmp_path): assert arguments == expected_arguments +@pytest.mark.usefixtures("aiida_localhost") +def test_comprehensive_parameterized_workflow(tmp_path): + """Test parameterized workflow behavior and properties.""" + yaml_str = textwrap.dedent(f""" + start_date: &start "2026-01-01T00:00" + stop_date: &stop "2026-07-01T00:00" + cycles: + - main: + cycling: + start_date: *start + stop_date: *stop + period: P6M + tasks: + - simulate: + inputs: + - config: + port: cfg + outputs: [sim_output] + - analyze: + inputs: + - sim_output: + parameters: {{foo: all, bar: single}} + port: data + outputs: [analysis] + tasks: + - simulate: + plugin: shell + command: "sim.py {{PORT::cfg}}" + src: {tmp_path}/sim.py + parameters: [foo, bar] + computer: localhost + - analyze: + plugin: shell + command: "analyze.py {{PORT::data}}" + src: {tmp_path}/analyze.py + parameters: [bar] + computer: localhost + data: + available: + - config: + type: file + src: {tmp_path}/config.txt + computer: localhost + generated: + - sim_output: + type: file + src: output.dat + parameters: [foo, bar] + - analysis: + type: file + src: analysis.txt + parameters: [bar] + parameters: + foo: [0, 1] + bar: [3.0] + """) + + config_file = tmp_path / "config.yml" + config_file.write_text(yaml_str) + + # Create files + 
(tmp_path / "config.txt").touch() + (tmp_path / "sim.py").touch() + (tmp_path / "analyze.py").touch() + + core_wf = Workflow.from_config_file(str(config_file)) + aiida_wf = AiidaWorkGraph(core_wf) + + # Verify task structure + sim_tasks = [t for t in aiida_wf._workgraph.tasks if t.name.startswith("simulate")] + analyze_tasks = [t for t in aiida_wf._workgraph.tasks if t.name.startswith("analyze")] + + assert len(sim_tasks) == 2 # 2 foo values, and 1 bar value -> 2 tasks + assert len(analyze_tasks) == 1 # 1 bar value -> 1 task + + # Check simulate tasks (should have simple config filename) + for task in sim_tasks: + filenames = task.inputs.filenames.value + assert filenames["config"] == "config.txt" # No conflict, simple name + + # Check analyze task (should have complex filenames due to conflicts) + analyze_task = analyze_tasks[0] + filenames = analyze_task.inputs.filenames.value + + # Should have 2 sim_output inputs with full labels as filenames + sim_output_keys = [k for k in filenames if k.startswith("sim_output")] + assert len(sim_output_keys) == 2 + + for key in sim_output_keys: + assert filenames[key] == key # Full label used as filename + assert "foo_" in key + assert "bar_3_0" in key + + +# PRCOMMENT: Kept this hardcoded, explicit test based on the `parameters` case +# Can probably be removed as the other tests cover the behavior, but wanted to keep for now @pytest.mark.usefixtures("aiida_localhost") def test_comprehensive_parameterized_explicit(tmp_path): import pathlib @@ -579,117 +585,6 @@ def test_comprehensive_parameterized_explicit(tmp_path): bar: [3.0] """ ) - # yaml_str = textwrap.dedent( - # """ - # start_date: &root_start_date "2026-01-01T00:00" - # stop_date: &root_stop_date "2028-01-01T00:00" - # cycles: - # - bimonthly_tasks: - # cycling: - # start_date: *root_start_date - # stop_date: *root_stop_date - # period: P6M - # tasks: - # - icon: - # inputs: - # - initial_conditions: - # when: - # at: *root_start_date - # port: init - # - icon_restart: - # when: - # after: *root_start_date - # target_cycle: - # lag: -P6M - # parameters: - # foo: single - # bar: single - # port: restart - # - forcing: - # port: forcing - # outputs: [icon_output, icon_restart] - # - statistics_foo: - # inputs: - # - icon_output: - # parameters: - # bar: single - # port: None - # outputs: [analysis_foo] - # - statistics_foo_bar: - # inputs: - # - analysis_foo: - # port: None - # outputs: [analysis_foo_bar] - # - yearly: - # cycling: - # start_date: *root_start_date - # stop_date: *root_stop_date - # period: P1Y - # tasks: - # - merge: - # inputs: - # - analysis_foo_bar: - # target_cycle: - # lag: ["P0M", "P6M"] - # port: None - # outputs: [yearly_analysis] - # tasks: - # - icon: - # plugin: shell - # src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/icon.py - # command: "icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}" - # parameters: [foo, bar] - # computer: localhost - # - statistics_foo: - # plugin: shell - # src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/statistics.py - # command: "statistics.py {PORT::None}" - # parameters: [bar] - # computer: localhost - # - statistics_foo_bar: - # plugin: shell - # src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/statistics.py - # command: "statistics.py {PORT::None}" - # computer: localhost - # - merge: - # plugin: shell - # src: 
/home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/scripts/merge.py - # command: "merge.py {PORT::None}" - # computer: localhost - # data: - # available: - # - initial_conditions: - # type: file - # src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/small/config/data/initial_conditions - # computer: localhost - # - forcing: - # type: file - # src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/parameters/config/data/forcing - # computer: localhost - # generated: - # - icon_output: - # type: file - # src: icon_output - # parameters: [foo, bar] - # - icon_restart: - # type: file - # src: restart - # parameters: [foo, bar] - # - analysis_foo: - # type: file - # src: analysis - # parameters: [bar] - # - analysis_foo_bar: - # type: file - # src: analysis - # - yearly_analysis: - # type: file - # src: analysis - # parameters: - # foo: [0, 1] - # bar: [3.0] - # """ - # ) yaml_file = tmp_path / "config.yml" yaml_file.write_text(yaml_str) @@ -697,9 +592,7 @@ def test_comprehensive_parameterized_explicit(tmp_path): aiida_wf = AiidaWorkGraph(core_workflow=core_wf) filenames_list = [task.inputs.filenames.value for task in aiida_wf._workgraph.tasks] arguments_list = [task.inputs.arguments.value for task in aiida_wf._workgraph.tasks] - nodes_list = [ - list(task.inputs.nodes._sockets.keys()) for task in aiida_wf._workgraph.tasks - ] + nodes_list = [list(task.inputs.nodes._sockets.keys()) for task in aiida_wf._workgraph.tasks] expected_filenames_list = [ {"forcing": "forcing", "initial_conditions": "initial_conditions"}, @@ -761,18 +654,12 @@ def test_comprehensive_parameterized_explicit(tmp_path): expected_arguments_list = [ "--restart --init {initial_conditions} --forcing {forcing}", "--restart --init {initial_conditions} --forcing {forcing}", - "--restart {icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00} --init " - "--forcing {forcing}", - "--restart {icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00} --init " - "--forcing {forcing}", - "--restart {icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00} --init " - "--forcing {forcing}", - "--restart {icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00} --init " - "--forcing {forcing}", - "--restart {icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00} --init " - "--forcing {forcing}", - "--restart {icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00} --init " - "--forcing {forcing}", + "--restart {icon_restart_foo_0___bar_3_0___date_2026_01_01_00_00_00} --init " "--forcing {forcing}", + "--restart {icon_restart_foo_1___bar_3_0___date_2026_01_01_00_00_00} --init " "--forcing {forcing}", + "--restart {icon_restart_foo_0___bar_3_0___date_2026_07_01_00_00_00} --init " "--forcing {forcing}", + "--restart {icon_restart_foo_1___bar_3_0___date_2026_07_01_00_00_00} --init " "--forcing {forcing}", + "--restart {icon_restart_foo_0___bar_3_0___date_2027_01_01_00_00_00} --init " "--forcing {forcing}", + "--restart {icon_restart_foo_1___bar_3_0___date_2027_01_01_00_00_00} --init " "--forcing {forcing}", "{icon_output_foo_0___bar_3_0___date_2026_01_01_00_00_00} " "{icon_output_foo_1___bar_3_0___date_2026_01_01_00_00_00}", "{icon_output_foo_0___bar_3_0___date_2026_07_01_00_00_00} " @@ -785,10 +672,8 @@ def test_comprehensive_parameterized_explicit(tmp_path): "{analysis_foo_bar_3_0___date_2026_07_01_00_00_00}", "{analysis_foo_bar_3_0___date_2027_01_01_00_00_00}", "{analysis_foo_bar_3_0___date_2027_07_01_00_00_00}", - 
"{analysis_foo_bar_date_2026_01_01_00_00_00} " - "{analysis_foo_bar_date_2026_07_01_00_00_00}", - "{analysis_foo_bar_date_2027_01_01_00_00_00} " - "{analysis_foo_bar_date_2027_07_01_00_00_00}", + "{analysis_foo_bar_date_2026_01_01_00_00_00} " "{analysis_foo_bar_date_2026_07_01_00_00_00}", + "{analysis_foo_bar_date_2027_01_01_00_00_00} " "{analysis_foo_bar_date_2027_07_01_00_00_00}", ] expected_nodes_list = [ From b4ca8ff214c45349c369de299e12bdf13e8e88a8 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 3 Jun 2025 08:57:02 +0200 Subject: [PATCH 20/27] . --- .gitignore | 2 ++ pyproject.toml | 2 +- src/sirocco/workgraph.py | 4 ++-- tests/cases/parameters/config/config.yml | 8 +++++--- tests/conftest.py | 6 +++--- tests/test_wc_workflow.py | 6 +++--- 6 files changed, 16 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index c3d39320..7d3cf6e1 100644 --- a/.gitignore +++ b/.gitignore @@ -165,3 +165,5 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +pyrightconfig.json diff --git a/pyproject.toml b/pyproject.toml index d767ff71..35d95c6a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ Changelog = "https://github.com/C2SM/Sirocco/blob/main/CHANGELOG.md" [tool.pytest.ini_options] # Configuration for [pytest](https://docs.pytest.org) -addopts = "--pdbcls=IPython.terminal.debugger:TerminalPdb" +addopts = "-s --pdbcls=IPython.terminal.debugger:TerminalPdb" norecursedirs = "tests/cases" markers = [ "slow: slow integration tests which are not recommended to run locally for normal development", diff --git a/src/sirocco/workgraph.py b/src/sirocco/workgraph.py index bf38cb65..cd2223eb 100644 --- a/src/sirocco/workgraph.py +++ b/src/sirocco/workgraph.py @@ -14,6 +14,7 @@ if TYPE_CHECKING: from aiida_workgraph.socket import TaskSocket # type: ignore[import-untyped] + from aiida_workgraph.sockets.builtins import SocketAny WorkgraphDataNode: TypeAlias = aiida.orm.RemoteData | aiida.orm.SinglefileData | aiida.orm.FolderData @@ -311,7 +312,7 @@ def _link_wait_on_to_task(self, task: core.Task): def _set_shelljob_arguments(self, task: core.ShellTask): """Set AiiDA ShellJob arguments by replacing port placeholders with AiiDA labels.""" workgraph_task = self.task_from_core(task) - workgraph_task_arguments = workgraph_task.inputs.arguments + workgraph_task_arguments: SocketAny = workgraph_task.inputs.arguments if workgraph_task_arguments is None: msg = ( @@ -374,7 +375,6 @@ def run( inputs: None | dict[str, Any] = None, metadata: None | dict[str, Any] = None, ) -> aiida.orm.Node: - # import ipdb; ipdb.set_trace() self._workgraph.run(inputs=inputs, metadata=metadata) if (output_node := self._workgraph.process) is None: # The node should not be None after a run, it should contain exit code and message so if the node is None something internal went wrong diff --git a/tests/cases/parameters/config/config.yml b/tests/cases/parameters/config/config.yml index 477b02e0..92904ebd 100644 --- a/tests/cases/parameters/config/config.yml +++ b/tests/cases/parameters/config/config.yml @@ -1,3 +1,4 @@ +--- start_date: &root_start_date "2026-01-01T00:00" stop_date: &root_stop_date "2028-01-01T00:00" @@ -55,9 +56,10 @@ cycles: tasks: - icon: plugin: shell - # For relative path execpt if this cannot be resolved to a registered code - # Probably shouldn't enforce to absolute path; or provide a code argument instead, this should be - # 
unambigous + # PRCOMMENT + # Relative path -> except if this cannot be resolved to a registered code + # Probably either enforce absolute path, or provide a code argument + # See: https://github.com/C2SM/Sirocco/pull/153 src: scripts/icon.py command: "icon.py --restart {PORT::restart} --init {PORT::init} --forcing {PORT::forcing}" parameters: [foo, bar] diff --git a/tests/conftest.py b/tests/conftest.py index b4483f33..a4437c0f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,7 +21,7 @@ def __init__(self, url: str, response: requests.Response): def download_file(url: str, file_path: pathlib.Path): - response = requests.get(url) # noqa: S113 + response = requests.get(url) # noqa: S113 request-without-timeout if not response.ok: raise DownloadError(url, response) @@ -87,7 +87,7 @@ def minimal_invert_task_io_config() -> models.ConfigWorkflow: ), models.ConfigCycleTask( name="task_a", - inputs=[models.ConfigCycleTaskInput(name="availalble", port="None")], + inputs=[models.ConfigCycleTaskInput(name="available", port="None")], outputs=[models.ConfigCycleTaskOutput(name="output_a")], ), ], @@ -100,7 +100,7 @@ def minimal_invert_task_io_config() -> models.ConfigWorkflow: data=models.ConfigData( available=[ models.ConfigAvailableData( - name="availalble", + name="available", type=models.DataType.FILE, src=pathlib.Path("foo.txt"), ) diff --git a/tests/test_wc_workflow.py b/tests/test_wc_workflow.py index ef808950..34dc4855 100644 --- a/tests/test_wc_workflow.py +++ b/tests/test_wc_workflow.py @@ -32,6 +32,7 @@ def test_icon(): # configs that are tested for running workgraph @pytest.mark.slow +@pytest.mark.usefixtures('aiida_localhost') @pytest.mark.parametrize( "config_case", [ @@ -39,18 +40,17 @@ def test_icon(): "parameters", ], ) -def test_run_workgraph(config_case, config_paths, aiida_computer): # noqa: ARG001 # config_case is overridden +def test_run_workgraph(config_case, config_paths): # noqa: ARG001 # config_case is overridden """Tests end-to-end the parsing from file up to running the workgraph. Automatically uses the aiida_profile fixture to create a new profile. Note to debug the test with your profile please run this in a separate file as the profile is deleted after test finishes. """ - # some configs reference computer "localhost" which we need to create beforehand - aiida_computer("localhost").store() core_workflow = Workflow.from_config_file(str(config_paths["yml"])) aiida_workflow = AiidaWorkGraph(core_workflow) output_node = aiida_workflow.run() + import ipdb; ipdb.set_trace() assert ( output_node.is_finished_ok ), f"Not successful run. Got exit code {output_node.exit_code} with message {output_node.exit_message}." From 4807db6bc47416ed2494ebd5641d9078c0e60f50 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 3 Jun 2025 09:02:17 +0200 Subject: [PATCH 21/27] . 
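
Remove the stray `import ipdb; ipdb.set_trace()` breakpoint left in `test_run_workgraph`, switch the `usefixtures` marker to double quotes, and move the UX-print comment in `conftest.py` onto its own line.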
--- tests/conftest.py | 3 ++- tests/test_wc_workflow.py | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index a4437c0f..0b3aa8a0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -158,7 +158,8 @@ def serialize_nml(config_paths: dict[str, pathlib.Path], workflow: workflow.Work def pytest_configure(config): if config.getoption("reserialize"): - print("Regenerating serialized references") # this is actual UX, not a debug print + print("Regenerating serialized references") + # this is actual UX, not a debug print for config_case in ALL_CONFIG_CASES: config_paths = generate_config_paths(config_case) wf = workflow.Workflow.from_config_file(str(config_paths["yml"])) diff --git a/tests/test_wc_workflow.py b/tests/test_wc_workflow.py index 34dc4855..6c9359cd 100644 --- a/tests/test_wc_workflow.py +++ b/tests/test_wc_workflow.py @@ -32,7 +32,7 @@ def test_icon(): # configs that are tested for running workgraph @pytest.mark.slow -@pytest.mark.usefixtures('aiida_localhost') +@pytest.mark.usefixtures("aiida_localhost") @pytest.mark.parametrize( "config_case", [ @@ -50,7 +50,6 @@ def test_run_workgraph(config_case, config_paths): # noqa: ARG001 # config_cas core_workflow = Workflow.from_config_file(str(config_paths["yml"])) aiida_workflow = AiidaWorkGraph(core_workflow) output_node = aiida_workflow.run() - import ipdb; ipdb.set_trace() assert ( output_node.is_finished_ok ), f"Not successful run. Got exit code {output_node.exit_code} with message {output_node.exit_message}." From 6a42fb8d172832c3cf664012ac489dad262d3ad1 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 3 Jun 2025 09:44:12 +0200 Subject: [PATCH 22/27] . --- pyproject.toml | 2 +- src/sirocco/workgraph.py | 47 +++++++++++++++++++++++++++++ tests/cases/small/config/config.yml | 6 ++-- tests/test-run.py | 13 ++++++++ 4 files changed, 65 insertions(+), 3 deletions(-) create mode 100644 tests/test-run.py diff --git a/pyproject.toml b/pyproject.toml index 35d95c6a..e66ad238 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ "pygraphviz", "lxml", "f90nml", - "aiida-shell @ git+https://github.com/sphuber/aiida-shell.git@master", + "aiida-shell>=0.8.1", ] license = {file = "LICENSE"} diff --git a/src/sirocco/workgraph.py b/src/sirocco/workgraph.py index cd2223eb..ae39eab9 100644 --- a/src/sirocco/workgraph.py +++ b/src/sirocco/workgraph.py @@ -342,6 +342,7 @@ def _set_shelljob_filenames(self, task: core.ShellTask): if not workgraph_task.inputs.filenames: return + # Handle input files for input_ in task.input_data_nodes(): input_label = self.get_aiida_label_from_graph_item(input_) @@ -368,8 +369,54 @@ def _set_shelljob_filenames(self, task: core.ShellTask): # The key in filenames dict should be the input label (what's used in nodes dict) filenames[input_label] = filename + # Handle output files + for output in task.output_data_nodes(): + output_label = self.get_aiida_label_from_graph_item(output) + + # For outputs, we always use the original src filename + # The script should create files with these names, and AiiDA will retrieve them + # Parameterization conflicts are handled later when files are copied/symlinked for analysis + output_filename = Path(output.src).name + filenames[output_label] = output_filename + workgraph_task.inputs.filenames.value = filenames + # def _set_shelljob_filenames(self, task: core.ShellTask): + # """Set AiiDA ShellJob filenames for data entities, including parameterized data.""" + # filenames = {} + # 
workgraph_task = self.task_from_core(task) + + # if not workgraph_task.inputs.filenames: + # return + + # for input_ in task.input_data_nodes(): + # input_label = self.get_aiida_label_from_graph_item(input_) + + # if task.computer and input_.computer and isinstance(input_, core.AvailableData): + # # For RemoteData on the same computer, use just the filename + # filename = Path(input_.src).name + # filenames[input_.name] = filename + # else: + # # For other cases (including GeneratedData), we need to handle parameterized data + # # Importantly, multiple data nodes with the same base name but different + # # coordinates need unique filenames to avoid conflicts in the working directory + + # # Count how many inputs have the same base name + # same_name_count = sum(1 for inp in task.input_data_nodes() if inp.name == input_.name) + + # if same_name_count > 1: + # # Multiple data nodes with same base name - use full label as filename + # # to ensure uniqueness in working directory + # filename = input_label + # else: + # # Single data node with this name - can use simple filename + # filename = Path(input_.src).name if hasattr(input_, "src") else input_.name + + # # The key in filenames dict should be the input label (what's used in nodes dict) + # filenames[input_label] = filename + + # workgraph_task.inputs.filenames.value = filenames + def run( self, inputs: None | dict[str, Any] = None, diff --git a/tests/cases/small/config/config.yml b/tests/cases/small/config/config.yml index a2871b5d..535ccf72 100644 --- a/tests/cases/small/config/config.yml +++ b/tests/cases/small/config/config.yml @@ -47,11 +47,13 @@ data: available: - icon_namelist: type: file - src: data/input + src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/small/config/data/input + # src: data/input # This is the original code - initial_conditions: type: file computer: localhost - src: data/initial_conditions + src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/small/config/data/initial_conditions + # src: data/initial_conditions # This is the original code generated: - icon_output: type: file diff --git a/tests/test-run.py b/tests/test-run.py new file mode 100644 index 00000000..9d80ec29 --- /dev/null +++ b/tests/test-run.py @@ -0,0 +1,13 @@ +from aiida import load_profile + +from sirocco.core import Workflow +from sirocco.workgraph import AiidaWorkGraph + +load_profile() + + +core_workflow = Workflow.from_config_file( + "/home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/small/config/config.yml" +) +aiida_workflow = AiidaWorkGraph(core_workflow) +output_node = aiida_workflow.run() \ No newline at end of file From 3676b14cfb572f1fadf6eabf717a26415132b066 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 3 Jun 2025 10:17:55 +0200 Subject: [PATCH 23/27] Allow and properly resolve relative AvailableData src on localhost to make tests pass. --- src/sirocco/workgraph.py | 12 +++++++++--- tests/cases/small/config/config.yml | 6 ++---- tests/test-run.py | 13 ------------- 3 files changed, 11 insertions(+), 20 deletions(-) delete mode 100644 tests/test-run.py diff --git a/src/sirocco/workgraph.py b/src/sirocco/workgraph.py index ae39eab9..3db7182f 100644 --- a/src/sirocco/workgraph.py +++ b/src/sirocco/workgraph.py @@ -194,9 +194,15 @@ def _add_aiida_input_data_node(self, data: core.Data): msg = f"Could not find computer {data.computer!r} for input {data}." 
raise ValueError(msg) from err # `remote_path` must be str not PosixPath to be JSON-serializable - self._aiida_data_nodes[label] = aiida.orm.RemoteData( - remote_path=str(data.src), label=label, computer=computer - ) + # PRCOMMENT: Hack for now to make the tests pass + if computer.label == 'localhost': + self._aiida_data_nodes[label] = aiida.orm.RemoteData( + remote_path=str(data_full_path), label=label, computer=computer + ) + else: + self._aiida_data_nodes[label] = aiida.orm.RemoteData( + remote_path=str(data.src), label=label, computer=computer + ) elif data.type == "file": self._aiida_data_nodes[label] = aiida.orm.SinglefileData(label=label, file=data_full_path) elif data.type == "dir": diff --git a/tests/cases/small/config/config.yml b/tests/cases/small/config/config.yml index 535ccf72..a2871b5d 100644 --- a/tests/cases/small/config/config.yml +++ b/tests/cases/small/config/config.yml @@ -47,13 +47,11 @@ data: available: - icon_namelist: type: file - src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/small/config/data/input - # src: data/input # This is the original code + src: data/input - initial_conditions: type: file computer: localhost - src: /home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/small/config/data/initial_conditions - # src: data/initial_conditions # This is the original code + src: data/initial_conditions generated: - icon_output: type: file diff --git a/tests/test-run.py b/tests/test-run.py deleted file mode 100644 index 9d80ec29..00000000 --- a/tests/test-run.py +++ /dev/null @@ -1,13 +0,0 @@ -from aiida import load_profile - -from sirocco.core import Workflow -from sirocco.workgraph import AiidaWorkGraph - -load_profile() - - -core_workflow = Workflow.from_config_file( - "/home/geiger_j/aiida_projects/swiss-twins/git-repos/Sirocco/tests/cases/small/config/config.yml" -) -aiida_workflow = AiidaWorkGraph(core_workflow) -output_node = aiida_workflow.run() \ No newline at end of file From 5237cdf238a4a7afed3b8446d0ddf8fc5865e2ae Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 3 Jun 2025 10:21:39 +0200 Subject: [PATCH 24/27] . --- src/sirocco/workgraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sirocco/workgraph.py b/src/sirocco/workgraph.py index 3db7182f..f8675da5 100644 --- a/src/sirocco/workgraph.py +++ b/src/sirocco/workgraph.py @@ -195,7 +195,7 @@ def _add_aiida_input_data_node(self, data: core.Data): raise ValueError(msg) from err # `remote_path` must be str not PosixPath to be JSON-serializable # PRCOMMENT: Hack for now to make the tests pass - if computer.label == 'localhost': + if computer.label == "localhost": self._aiida_data_nodes[label] = aiida.orm.RemoteData( remote_path=str(data_full_path), label=label, computer=computer ) From 7c3bd6833311d71606cb9e6e11828add1684ab3b Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 3 Jun 2025 10:24:07 +0200 Subject: [PATCH 25/27] . --- .gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index 7d3cf6e1..c3d39320 100644 --- a/.gitignore +++ b/.gitignore @@ -165,5 +165,3 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ - -pyrightconfig.json From a565fb103d74f6ee3e28ef31ba9ec6016b9587b5 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 3 Jun 2025 10:29:34 +0200 Subject: [PATCH 26/27] . 
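
Revert leftover scaffolding: drop the `[tool.hatch.metadata] allow-direct-references` override from `pyproject.toml` (no longer needed now that `aiida-shell` is consumed as a regular release, `aiida-shell>=0.8.1`), delete the commented-out duplicate of `_set_shelljob_filenames` kept around during development, and fold the UX-print comment in `conftest.py` back onto one line.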
--- pyproject.toml | 3 --- src/sirocco/workgraph.py | 36 ------------------------------------ tests/conftest.py | 3 +-- 3 files changed, 1 insertion(+), 41 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e66ad238..53dae8fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,9 +86,6 @@ ignore = [ ## Hatch configurations -[tool.hatch.metadata] -allow-direct-references = true - [tool.hatch.build.targets.sdist] include = [ "src/sirocco/", diff --git a/src/sirocco/workgraph.py b/src/sirocco/workgraph.py index f8675da5..512f9f0f 100644 --- a/src/sirocco/workgraph.py +++ b/src/sirocco/workgraph.py @@ -387,42 +387,6 @@ def _set_shelljob_filenames(self, task: core.ShellTask): workgraph_task.inputs.filenames.value = filenames - # def _set_shelljob_filenames(self, task: core.ShellTask): - # """Set AiiDA ShellJob filenames for data entities, including parameterized data.""" - # filenames = {} - # workgraph_task = self.task_from_core(task) - - # if not workgraph_task.inputs.filenames: - # return - - # for input_ in task.input_data_nodes(): - # input_label = self.get_aiida_label_from_graph_item(input_) - - # if task.computer and input_.computer and isinstance(input_, core.AvailableData): - # # For RemoteData on the same computer, use just the filename - # filename = Path(input_.src).name - # filenames[input_.name] = filename - # else: - # # For other cases (including GeneratedData), we need to handle parameterized data - # # Importantly, multiple data nodes with the same base name but different - # # coordinates need unique filenames to avoid conflicts in the working directory - - # # Count how many inputs have the same base name - # same_name_count = sum(1 for inp in task.input_data_nodes() if inp.name == input_.name) - - # if same_name_count > 1: - # # Multiple data nodes with same base name - use full label as filename - # # to ensure uniqueness in working directory - # filename = input_label - # else: - # # Single data node with this name - can use simple filename - # filename = Path(input_.src).name if hasattr(input_, "src") else input_.name - - # # The key in filenames dict should be the input label (what's used in nodes dict) - # filenames[input_label] = filename - - # workgraph_task.inputs.filenames.value = filenames - def run( self, inputs: None | dict[str, Any] = None, diff --git a/tests/conftest.py b/tests/conftest.py index 0b3aa8a0..a4437c0f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -158,8 +158,7 @@ def serialize_nml(config_paths: dict[str, pathlib.Path], workflow: workflow.Work def pytest_configure(config): if config.getoption("reserialize"): - print("Regenerating serialized references") - # this is actual UX, not a debug print + print("Regenerating serialized references") # this is actual UX, not a debug print for config_case in ALL_CONFIG_CASES: config_paths = generate_config_paths(config_case) wf = workflow.Workflow.from_config_file(str(config_paths["yml"])) From 83aea88f2a6786ec613f0e31a86c148eda4ad562 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Tue, 3 Jun 2025 10:45:29 +0200 Subject: [PATCH 27/27] Remove erroneously introduced output renaming. 
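
The `filenames` input of an aiida-shell `ShellJob` is only consulted for entries of `nodes`, i.e. it renames input files as they are staged into the job's working directory; output files are declared via the separate `outputs` input and retrieved by name. Mapping output labels into `filenames` therefore has no effect and only clutters the dictionary, so the loop over `task.output_data_nodes()` is removed again.

For illustration only, a minimal sketch of that input/output split with aiida-shell's `launch_shell_job` (the file names here are made up and not part of this patch):

    from aiida import orm
    from aiida_shell import launch_shell_job

    results, node = launch_shell_job(
        'cp',
        arguments='{data} analysis.txt',
        nodes={'data': orm.SinglefileData('/tmp/output.dat')},
        filenames={'data': 'output.dat'},  # renames the staged *input* copy only
        outputs=['analysis.txt'],          # files produced by the job that AiiDA retrieves
    )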
--- src/sirocco/workgraph.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/sirocco/workgraph.py b/src/sirocco/workgraph.py index 512f9f0f..121c8627 100644 --- a/src/sirocco/workgraph.py +++ b/src/sirocco/workgraph.py @@ -375,16 +375,6 @@ def _set_shelljob_filenames(self, task: core.ShellTask): # The key in filenames dict should be the input label (what's used in nodes dict) filenames[input_label] = filename - # Handle output files - for output in task.output_data_nodes(): - output_label = self.get_aiida_label_from_graph_item(output) - - # For outputs, we always use the original src filename - # The script should create files with these names, and AiiDA will retrieve them - # Parameterization conflicts are handled later when files are copied/symlinked for analysis - output_filename = Path(output.src).name - filenames[output_label] = output_filename - workgraph_task.inputs.filenames.value = filenames def run(