def merge_with_other(self, dm_other: DesignMatrix) -> None:
    """Merge the design matrix ``dm_other`` into this one, in place.

    The columns of ``dm_other.design_matrix_df`` are appended to
    ``self.design_matrix_df``, and the transform function definitions of
    the other matrix's ``DESIGN_MATRIX_GROUP`` parameter configuration are
    appended to this matrix's own.

    All validation is performed before any state is modified, so a
    rejected merge leaves ``self`` unchanged.

    Args:
        dm_other: The design matrix whose columns and transform function
            definitions are merged into this one.

    Raises:
        ConfigValidationError: If the two matrices have different active
            realizations, if they share column keys, or if concatenating
            the data frames fails.
    """
    errors = []
    if self.active_realizations != dm_other.active_realizations:
        errors.append(
            ErrorInfo("Design Matrices don't have the same active realizations!")
        )

    common_keys = set(self.design_matrix_df.columns) & set(
        dm_other.design_matrix_df.columns
    )
    if common_keys:
        errors.append(
            ErrorInfo(f"Design Matrices do not have unique keys {common_keys}!")
        )

    # Bail out before touching any state: concatenating frames with
    # overlapping keys would not fail, it would silently produce duplicate
    # columns.
    if errors:
        raise ConfigValidationError.from_collected(errors)

    pc_other = dm_other.parameter_configuration[DESIGN_MATRIX_GROUP]
    pc_self = self.parameter_configuration[DESIGN_MATRIX_GROUP]
    # Type narrowing only; both entries are expected to be GenKwConfig.
    assert isinstance(pc_other, GenKwConfig)
    assert isinstance(pc_self, GenKwConfig)

    try:
        merged_df = pd.concat(
            [self.design_matrix_df, dm_other.design_matrix_df], axis=1
        )
    except ValueError as exc:
        raise ConfigValidationError.from_collected(
            [ErrorInfo(f"Error when merging design matrices {exc}!")]
        ) from exc

    # Commit only after every step that can fail has succeeded.
    self.design_matrix_df = merged_df
    pc_self.transform_function_definitions.extend(
        pc_other.transform_function_definitions
    )
xl_write, index=False, sheet_name="DefaultSheet", header=False + ) + + @pytest.mark.usefixtures("copy_poly_case") def test_run_poly_example_with_design_matrix(): - design_matrix = "poly_design.xlsx" num_realizations = 10 a_values = list(range(num_realizations)) - design_matrix_df = pd.DataFrame( - { - "REAL": list(range(num_realizations)), - "a": a_values, - "category": 5 * ["cat1"] + 5 * ["cat2"], - } + _create_design_matrix( + "poly_design.xlsx", + pd.DataFrame( + { + "REAL": list(range(num_realizations)), + "a": a_values, + "category": 5 * ["cat1"] + 5 * ["cat2"], + } + ), + pd.DataFrame([["b", 1], ["c", 2]]), ) - default_sheet_df = pd.DataFrame([["b", 1], ["c", 2]]) - with pd.ExcelWriter(design_matrix) as xl_write: - design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01") - default_sheet_df.to_excel( - xl_write, index=False, sheet_name="DefaultSheet", header=False - ) with open("poly.ert", "w", encoding="utf-8") as fout: fout.write( @@ -105,21 +111,18 @@ def _evaluate(coeffs, x): ], ) def test_run_poly_example_with_design_matrix_and_genkw_merge(default_values, error_msg): - design_matrix = "poly_design.xlsx" num_realizations = 10 a_values = list(range(num_realizations)) - design_matrix_df = pd.DataFrame( - { - "REAL": list(range(num_realizations)), - "a": a_values, - } + _create_design_matrix( + "poly_design.xlsx", + pd.DataFrame( + { + "REAL": list(range(num_realizations)), + "a": a_values, + } + ), + pd.DataFrame(default_values), ) - default_sheet_df = pd.DataFrame(default_values) - with pd.ExcelWriter(design_matrix) as xl_write: - design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01") - default_sheet_df.to_excel( - xl_write, index=False, sheet_name="DefaultSheet", header=False - ) with open("poly.ert", "w", encoding="utf-8") as fout: fout.write( @@ -191,3 +194,92 @@ def _evaluate(coeffs, x): np.testing.assert_array_equal(params[:, 0], a_values) np.testing.assert_array_equal(params[:, 1], 10 * [1]) 
@pytest.mark.usefixtures("copy_poly_case")
def test_run_poly_example_with_multiple_design_matrix_instances():
    """Run an ensemble experiment configured with two DESIGN_MATRIX entries.

    Two Excel design matrices are written to disk: the first provides the
    design column ``a`` and defaults ``b`` and ``c``, the second provides
    the design column ``d`` and default ``g``. After the run, the stored
    ``DESIGN_MATRIX`` parameter group is expected to hold five columns
    with the values of a, b, c, d and g in that order.
    """
    num_realizations = 10
    a_values = list(range(num_realizations))
    _create_design_matrix(
        "poly_design_1.xlsx",
        pd.DataFrame(
            {
                "REAL": list(range(num_realizations)),
                "a": a_values,
            }
        ),
        pd.DataFrame([["b", 1], ["c", 2]]),
    )
    _create_design_matrix(
        "poly_design_2.xlsx",
        pd.DataFrame(
            {
                "REAL": list(range(num_realizations)),
                "d": num_realizations * [3],
            }
        ),
        pd.DataFrame([["g", 4]]),
    )

    # RUNPATH carries the <IENS>/<ITER> placeholders so that every
    # realization gets its own run directory instead of sharing one.
    with open("poly.ert", "w", encoding="utf-8") as fout:
        fout.write(
            dedent(
                """\
                QUEUE_OPTION LOCAL MAX_RUNNING 10
                RUNPATH poly_out/realization-<IENS>/iter-<ITER>
                NUM_REALIZATIONS 10
                MIN_REALIZATIONS 1
                GEN_DATA POLY_RES RESULT_FILE:poly.out
                DESIGN_MATRIX poly_design_1.xlsx DESIGN_SHEET:DesignSheet01 DEFAULT_SHEET:DefaultSheet
                DESIGN_MATRIX poly_design_2.xlsx DESIGN_SHEET:DesignSheet01 DEFAULT_SHEET:DefaultSheet
                INSTALL_JOB poly_eval POLY_EVAL
                FORWARD_MODEL poly_eval
                """
            )
        )

    with open("poly_eval.py", "w", encoding="utf-8") as f:
        f.write(
            dedent(
                """\
                #!/usr/bin/env python
                import json

                def _load_coeffs(filename):
                    with open(filename, encoding="utf-8") as f:
                        return json.load(f)["DESIGN_MATRIX"]

                def _evaluate(coeffs, x):
                    return coeffs["a"] * x**2 + coeffs["b"] * x + coeffs["c"]

                if __name__ == "__main__":
                    coeffs = _load_coeffs("parameters.json")
                    output = [_evaluate(coeffs, x) for x in range(10)]
                    with open("poly.out", "w", encoding="utf-8") as f:
                        f.write("\\n".join(map(str, output)))
                """
            )
        )
    # The forward model script must be executable for the job to run it.
    os.chmod(
        "poly_eval.py",
        os.stat("poly_eval.py").st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH,
    )

    run_cli(
        ENSEMBLE_EXPERIMENT_MODE,
        "--disable-monitor",
        "poly.ert",
        "--experiment-name",
        "test-experiment",
    )
    storage_path = ErtConfig.from_file("poly.ert").ens_path
    with open_storage(storage_path) as storage:
        experiment = storage.get_experiment_by_name("test-experiment")
        params = experiment.get_ensemble_by_name("default").load_parameters(
            "DESIGN_MATRIX"
        )["values"]
        np.testing.assert_array_equal(params[:, 0], a_values)
        # Columns 1-4 hold the constant values written for b, c, d and g.
        for column, expected in enumerate((1, 2, 3, 4), start=1):
            np.testing.assert_array_equal(
                params[:, column], num_realizations * [expected]
            )
def _create_design_matrix(xls_path, design_matrix_df, default_sheet_df) -> DesignMatrix:
    """Write both sheets to ``xls_path`` and return a DesignMatrix built from it.

    The design sheet is written with a header row, the default sheet
    without one.
    """
    design_sheet = "DesignSheet01"
    default_sheet = "DefaultValues"
    with pd.ExcelWriter(xls_path) as writer:
        design_matrix_df.to_excel(writer, index=False, sheet_name=design_sheet)
        default_sheet_df.to_excel(
            writer, index=False, sheet_name=default_sheet, header=False
        )
    return DesignMatrix(xls_path, design_sheet, default_sheet)


@pytest.mark.parametrize(
    "design_sheet_pd, default_sheet_pd, error_msg",
    [
        pytest.param(
            pd.DataFrame({"REAL": [0, 1, 2], "c": [1, 2, 3], "d": [0, 2, 0]}),
            pd.DataFrame([["e", 1]]),
            "",
            id="ok_merge",
        ),
        pytest.param(
            pd.DataFrame({"REAL": [0, 1, 2], "a": [1, 2, 3]}),
            pd.DataFrame([["e", 1]]),
            "Design Matrices do not have unique keys",
            id="not_unique_keys",
        ),
        pytest.param(
            pd.DataFrame({"REAL": [0, 1], "d": [1, 2]}),
            pd.DataFrame([["e", 1]]),
            "Design Matrices don't have the same active realizations!",
            id="not_same_acitve_realizations",
        ),
    ],
)
def test_merge_multiple_occurrences(
    tmp_path, design_sheet_pd, default_sheet_pd, error_msg
):
    """Merge a parametrized second design matrix into a fixed first one.

    A non-empty ``error_msg`` means the merge must raise a ValueError
    matching that message; otherwise the merged matrix must contain the
    parameters and column values of both inputs.
    """
    base_sheet = pd.DataFrame(
        {
            "REAL": [0, 1, 2],
            "a": [1, 2, 3],
            "b": [0, 2, 0],
        },
    )
    base_defaults = pd.DataFrame([["a", 1], ["b", 4]])
    design_matrix_1 = _create_design_matrix(
        tmp_path / "design_matrix_1.xlsx", base_sheet, base_defaults
    )
    design_matrix_2 = _create_design_matrix(
        tmp_path / "design_matrix_2.xlsx", design_sheet_pd, default_sheet_pd
    )

    # Failure cases: only the raised error is checked.
    if error_msg:
        with pytest.raises(ValueError, match=error_msg):
            design_matrix_1.merge_with_other(design_matrix_2)
        return

    design_matrix_1.merge_with_other(design_matrix_2)

    design_params = design_matrix_1.parameter_configuration.get("DESIGN_MATRIX", [])
    assert all(param in design_params for param in ("a", "b", "c", "d"))
    assert design_matrix_1.active_realizations == [True, True, True]

    # Expected column values after the merge, keyed by parameter name.
    expected_columns = {
        "a": [1, 2, 3],
        "b": [0, 2, 0],
        "c": [1, 2, 3],
        "d": [0, 2, 0],
    }
    df = design_matrix_1.design_matrix_df
    for key, values in expected_columns.items():
        np.testing.assert_equal(df[DESIGN_MATRIX_GROUP, key], np.array(values))