Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for multiple occurrences of design matrix #9583

Merged
merged 1 commit into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions src/ert/config/analysis_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def from_dict(cls, config_dict: ConfigDict) -> AnalysisConfig:

min_realization = min(min_realization, num_realization)

design_matrix_config_list = config_dict.get(ConfigKeys.DESIGN_MATRIX, None)
design_matrix_config_lists = config_dict.get(ConfigKeys.DESIGN_MATRIX, [])

options: dict[str, dict[str, Any]] = {"STD_ENKF": {}, "IES_ENKF": {}}
observation_settings: dict[str, Any] = {
Expand Down Expand Up @@ -186,15 +186,22 @@ def from_dict(cls, config_dict: ConfigDict) -> AnalysisConfig:
if all_errors:
raise ConfigValidationError.from_collected(all_errors)

design_matrices = [
DesignMatrix.from_config_list(design_matrix_config_list)
for design_matrix_config_list in design_matrix_config_lists
]
design_matrix: DesignMatrix | None = None
if design_matrices:
design_matrix = design_matrices[0]
for dm_other in design_matrices[1:]:
design_matrix.merge_with_other(dm_other)
config = cls(
minimum_required_realizations=min_realization,
update_log_path=config_dict.get(ConfigKeys.UPDATE_LOG_PATH, "update_log"),
observation_settings=obs_settings,
es_module=es_settings,
ies_module=ies_settings,
design_matrix=DesignMatrix.from_config_list(design_matrix_config_list)
if design_matrix_config_list is not None
else None,
design_matrix=design_matrix,
)
return config

Expand Down
32 changes: 32 additions & 0 deletions src/ert/config/design_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,38 @@ def from_config_list(cls, config_list: list[str]) -> DesignMatrix:
default_sheet=default_sheet,
)

def merge_with_other(self, dm_other: DesignMatrix) -> None:
    """Merge the columns and parameters of ``dm_other`` into this design matrix.

    Every validation problem is collected and reported in one exception.
    ``self`` is only modified after all checks have passed, so a failed
    merge leaves this design matrix exactly as it was before the call.

    Args:
        dm_other: Design matrix whose data frame columns and transform
            function definitions are appended to this one.

    Raises:
        ConfigValidationError: If the active realizations differ, if the two
            data frames share column keys, or if pandas raises ``ValueError``
            while concatenating the data frames.
    """
    errors = []
    if self.active_realizations != dm_other.active_realizations:
        errors.append(
            ErrorInfo("Design Matrices don't have the same active realizations!")
        )

    common_keys = set(self.design_matrix_df.columns) & set(
        dm_other.design_matrix_df.columns
    )
    if common_keys:
        errors.append(
            ErrorInfo(f"Design Matrices do not have unique keys {common_keys}!")
        )

    # Build the merged frame in a local first; assigning it to self here would
    # leave a half-merged matrix behind when one of the checks above failed.
    try:
        merged_df = pd.concat(
            [self.design_matrix_df, dm_other.design_matrix_df], axis=1
        )
    except ValueError as exc:
        errors.append(ErrorInfo(f"Error when merging design matrices {exc}!"))

    if errors:
        raise ConfigValidationError.from_collected(errors)

    pc_other = dm_other.parameter_configuration[DESIGN_MATRIX_GROUP]
    pc_self = self.parameter_configuration[DESIGN_MATRIX_GROUP]
    # The isinstance asserts narrow both entries to GenKwConfig before
    # transform_function_definitions is accessed (presumably needed for the
    # type checker, as asked in review -- TODO confirm).
    assert isinstance(pc_other, GenKwConfig)
    assert isinstance(pc_self, GenKwConfig)

    # No errors were collected, so merged_df is guaranteed to be bound here.
    self.design_matrix_df = merged_df
    pc_self.transform_function_definitions.extend(
        pc_other.transform_function_definitions
    )

def merge_with_existing_parameters(
self, existing_parameters: list[ParameterConfig]
) -> tuple[list[ParameterConfig], ParameterConfig | None]:
Expand Down
2 changes: 1 addition & 1 deletion src/ert/config/parsing/config_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def design_matrix_keyword() -> SchemaItem:
SchemaItemType.STRING,
SchemaItemType.STRING,
],
multi_occurrence=False,
multi_occurrence=True,
)


Expand Down
142 changes: 117 additions & 25 deletions tests/ert/ui_tests/cli/analysis/test_design_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,30 @@
from tests.ert.ui_tests.cli.run_cli import run_cli


def _create_design_matrix(filename, design_sheet_df, default_sheet_df=None):
    """Write a design-matrix workbook to ``filename``.

    The design sheet is always written as "DesignSheet01" (with header row).
    When ``default_sheet_df`` is given it is written as "DefaultSheet"
    without a header row.
    """
    sheets = [(design_sheet_df, {"sheet_name": "DesignSheet01"})]
    if default_sheet_df is not None:
        sheets.append(
            (default_sheet_df, {"sheet_name": "DefaultSheet", "header": False})
        )

    with pd.ExcelWriter(filename) as workbook:
        for frame, options in sheets:
            frame.to_excel(workbook, index=False, **options)


@pytest.mark.usefixtures("copy_poly_case")
def test_run_poly_example_with_design_matrix():
design_matrix = "poly_design.xlsx"
num_realizations = 10
a_values = list(range(num_realizations))
design_matrix_df = pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"a": a_values,
"category": 5 * ["cat1"] + 5 * ["cat2"],
}
_create_design_matrix(
"poly_design.xlsx",
pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"a": a_values,
"category": 5 * ["cat1"] + 5 * ["cat2"],
}
),
pd.DataFrame([["b", 1], ["c", 2]]),
)
default_sheet_df = pd.DataFrame([["b", 1], ["c", 2]])
with pd.ExcelWriter(design_matrix) as xl_write:
design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01")
default_sheet_df.to_excel(
xl_write, index=False, sheet_name="DefaultSheet", header=False
)

with open("poly.ert", "w", encoding="utf-8") as fout:
fout.write(
Expand Down Expand Up @@ -105,21 +111,18 @@ def _evaluate(coeffs, x):
],
)
def test_run_poly_example_with_design_matrix_and_genkw_merge(default_values, error_msg):
design_matrix = "poly_design.xlsx"
num_realizations = 10
a_values = list(range(num_realizations))
design_matrix_df = pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"a": a_values,
}
_create_design_matrix(
"poly_design.xlsx",
pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"a": a_values,
}
),
pd.DataFrame(default_values),
)
default_sheet_df = pd.DataFrame(default_values)
with pd.ExcelWriter(design_matrix) as xl_write:
design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01")
default_sheet_df.to_excel(
xl_write, index=False, sheet_name="DefaultSheet", header=False
)

with open("poly.ert", "w", encoding="utf-8") as fout:
fout.write(
Expand Down Expand Up @@ -191,3 +194,92 @@ def _evaluate(coeffs, x):
np.testing.assert_array_equal(params[:, 0], a_values)
np.testing.assert_array_equal(params[:, 1], 10 * [1])
np.testing.assert_array_equal(params[:, 2], 10 * [2])


# End-to-end check that two DESIGN_MATRIX keywords in one ert config are both
# picked up: two Excel workbooks are written, an ensemble experiment is run
# through the CLI, and the stored "DESIGN_MATRIX" parameter values are compared
# against the values written to the workbooks.
@pytest.mark.usefixtures("copy_poly_case")
def test_run_poly_example_with_multiple_design_matrix_instances():
num_realizations = 10
a_values = list(range(num_realizations))
# First workbook: design column "a" plus default values for "b" and "c".
_create_design_matrix(
"poly_design_1.xlsx",
pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"a": a_values,
}
),
pd.DataFrame([["b", 1], ["c", 2]]),
)
# Second workbook: design column "d" (constant 3) plus a default value for "g".
_create_design_matrix(
"poly_design_2.xlsx",
pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"d": num_realizations * [3],
}
),
pd.DataFrame([["g", 4]]),
)

# Config file referencing both workbooks via two DESIGN_MATRIX lines.
with open("poly.ert", "w", encoding="utf-8") as fout:
fout.write(
dedent(
"""\
QUEUE_OPTION LOCAL MAX_RUNNING 10
RUNPATH poly_out/realization-<IENS>/iter-<ITER>
NUM_REALIZATIONS 10
MIN_REALIZATIONS 1
GEN_DATA POLY_RES RESULT_FILE:poly.out
DESIGN_MATRIX poly_design_1.xlsx DESIGN_SHEET:DesignSheet01 DEFAULT_SHEET:DefaultSheet
DESIGN_MATRIX poly_design_2.xlsx DESIGN_SHEET:DesignSheet01 DEFAULT_SHEET:DefaultSheet
INSTALL_JOB poly_eval POLY_EVAL
FORWARD_MODEL poly_eval
"""
)
)

# Forward-model script; it reads coefficients a, b and c from the
# "DESIGN_MATRIX" entry of parameters.json and writes poly.out.
with open("poly_eval.py", "w", encoding="utf-8") as f:
f.write(
dedent(
"""\
#!/usr/bin/env python
import json

def _load_coeffs(filename):
with open(filename, encoding="utf-8") as f:
return json.load(f)["DESIGN_MATRIX"]

def _evaluate(coeffs, x):
return coeffs["a"] * x**2 + coeffs["b"] * x + coeffs["c"]

if __name__ == "__main__":
coeffs = _load_coeffs("parameters.json")
output = [_evaluate(coeffs, x) for x in range(10)]
with open("poly.out", "w", encoding="utf-8") as f:
f.write("\\n".join(map(str, output)))
"""
)
)
# Add execute permission for user, group and others on the generated script.
os.chmod(
"poly_eval.py",
os.stat("poly_eval.py").st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH,
)

run_cli(
ENSEMBLE_EXPERIMENT_MODE,
"--disable-monitor",
"poly.ert",
"--experiment-name",
"test-experiment",
)
storage_path = ErtConfig.from_file("poly.ert").ens_path
with open_storage(storage_path) as storage:
experiment = storage.get_experiment_by_name("test-experiment")
params = experiment.get_ensemble_by_name("default").load_parameters(
"DESIGN_MATRIX"
)["values"]
# The expected values match a, b, c from the first workbook followed by d, g
# from the second -- presumably that is the stored column order; verify.
np.testing.assert_array_equal(params[:, 0], a_values)
np.testing.assert_array_equal(params[:, 1], 10 * [1])
np.testing.assert_array_equal(params[:, 2], 10 * [2])
np.testing.assert_array_equal(params[:, 3], 10 * [3])
np.testing.assert_array_equal(params[:, 4], 10 * [4])
32 changes: 20 additions & 12 deletions tests/ert/unit_tests/config/test_analysis_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,11 @@ def test_analysis_config_from_file_is_same_as_from_dict(monkeypatch, tmp_path):
("STD_ENKF", "ENKF_TRUNCATION", 0.8),
],
ConfigKeys.DESIGN_MATRIX: [
"my_design_matrix.xlsx",
larsevj marked this conversation as resolved.
Show resolved Hide resolved
"DESIGN_SHEET:my_sheet",
"DEFAULT_SHEET:my_default_sheet",
[
"my_design_matrix.xlsx",
"DESIGN_SHEET:my_sheet",
"DEFAULT_SHEET:my_default_sheet",
]
],
}
)
Expand Down Expand Up @@ -110,9 +112,11 @@ def test_invalid_design_matrix_format_raises_validation_error():
{
ConfigKeys.NUM_REALIZATIONS: 1,
ConfigKeys.DESIGN_MATRIX: [
"my_matrix.txt",
"DESIGN_SHEET:sheet1",
"DEFAULT_SHEET:sheet2",
[
"my_matrix.txt",
"DESIGN_SHEET:sheet1",
"DEFAULT_SHEET:sheet2",
],
],
}
)
Expand All @@ -123,9 +127,11 @@ def test_design_matrix_without_design_sheet_raises_validation_error():
AnalysisConfig.from_dict(
{
ConfigKeys.DESIGN_MATRIX: [
"my_matrix.xlsx",
"DESIGN_:design",
"DEFAULT_SHEET:default",
[
"my_matrix.xlsx",
"DESIGN_:design",
"DEFAULT_SHEET:default",
]
],
}
)
Expand All @@ -136,9 +142,11 @@ def test_design_matrix_without_default_sheet_raises_validation_error():
AnalysisConfig.from_dict(
{
ConfigKeys.DESIGN_MATRIX: [
"my_matrix.xlsx",
"DESIGN_SHEET:design",
"DEFAULT_:default",
[
"my_matrix.xlsx",
"DESIGN_SHEET:design",
"DEFAULT_:default",
]
],
}
)
Expand Down
81 changes: 81 additions & 0 deletions tests/ert/unit_tests/sensitivity_analysis/test_design_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,87 @@
from ert.config.gen_kw_config import GenKwConfig, TransformFunctionDefinition


def _create_design_matrix(xls_path, design_matrix_df, default_sheet_df) -> DesignMatrix:
    """Write both sheets to ``xls_path`` and return a DesignMatrix built from it.

    The design data goes to sheet "DesignSheet01" (with header row) and the
    defaults go to sheet "DefaultValues" (without header row).
    """
    design_sheet, default_sheet = "DesignSheet01", "DefaultValues"
    with pd.ExcelWriter(xls_path) as workbook:
        design_matrix_df.to_excel(workbook, sheet_name=design_sheet, index=False)
        default_sheet_df.to_excel(
            workbook, sheet_name=default_sheet, index=False, header=False
        )
    return DesignMatrix(xls_path, design_sheet, default_sheet)


@pytest.mark.parametrize(
    "design_sheet_pd, default_sheet_pd, error_msg",
    [
        pytest.param(
            pd.DataFrame({"REAL": [0, 1, 2], "c": [1, 2, 3], "d": [0, 2, 0]}),
            pd.DataFrame([["e", 1]]),
            "",
            id="ok_merge",
        ),
        pytest.param(
            pd.DataFrame({"REAL": [0, 1, 2], "a": [1, 2, 3]}),
            pd.DataFrame([["e", 1]]),
            "Design Matrices do not have unique keys",
            id="not_unique_keys",
        ),
        pytest.param(
            pd.DataFrame({"REAL": [0, 1], "d": [1, 2]}),
            pd.DataFrame([["e", 1]]),
            "Design Matrices don't have the same active realizations!",
            id="not_same_acitve_realizations",
        ),
    ],
)
def test_merge_multiple_occurrences(
    tmp_path, design_sheet_pd, default_sheet_pd, error_msg
):
    """Merge a second design matrix into a fixed first one.

    The first matrix always holds columns "a" and "b" for realizations 0-2.
    A non-empty ``error_msg`` means the merge must raise ``ValueError``
    matching that message; otherwise the merged parameters, active
    realizations and data frame columns are checked.
    """
    first_sheet = pd.DataFrame(
        {
            "REAL": [0, 1, 2],
            "a": [1, 2, 3],
            "b": [0, 2, 0],
        },
    )
    first_defaults = pd.DataFrame([["a", 1], ["b", 4]])
    design_matrix_1 = _create_design_matrix(
        tmp_path / "design_matrix_1.xlsx", first_sheet, first_defaults
    )
    design_matrix_2 = _create_design_matrix(
        tmp_path / "design_matrix_2.xlsx", design_sheet_pd, default_sheet_pd
    )

    # Failure cases: only the raised error is checked.
    if error_msg:
        with pytest.raises(ValueError, match=error_msg):
            design_matrix_1.merge_with_other(design_matrix_2)
        return

    design_matrix_1.merge_with_other(design_matrix_2)

    merged_params = design_matrix_1.parameter_configuration.get("DESIGN_MATRIX", [])
    assert all(name in merged_params for name in ("a", "b", "c", "d"))
    assert design_matrix_1.active_realizations == [True, True, True]

    merged_df = design_matrix_1.design_matrix_df
    expected_columns = {
        "a": [1, 2, 3],
        "b": [0, 2, 0],
        "c": [1, 2, 3],
        "d": [0, 2, 0],
    }
    for name, values in expected_columns.items():
        np.testing.assert_equal(
            merged_df[DESIGN_MATRIX_GROUP, name], np.array(values)
        )


@pytest.mark.parametrize(
"parameters, error_msg",
[
Expand Down
Loading