diff --git a/.github/workflows/fmu-ensemble.yml b/.github/workflows/fmu-ensemble.yml index d3f8f19f..04786bb4 100644 --- a/.github/workflows/fmu-ensemble.yml +++ b/.github/workflows/fmu-ensemble.yml @@ -38,6 +38,7 @@ jobs: - name: Install fmu-ensemble with dependencies run: | pip install --upgrade pip + pip install libecl pip install . - name: Install test dependencies diff --git a/setup.py b/setup.py index cf671be8..c9dee6f1 100644 --- a/setup.py +++ b/setup.py @@ -16,11 +16,12 @@ with open("README.rst") as readme_file: readme = readme_file.read() -with open("HISTORY.rst") as history_file: - history = history_file.read() +with open("HISTORY.rst", "rb") as history_file: + # Norwegian characters in HISTORY.rst + history = history_file.read().decode("UTF-8") REQUIREMENTS = [ - "libecl", + # "libecl", # Temporarily removed from requirements to solve problems elsewhere "numpy", "pandas>0.23.0", "pyyaml>=5.1", diff --git a/src/fmu/ensemble/ensemble.py b/src/fmu/ensemble/ensemble.py index 480d57a8..57a90072 100644 --- a/src/fmu/ensemble/ensemble.py +++ b/src/fmu/ensemble/ensemble.py @@ -797,14 +797,15 @@ def load_smry( memory simultaneously start_date (str or date): First date to include. Dates prior to this date will be dropped, supplied - start_date will always be included. If string, use + start_date will always be included. Overridden if time_index + is 'first' or 'last'. If string, use ISO-format, YYYY-MM-DD. ISO-format, YYYY-MM-DD. end_date (str or date): Last date to be included. Dates past this date will be dropped, supplied - end_date will always be included. Overriden if time_index - is 'last'. If string, use ISO-format, YYYY-MM-DD. - include_restart (boolean): boolean sent to libecl for wheter restarts - files should be traversed + end_date will always be included. Overridden if time_index + is 'first' or 'last'. If string, use ISO-format, YYYY-MM-DD. + include_restart (boolean): boolean sent to libecl for whether restart + files should be traversed. Returns: pd.DataFame: Summary vectors for the ensemble, or a dict of dataframes if stacked=False. @@ -1079,6 +1080,7 @@ def get_smry_dates( yield the sorted union of all valid timesteps for all realizations. Other valid options are 'daily', 'monthly' and 'yearly'. + 'first' will give out the first date (minimum). 'last' will give out the last date (maximum). normalize: Whether to normalize backwards at the start and forwards at the end to ensure the raw @@ -1086,14 +1088,15 @@ def get_smry_dates( start_date: str or date with first date to include. Dates prior to this date will be dropped, supplied start_date will always be included. Overrides - normalized dates. If string, use ISO-format, YYYY-MM-DD. + normalized dates. Overridden if freq is 'first' or 'last'. + If string, use ISO-format, YYYY-MM-DD. end_date: str or date with last date to be included. Dates past this date will be dropped, supplied end_date will always be included. Overrides - normalized dates. Overriden if freq is 'last'. + normalized dates. Overridden if freq is 'first' or 'last'. If string, use ISO-format, YYYY-MM-DD. - include_restart: boolean sent to libecl for wheter restarts - files should be traversed + include_restart: boolean sent to libecl for whether restart + files should be traversed. Returns: list of datetimes. Empty list if no data found. @@ -1164,6 +1167,9 @@ def _get_smry_dates(eclsumsdates, freq, normalize, start_date, end_date): if freq == "last": end_date = max([max(x) for x in eclsumsdates]).date() return [end_date] + if freq == "first": + start_date = min([min(x) for x in eclsumsdates]).date() + return [start_date] # These are datetime.datetime, not datetime.date start_smry = min([min(x) for x in eclsumsdates]) end_smry = max([max(x) for x in eclsumsdates]) @@ -1235,12 +1241,12 @@ def get_smry_stats( object in memory after data has been loaded. start_date: str or date with first date to include. Dates prior to this date will be dropped, supplied - start_date will always be included. If string, - use ISO-format, YYYY-MM-DD. + start_date will always be included. Overridden if time_index + is 'first' or 'last'. If string, use ISO-format, YYYY-MM-DD. end_date: str or date with last date to be included. Dates past this date will be dropped, supplied - end_date will always be included. Overriden if time_index - is 'last'. If string, use ISO-format, YYYY-MM-DD. + end_date will always be included. Overridden if time_index + is 'first' or 'last'. If string, use ISO-format, YYYY-MM-DD. Returns: A MultiIndex dataframe. Outer index is 'minimum', 'maximum', 'mean', 'p10', 'p90', inner index are the dates. Column names @@ -1541,13 +1547,14 @@ def get_smry( not enough memory to keep all summary files in memory. start_date: str or date with first date to include. Dates prior to this date will be dropped, supplied - start_date will always be included. + start_date will always be included. Overridden if time_index + is 'first' or 'last'. end_date: str or date with last date to be included. Dates past this date will be dropped, supplied - end_date will always be included. Overriden if time_index - is 'last'. - include_restart: boolean sent to libecl for wheter restarts - files should be traversed + end_date will always be included. Overridden if time_index + is 'first' or 'last'. + include_restart: boolean sent to libecl for whether restart + files should be traversed. Returns: A DataFame of summary vectors for the ensemble. The column diff --git a/src/fmu/ensemble/ensembleset.py b/src/fmu/ensemble/ensembleset.py index 4dd336d0..8761bd74 100644 --- a/src/fmu/ensemble/ensembleset.py +++ b/src/fmu/ensemble/ensembleset.py @@ -582,11 +582,12 @@ def load_smry( memory simultaneously start_date: str or date with first date to include. Dates prior to this date will be dropped, supplied - start_date will always be included. + start_date will always be included. Overridden if time_index + is 'first' or 'last'. end_date: str or date with last date to be included. Dates past this date will be dropped, supplied - end_date will always be included. Overriden if time_index - is 'last'. + end_date will always be included. Overridden if time_index + is 'first' or 'last'. Returns: A DataFame of summary vectors for the ensembleset. @@ -632,11 +633,12 @@ def get_smry( operations start_date: str or date with first date to include. Dates prior to this date will be dropped, supplied - start_date will always be included. + start_date will always be included. Overridden if time_index + is 'first' or 'last'. end_date: str or date with last date to be included. Dates past this date will be dropped, supplied - end_date will always be included. Overriden if time_index - is 'last'. + end_date will always be included. Overridden if time_index + is 'first' or 'last'. Returns: A DataFame of summary vectors for the EnsembleSet. The column ENSEMBLE will distinguish the different ensembles by their @@ -672,11 +674,12 @@ def get_smry_dates( memory simultaneously start_date: str or date with first date to include. Dates prior to this date will be dropped, supplied - start_date will always be included. + start_date will always be included. Overridden if time_index + is 'first' or 'last'. end_date: str or date with last date to be included. Dates past this date will be dropped, supplied - end_date will always be included. Overriden if time_index - is 'last'. + end_date will always be included. Overridden if time_index + is 'first' or 'last'. Returns: list of datetime.date. """ diff --git a/src/fmu/ensemble/observations.py b/src/fmu/ensemble/observations.py index c1713146..2f71fe19 100644 --- a/src/fmu/ensemble/observations.py +++ b/src/fmu/ensemble/observations.py @@ -490,6 +490,7 @@ def _clean_observations(self): "report", "yearly", "daily", + "first", "last", "monthly", ] and not isinstance(unit["time_index"], datetime.datetime): diff --git a/src/fmu/ensemble/realization.py b/src/fmu/ensemble/realization.py index d77c48a6..e4b245be 100644 --- a/src/fmu/ensemble/realization.py +++ b/src/fmu/ensemble/realization.py @@ -962,8 +962,8 @@ def get_eclsum(self, cache=True, include_restart=True): cache: boolean indicating whether we should keep an object reference to the EclSum object. Set to false if you need to conserve memory. - include_restart: boolean sent to libecl for whether restarts - files should be traversed + include_restart: boolean sent to libecl for whether restart + files should be traversed. Returns: EclSum: object representing the summary file. None if @@ -1036,8 +1036,8 @@ def load_smry( 'share/results/tables/unsmry--.csv' - where is among 'yearly', 'monthly', 'daily', 'last' or - 'raw' (meaning the raw dates in the SMRY file), depending + where is among 'yearly', 'monthly', 'daily', 'first', + 'last' or 'raw' (meaning the raw dates in the SMRY file), depending on the chosen time_index. If a custom time_index (list of datetime) was supplied, will be called 'custom'. @@ -1047,8 +1047,8 @@ def load_smry( Args: time_index: string indicating a resampling frequency, - 'yearly', 'monthly', 'daily', 'last' or 'raw', the latter will - return the simulated report steps (also default). + 'yearly', 'monthly', 'daily', 'first', 'last' or 'raw', the + latter will return the simulated report steps (also default). If a list of DateTime is supplied, data will be resampled to these. column_keys: list of column key wildcards. None means everything. @@ -1056,13 +1056,14 @@ def load_smry( object in memory after data has been loaded. start_date: str or date with first date to include. Dates prior to this date will be dropped, supplied - start_date will always be included. + start_date will always be included. Overridden if time_index + is 'first' or 'last'. end_date: str or date with last date to be included. Dates past this date will be dropped, supplied - end_date will always be included. Overriden if time_index - is 'last'. - include_restart: boolean sent to libecl for wheter restarts - files should be traversed + end_date will always be included. Overridden if time_index + is 'first' or 'last'. + include_restart: boolean sent to libecl for whether restart + files should be traversed. Returns: DataFrame with summary keys as columns and dates as indices. @@ -1125,8 +1126,8 @@ def get_smry( Arguments: time_index: string indicating a resampling frequency, - 'yearly', 'monthly', 'daily', 'last' or 'raw', the latter will - return the simulated report steps (also default). + 'yearly', 'monthly', 'daily', 'first', 'last' or 'raw', the + latter will return the simulated report steps (also default). If a list of DateTime is supplied, data will be resampled to these. If a date in ISO-8601 format is supplied, that is used as a single date. @@ -1135,11 +1136,12 @@ def get_smry( object in memory after data has been loaded. start_date: str or date with first date to include. Dates prior to this date will be dropped, supplied - start_date will always be included. + start_date will always be included. Overridden if time_index + is 'first' or 'last'. end_date: str or date with last date to be included. Dates past this date will be dropped, supplied - end_date will always be included. Overriden if time_index - is 'last'. + end_date will always be included. Overridden if time_index + is 'first' or 'last'. Returns empty dataframe if there is no summary file, or if the column_keys are not existing. @@ -1438,19 +1440,22 @@ def get_smry_dates( yield the sorted union of all valid timesteps for all realizations. Other valid options are 'daily', 'monthly' and 'yearly'. + 'first' will give out the first date (minimum) and 'last' will give out the last date (maximum), - as a list with one element. + both as lists with one element. normalize: Whether to normalize backwards at the start and forwards at the end to ensure the raw date range is covered. start_date: str or date with first date to include Dates prior to this date will be dropped, supplied start_date will always be included. Overrides - normalized dates. + normalized dates. Overridden if freq is 'first' + or 'last'. end_date: str or date with last date to be included. Dates past this date will be dropped, supplied end_date will always be included. Overrides - normalized dates. Overriden if freq is 'last'. + normalized dates. Overridden if freq is 'first' + or 'last'. Returns: list of datetimes. None if no summary data is available. """ @@ -1808,7 +1813,7 @@ def normalize_dates(start_date, end_date, freq): elif freq == "daily": # This we don't need to normalize, but we should not give any warnings pass - elif freq == "last": + elif freq == "first" or freq == "last": # This we don't need to normalize, but we should not give any warnings pass else: diff --git a/src/fmu/ensemble/virtualrealization.py b/src/fmu/ensemble/virtualrealization.py index 9b03a294..ef0c95e7 100644 --- a/src/fmu/ensemble/virtualrealization.py +++ b/src/fmu/ensemble/virtualrealization.py @@ -463,8 +463,9 @@ def get_smry_dates(self, freq="monthly", normalize=False): freq: string denoting requested frequency for the list of datetimes. 'daily', 'monthly' and 'yearly'. + 'first' will give out the first date (minimum) and 'last' will give out the last date (maximum), - as a list with one element. + both as lists with one element. normalize: Whether to normalize backwards at the start and forwards at the end to ensure the entire date range is covered. @@ -490,6 +491,10 @@ def get_smry_dates(self, freq="monthly", normalize=False): available_dates = [pd.to_datetime(x) for x in list(available_dates)] start_date = min(available_dates) end_date = max(available_dates) + if freq == "first": + return [start_date.date()] + if freq == "last": + return [end_date.date()] pd_freq_mnenomics = {"monthly": "MS", "yearly": "YS", "daily": "D"} if normalize: raise NotImplementedError @@ -565,7 +570,10 @@ def _glob_smry_keys(self, column_keys): available_smry = [x for x in self.keys() if "unsmry" in x] if not available_smry: - raise ValueError("No summary data to glob from") + raise ValueError( + "No summary data available. Use load_smry() " + "before making a virtual realization." + ) # Merge all internalized columns: available_keys = set() diff --git a/tests/test_ensemble.py b/tests/test_ensemble.py index a55039f8..0e0689d6 100644 --- a/tests/test_ensemble.py +++ b/tests/test_ensemble.py @@ -392,9 +392,13 @@ def test_ensemble_ecl(): assert len(reekensemble.get_smry_dates(freq="yearly")) == 5 assert len(reekensemble.get_smry_dates(freq="monthly")) == 38 assert len(reekensemble.get_smry_dates(freq="daily")) == 1098 + assert len(reekensemble.get_smry_dates(freq="first")) == 1 assert len(reekensemble.get_smry_dates(freq="last")) == 1 + assert reekensemble.get_smry_dates(freq="first") == reekensemble.get_smry_dates( + freq="first", start_date="1900-01-01", end_date="2050-02-01" + ) assert reekensemble.get_smry_dates(freq="last") == reekensemble.get_smry_dates( - freq="last", end_date="2050-02-01" + freq="last", start_date="1900-01-01", end_date="2050-02-01" ) assert str(reekensemble.get_smry_dates(freq="report")[-1]) == "2003-01-02 00:00:00" @@ -402,6 +406,7 @@ def test_ensemble_ecl(): assert str(reekensemble.get_smry_dates(freq="yearly")[-1]) == "2004-01-01" assert str(reekensemble.get_smry_dates(freq="monthly")[-1]) == "2003-02-01" assert str(reekensemble.get_smry_dates(freq="daily")[-1]) == "2003-01-02" + assert str(reekensemble.get_smry_dates(freq="first")[-1]) == "2000-01-01" assert str(reekensemble.get_smry_dates(freq="last")[-1]) == "2003-01-02" assert ( @@ -431,6 +436,9 @@ def test_ensemble_ecl(): # Check that we can shortcut get_smry_dates: assert len(reekensemble.load_smry(column_keys=["FOPT"], time_index="yearly")) == 25 + assert len(reekensemble.load_smry(column_keys=["FOPR"], time_index="first")) == 5 + assert isinstance(reekensemble.get_df("unsmry--first.csv"), pd.DataFrame) + assert len(reekensemble.load_smry(column_keys=["FOPR"], time_index="last")) == 5 assert isinstance(reekensemble.get_df("unsmry--last.csv"), pd.DataFrame) diff --git a/tests/test_realization.py b/tests/test_realization.py index ffdf0f34..3781b9ed 100644 --- a/tests/test_realization.py +++ b/tests/test_realization.py @@ -546,7 +546,7 @@ def test_singlereal_ecl(tmp="TMP"): ) == 2 ) - # Date normalization should be overriden here: + # Date normalization should be overridden here: assert ( len( real.get_smry_dates( diff --git a/tests/test_virtualensemble.py b/tests/test_virtualensemble.py index 04f02141..8542f649 100644 --- a/tests/test_virtualensemble.py +++ b/tests/test_virtualensemble.py @@ -92,6 +92,19 @@ def test_virtualensemble(): assert "REAL" in fopt.columns assert "FGPT" not in fopt.columns assert len(fopt) == 25 + monthly_smry = vens.get_smry(time_index="monthly") + pd.testing.assert_series_equal( + vens.get_smry(time_index="first")["FOIP"].reset_index(drop=True), + monthly_smry[monthly_smry["DATE"] == min(monthly_smry["DATE"])][ + "FOIP" + ].reset_index(drop=True), + ) + pd.testing.assert_series_equal( + vens.get_smry(time_index="last")["FOIP"].reset_index(drop=True), + monthly_smry[monthly_smry["DATE"] == max(monthly_smry["DATE"])][ + "FOIP" + ].reset_index(drop=True), + ) # Check that we can default get_smry() alldefaults = vens.get_smry() diff --git a/tests/test_virtualrealization.py b/tests/test_virtualrealization.py index 6862d0c0..6e981bb0 100644 --- a/tests/test_virtualrealization.py +++ b/tests/test_virtualrealization.py @@ -200,6 +200,23 @@ def test_get_smry(): repeating ) + # Test that time_index='first' and ='last' is supported on a virtual realization + real = ensemble.ScratchRealization(realdir) + monthly_smry = real.load_smry(time_index="monthly", column_keys=["FOIP"]) + vreal = real.to_virtual() + pd.testing.assert_series_equal( + vreal.get_smry(time_index="first")["FOIP"].reset_index(drop=True), + monthly_smry[monthly_smry["DATE"] == min(monthly_smry["DATE"])][ + "FOIP" + ].reset_index(drop=True), + ) + pd.testing.assert_series_equal( + vreal.get_smry(time_index="last")["FOIP"].reset_index(drop=True), + monthly_smry[monthly_smry["DATE"] == max(monthly_smry["DATE"])][ + "FOIP" + ].reset_index(drop=True), + ) + def test_get_smry2(): """More tests for get_smry, with more choices in