Linting fixes and tidying

modl-uclouvain · Jul 26, 2024 · 77c0ebb
1 parent 500251f
commit 77c0ebb
Show file tree

Hide file tree

Showing 6 changed files with 85 additions and 152 deletions.
diff --git a/src/re2fractive/acquisition/__init__.py b/src/re2fractive/acquisition/__init__.py
@@ -1 +1,3 @@
 from .generic import extremise_expected_value, random_selection
+
+__all__ = ("extremise_expected_value", "random_selection")
diff --git a/src/re2fractive/acquisition/generic.py b/src/re2fractive/acquisition/generic.py
@@ -1,11 +1,11 @@
 """A place to collect generic acquisition functions."""
 
-import numpy as np
 import random
-from re2fractive.acquisition.rppf import rppf_y
 
+import numpy as np
 from optimade.adapters import Structure
 
+from re2fractive.acquisition.rppf import rppf_y
 
 
 def exploration(
@@ -101,12 +101,12 @@ def random_selection(
 def rppf(
     candidate_pool: list[Structure],
     decorated_structures: list[Structure],
-    properties: dict[str,str],
+    properties: dict[str, str],
     include_std: bool = False,
     num_to_select: int = 1,
-    rho:   float = 0.0,
-    wind:  float = 0.01,
-    Tstar: float = 0.01
+    rho: float = 0.0,
+    wind: float = 0.01,
+    Tstar: float = 0.01,
 ):
     """Returns the top `num_to_select` structures according to the Most Isolated Pareto structure Score (MIPS) calculated from projection free energy generalized to any dimension (number of properties). Does not take uncertainty into account. TODO maybe?
 
@@ -129,26 +129,29 @@ def rppf(
     orders = list(properties.values())
     for order in orders:
         if order not in ("max", "min"):
-            raise RuntimeError("The values of 'properties' must be either 'max' or 'min'")
+            raise RuntimeError(
+                "The values of 'properties' must be either 'max' or 'min'"
+            )
 
     # Replace max and min by 1 and -1 since rppf minimizes the objectives by default
-    properties_sign = {key: -1 if value == 'max' else 1 for key, value in properties.items()}
+    properties_sign = {
+        key: -1 if value == "max" else 1 for key, value in properties.items()
+    }
 
     y = np.empty((len(candidate_pool), len(properties_sign)))
     for i, s in enumerate(candidate_pool):
         for j, (key, value) in enumerate(properties_sign.items()):
-            y[i,j] = s["predictions"].get(key, 0.0) * value
+            y[i, j] = s["predictions"].get(key, 0.0) * value
             if include_std:
-                y[i,j] -= s["predictions"].get(f"{key}_std", 0.0)
-
+                y[i, j] -= s["predictions"].get(f"{key}_std", 0.0)
 
     ranking_index, ranking_MIPS = rppf_y(y, rho=rho, wind=wind, Tstar=Tstar)
-    
+
     selected = []
     for count, idx in enumerate(ranking_index):
         selected.append(candidate_pool[idx])
         count += 1
-        if count==num_to_select:
+        if count == num_to_select:
             break
 
     return selected
diff --git a/src/re2fractive/acquisition/rppf.py b/src/re2fractive/acquisition/rppf.py
@@ -1,50 +1,61 @@
 import numpy as np
+
 import re2fractive.acquisition.utils as utils
 
-def rppf_y(y, rho=0.0, wind=0.01, Tstar = 0.01):
 
-    datanum=len(y)
+def rppf_y(y, rho=0.0, wind=0.01, Tstar=0.01):
     objnum = len(y[0])
-    
-    #min-max normalization for each objective function
+
+    # min-max normalization for each objective function
     E = y.T
-    E_minmax = (E - np.min(E, axis=1, keepdims=True))/(np.max(E, axis=1, keepdims=True) - np.min(E, axis=1, keepdims=True))
-
+    E_minmax = (E - np.min(E, axis=1, keepdims=True)) / (
+        np.max(E, axis=1, keepdims=True) - np.min(E, axis=1, keepdims=True)
+    )
+
     # Definition of alpha
     weights = utils.generate_weights(M=objnum, wind=wind)
-    
+
     ###################################
     ##### free energy evaluations #####
     ###################################
-    
+
     # Eq.(5) - augmented weighted Tchebycheff
     H_all = utils.matmul_max(weights, E_minmax)
-    H_all += rho * np.ones(weights.shape)@E_minmax
-
-
+    H_all += rho * np.ones(weights.shape) @ E_minmax
+
     # Eq.(6) - min-max standardization
-    H_all_minmax = (H_all - np.min(H_all, axis=1, keepdims=True))/(np.max(H_all, axis=1, keepdims=True) - np.min(H_all, axis=1, keepdims=True))
-
+    H_all_minmax = (H_all - np.min(H_all, axis=1, keepdims=True)) / (
+        np.max(H_all, axis=1, keepdims=True) - np.min(H_all, axis=1, keepdims=True)
+    )
+
     # Eq.(1),(5) - one of the Pareto solutions is located at the optimal solution depending on α
     pareto_list = np.argmin(H_all, axis=1)
-    
+
     # Eq.(7)-(8) - calculation of MIPS score
-
-    FT = -Tstar * np.log(np.sum(np.exp(-H_all_minmax/Tstar - np.max(-H_all_minmax/Tstar, axis=1, keepdims=True)), axis=1)) + np.max(-H_all_minmax/Tstar, axis=1)
-
+
+    FT = -Tstar * np.log(
+        np.sum(
+            np.exp(
+                -H_all_minmax / Tstar
+                - np.max(-H_all_minmax / Tstar, axis=1, keepdims=True)
+            ),
+            axis=1,
+        )
+    ) + np.max(-H_all_minmax / Tstar, axis=1)
+
     #####################
     ##### opt value #####
     #####################
-    
-    arg_index = np.argsort(FT)[::-1]                       
+
+    arg_index = np.argsort(FT)[::-1]
     sorted_pareto = pareto_list[arg_index]
-                  
-    # Get unique values without sorting                    
+
+    # Get unique values without sorting
     uniques, indices = np.unique(sorted_pareto, return_index=True)
-                  
+
     # Sort the indices to get the original order of the sorted pareto list
     sorted_indices = np.sort(indices)
     ranking_index = sorted_pareto[sorted_indices]
     ranking_MIPS = FT[arg_index][sorted_indices]
-    
+
     return ranking_index, ranking_MIPS
diff --git a/src/re2fractive/acquisition/utils.py b/src/re2fractive/acquisition/utils.py
@@ -1,23 +1,28 @@
 import itertools
+
 import numpy as np
 
+
 def generate_weights(M, wind=0.01):
-    window=int(1/wind)
+    window = int(1 / wind)
     weights = []
-    for combination in itertools.product(range(window+1), repeat=M):
+    for combination in itertools.product(range(window + 1), repeat=M):
         if sum(combination) == window:
-            weight = [val*(100/window) / 100 for val in combination]
-            #weight = [round(val, 5) for val in weight]
+            weight = [val * (100 / window) / 100 for val in combination]
+            # weight = [round(val, 5) for val in weight]
             weights.append(weight)
     return np.array(weights)
 
+
 def matmul_max(matrix1, matrix2):
     rows1, cols1 = len(matrix1), len(matrix1[0])
     rows2, cols2 = len(matrix2), len(matrix2[0])
 
     # Check if the matrices can be multiplied
     if cols1 != rows2:
-        raise ValueError("Number of columns in the first matrix must equal the number of rows in the second matrix.")
+        raise ValueError(
+            "Number of columns in the first matrix must equal the number of rows in the second matrix."
+        )
 
     # Initialize the result matrix with zeros
     result = [[0] * cols2 for _ in range(rows1)]

diff --git a/src/re2fractive/campaign.py b/src/re2fractive/campaign.py
@@ -182,7 +182,7 @@ class Campaign:
     campaign_uuid: str | None = None
     """A UUID that uniquely identifies this campaign."""
 
-    featurizer: BatchableMODFeaturizer = field(default=MatminerFastFeaturizer)
+    featurizer: BatchableMODFeaturizer = field(default=MatminerFastFeaturizer)  # type: ignore[assignment]
     """The featurizer to use during learning."""
 
     @classmethod
@@ -212,9 +212,9 @@ def new_campaign_from_dataset(
             properties=list(initial_dataset.properties.keys()),
             model_cls=EnsembleMODNetModel,
             learning_strategy=learning_strategy,
-            datasets=[type(initial_dataset)] + datasets
+            datasets=[type(initial_dataset)] + datasets  # type: ignore[arg-type]
             if datasets
-            else [type(initial_dataset)],
+            else [type(initial_dataset)],  # type: ignore[list-item]
             campaign_uuid=campaign_uuid,
         )
 
@@ -261,14 +261,16 @@ def first_step(self, model_id: int | None = None):
         self.finalize_epoch(holdout_metrics, model_id, design_space)
 
     def finalize_epoch(self, holdout_metrics, model_id, design_space, results_df=None):
-        epoch = {
-            "model_metrics": holdout_metrics,
-            "model_id": model_id,
-            "design_space": [d.to_dict(orient="index") for d in design_space],
-            "selected": results_df.to_dict(orient="index")
-            if results_df is not None
-            else None,
-        }
+        epoch = Epoch(
+            **{
+                "model_metrics": holdout_metrics,
+                "model_id": model_id,
+                "design_space": [d.to_dict(orient="index") for d in design_space],
+                "selected": results_df.to_dict(orient="index")
+                if results_df is not None
+                else None,
+            }
+        )
 
         self.epochs.append(epoch)
         self.checkpoint()
@@ -418,6 +420,8 @@ def march(self, wait: bool = True) -> None:
 
         print(f"Gathering results for epoch {this_epoch_index}")
         results_df = self.gather_results(this_epoch_index)
+        if results_df is None:
+            raise RuntimeError(f"No results found for epoch {this_epoch_index}")
 
         print(f"Gathering features for epoch {this_epoch_index}")
         featurized_df, target_df = self.gather_features(results_df)
@@ -430,11 +434,13 @@ def march(self, wait: bool = True) -> None:
         self.finalize_epoch(holdout_metrics, model_id, design_space, results_df)
 
     def start_new_epoch(self):
-        design_space = self.epochs[-1]["design_space"]
-        ranking = self.make_selection(design_space)
+        raise NotImplementedError
+
+    #     design_space = self.epochs[-1]["design_space"]
+    # ranking = self.make_selection(design_space)
 
-        # # Submit or get pre-computed trials from database
-        # new_calcs = self.submit_oracle(ranking)
+    # # Submit or get pre-computed trials from database
+    # new_calcs = self.submit_oracle(ranking)
 
     def _epoch_finished(self, epoch_index: int) -> bool:
         """Check the epoch dir for results from all calculations."""
@@ -598,8 +604,8 @@ def parcel_up_structures(self) -> None:
         for ind, dataset in enumerate(self.datasets):
             self.datasets[ind] = dataset.load()
 
-        for epoch in results:
-            for i, row in epoch.iterrows():
+        for r in results:
+            for i, row in r.iterrows():
                 structure = None
                 for dataset in self.datasets:
                     # find structure

diff --git a/src/re2fractive/run.py b/src/re2fractive/run.py