Skip to content

Commit

Permalink
Linting fixes and tidying
Browse files Browse the repository at this point in the history
  • Loading branch information
ml-evs committed Jul 26, 2024
1 parent 500251f commit 77c0ebb
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 152 deletions.
2 changes: 2 additions & 0 deletions src/re2fractive/acquisition/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
from .generic import extremise_expected_value, random_selection

__all__ = ("extremise_expected_value", "random_selection")
29 changes: 16 additions & 13 deletions src/re2fractive/acquisition/generic.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"""A place to collect generic acquisition functions."""

import numpy as np
import random
from re2fractive.acquisition.rppf import rppf_y

import numpy as np
from optimade.adapters import Structure

from re2fractive.acquisition.rppf import rppf_y


def exploration(
Expand Down Expand Up @@ -101,12 +101,12 @@ def random_selection(
def rppf(
candidate_pool: list[Structure],
decorated_structures: list[Structure],
properties: dict[str,str],
properties: dict[str, str],
include_std: bool = False,
num_to_select: int = 1,
rho: float = 0.0,
wind: float = 0.01,
Tstar: float = 0.01
rho: float = 0.0,
wind: float = 0.01,
Tstar: float = 0.01,
):
"""Returns the top `num_to_select` structures according to the Most Isolated Pareto structure Score (MIPS) calculated from projection free energy generalized to any dimension (number of properties). Does not take uncertainty into account. TODO maybe?
Expand All @@ -129,26 +129,29 @@ def rppf(
orders = list(properties.values())
for order in orders:
if order not in ("max", "min"):
raise RuntimeError("The values of 'properties' must be either 'max' or 'min'")
raise RuntimeError(
"The values of 'properties' must be either 'max' or 'min'"
)

# Replace max and min by 1 and -1 since rppf minimizes the objectives by default
properties_sign = {key: -1 if value == 'max' else 1 for key, value in properties.items()}
properties_sign = {
key: -1 if value == "max" else 1 for key, value in properties.items()
}

y = np.empty((len(candidate_pool), len(properties_sign)))
for i, s in enumerate(candidate_pool):
for j, (key, value) in enumerate(properties_sign.items()):
y[i,j] = s["predictions"].get(key, 0.0) * value
y[i, j] = s["predictions"].get(key, 0.0) * value
if include_std:
y[i,j] -= s["predictions"].get(f"{key}_std", 0.0)

y[i, j] -= s["predictions"].get(f"{key}_std", 0.0)

ranking_index, ranking_MIPS = rppf_y(y, rho=rho, wind=wind, Tstar=Tstar)

selected = []
for count, idx in enumerate(ranking_index):
selected.append(candidate_pool[idx])
count += 1
if count==num_to_select:
if count == num_to_select:
break

return selected
57 changes: 34 additions & 23 deletions src/re2fractive/acquisition/rppf.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,61 @@
import numpy as np

import re2fractive.acquisition.utils as utils

def _minmax_rows(values):
    """Rescale every row of `values` to [0, 1]; constant rows become all zeros."""
    lowest = np.min(values, axis=1, keepdims=True)
    span = np.max(values, axis=1, keepdims=True) - lowest
    # A constant row has zero span; dividing by it would fill the row with NaN
    # and poison every score computed downstream.
    safe_span = np.where(span > 0, span, 1.0)
    return (values - lowest) / safe_span


def rppf_y(y, rho=0.0, wind=0.01, Tstar=0.01):
    """Rank candidates by their MIPS score computed from the objective matrix `y`.

    All objectives are treated as quantities to minimise.

    Parameters:
        y: 2-D array-like with one row per candidate and one column per
            objective.
        rho: Coefficient of the summed-objective term added to the weighted
            Tchebycheff scalarisation (Eq. 5).
        wind: Grid step handed to `utils.generate_weights` when building the
            set of weight vectors.
        Tstar: Temperature used in the free-energy (MIPS) evaluation
            (Eqs. 7-8).

    Returns:
        A tuple `(ranking_index, ranking_MIPS)`. `ranking_index` holds the row
        indices of `y` that minimise the scalarisation for at least one weight
        vector, ordered by decreasing free energy and with duplicates removed
        (first occurrence kept). `ranking_MIPS` holds the free-energy value
        associated with each entry of `ranking_index`.

    Raises:
        ValueError: If `y` is not a non-empty 2-D array.

    """
    y = np.asarray(y, dtype=float)
    if y.ndim != 2 or y.size == 0:
        raise ValueError("'y' must be a non-empty 2-D array (candidates x objectives)")
    objnum = y.shape[1]

    # min-max normalization for each objective function
    E_minmax = _minmax_rows(y.T)

    # Definition of alpha
    weights = utils.generate_weights(M=objnum, wind=wind)

    ###################################
    ##### free energy evaluations #####
    ###################################

    # Eq.(5) - augmented weighted Tchebycheff
    # NOTE(review): the return type of `matmul_max` is not visible from here;
    # coercing to an array guarantees the addition below is element-wise
    # (an in-place `+=` on a plain list would extend it instead).
    H_all = np.asarray(utils.matmul_max(weights, E_minmax), dtype=float)
    H_all = H_all + rho * np.ones(weights.shape) @ E_minmax

    # Eq.(6) - min-max standardization
    H_all_minmax = _minmax_rows(H_all)

    # Eq.(1),(5) - one of the Pareto solutions is located at the optimal solution depending on α
    pareto_list = np.argmin(H_all, axis=1)

    # Eq.(7)-(8) - calculation of MIPS score, F = -T* log(sum(exp(-H / T*))),
    # evaluated with the usual max-shift for numerical stability. After the
    # row-wise min-max step the shift is zero (each row's minimum of H is 0),
    # but it is kept inside the -T* factor so the identity holds in general.
    scaled = -H_all_minmax / Tstar
    shift = np.max(scaled, axis=1, keepdims=True)
    FT = -Tstar * (shift[:, 0] + np.log(np.sum(np.exp(scaled - shift), axis=1)))

    #####################
    ##### opt value #####
    #####################

    arg_index = np.argsort(FT)[::-1]
    sorted_pareto = pareto_list[arg_index]

    # np.unique reports the first position of every distinct value; sorting
    # those positions restores the free-energy ordering of the de-duplicated list.
    _, first_positions = np.unique(sorted_pareto, return_index=True)
    sorted_indices = np.sort(first_positions)

    ranking_index = sorted_pareto[sorted_indices]
    ranking_MIPS = FT[arg_index][sorted_indices]

    return ranking_index, ranking_MIPS
15 changes: 10 additions & 5 deletions src/re2fractive/acquisition/utils.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,28 @@
import itertools

import numpy as np


def _compositions(total, parts):
    """Yield, in lexicographic order, each tuple of `parts` non-negative ints summing to `total`."""
    if parts == 0:
        if total == 0:
            yield ()
        return
    if parts == 1:
        yield (total,)
        return
    for head in range(total + 1):
        for tail in _compositions(total - head, parts - 1):
            yield (head, *tail)


def generate_weights(M, wind=0.01):
    """Build every weight vector of length `M` on a regular grid of step `wind`.

    The grid resolution is `window = int(1 / wind)`; each returned row is a
    tuple of non-negative multiples of `1 / window` that sums to one. Rows are
    produced in lexicographic order of their integer numerators.

    Parameters:
        M: Number of components (objectives) in each weight vector.
        wind: Grid step; must satisfy `0 < wind <= 1`.

    Returns:
        A NumPy array with one weight vector per row.

    Raises:
        ValueError: If `M` is negative or `wind` is outside `(0, 1]`.

    """
    if M < 0:
        raise ValueError("'M' must be non-negative")
    if not wind > 0:
        raise ValueError("'wind' must be a positive number")
    window = int(1 / wind)
    if window < 1:
        raise ValueError("'wind' must not exceed 1")

    # Enumerate only the tuples that sum to `window` rather than filtering the
    # full (window + 1) ** M Cartesian product, which grows out of reach quickly.
    weights = [
        [val / window for val in combination]
        for combination in _compositions(window, M)
    ]
    return np.array(weights)


def matmul_max(matrix1, matrix2):
rows1, cols1 = len(matrix1), len(matrix1[0])
rows2, cols2 = len(matrix2), len(matrix2[0])

# Check if the matrices can be multiplied
if cols1 != rows2:
raise ValueError("Number of columns in the first matrix must equal the number of rows in the second matrix.")
raise ValueError(
"Number of columns in the first matrix must equal the number of rows in the second matrix."
)

# Initialize the result matrix with zeros
result = [[0] * cols2 for _ in range(rows1)]
Expand Down
40 changes: 23 additions & 17 deletions src/re2fractive/campaign.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ class Campaign:
campaign_uuid: str | None = None
"""A UUID that uniquely identifies this campaign."""

featurizer: BatchableMODFeaturizer = field(default=MatminerFastFeaturizer)
featurizer: BatchableMODFeaturizer = field(default=MatminerFastFeaturizer) # type: ignore[assignment]
"""The featurizer to use during learning."""

@classmethod
Expand Down Expand Up @@ -212,9 +212,9 @@ def new_campaign_from_dataset(
properties=list(initial_dataset.properties.keys()),
model_cls=EnsembleMODNetModel,
learning_strategy=learning_strategy,
datasets=[type(initial_dataset)] + datasets
datasets=[type(initial_dataset)] + datasets # type: ignore[arg-type]
if datasets
else [type(initial_dataset)],
else [type(initial_dataset)], # type: ignore[list-item]
campaign_uuid=campaign_uuid,
)

Expand Down Expand Up @@ -261,14 +261,16 @@ def first_step(self, model_id: int | None = None):
self.finalize_epoch(holdout_metrics, model_id, design_space)

def finalize_epoch(self, holdout_metrics, model_id, design_space, results_df=None):
epoch = {
"model_metrics": holdout_metrics,
"model_id": model_id,
"design_space": [d.to_dict(orient="index") for d in design_space],
"selected": results_df.to_dict(orient="index")
if results_df is not None
else None,
}
epoch = Epoch(
**{
"model_metrics": holdout_metrics,
"model_id": model_id,
"design_space": [d.to_dict(orient="index") for d in design_space],
"selected": results_df.to_dict(orient="index")
if results_df is not None
else None,
}
)

self.epochs.append(epoch)
self.checkpoint()
Expand Down Expand Up @@ -418,6 +420,8 @@ def march(self, wait: bool = True) -> None:

print(f"Gathering results for epoch {this_epoch_index}")
results_df = self.gather_results(this_epoch_index)
if results_df is None:
raise RuntimeError(f"No results found for epoch {this_epoch_index}")

print(f"Gathering features for epoch {this_epoch_index}")
featurized_df, target_df = self.gather_features(results_df)
Expand All @@ -430,11 +434,13 @@ def march(self, wait: bool = True) -> None:
self.finalize_epoch(holdout_metrics, model_id, design_space, results_df)

def start_new_epoch(self):
design_space = self.epochs[-1]["design_space"]
ranking = self.make_selection(design_space)
raise NotImplementedError

# design_space = self.epochs[-1]["design_space"]
# ranking = self.make_selection(design_space)

# # Submit or get pre-computed trials from database
# new_calcs = self.submit_oracle(ranking)
# # Submit or get pre-computed trials from database
# new_calcs = self.submit_oracle(ranking)

def _epoch_finished(self, epoch_index: int) -> bool:
"""Check the epoch dir for results from all calculations."""
Expand Down Expand Up @@ -598,8 +604,8 @@ def parcel_up_structures(self) -> None:
for ind, dataset in enumerate(self.datasets):
self.datasets[ind] = dataset.load()

for epoch in results:
for i, row in epoch.iterrows():
for r in results:
for i, row in r.iterrows():
structure = None
for dataset in self.datasets:
# find structure
Expand Down
94 changes: 0 additions & 94 deletions src/re2fractive/run.py

This file was deleted.

0 comments on commit 77c0ebb

Please sign in to comment.