Skip to content

Fixing inconsistent results when using seed in parmest examples #3621

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Jul 1, 2025
Merged
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -39,7 +39,7 @@ def main():
obj, theta = pest.theta_est()

# Parameter estimation with bootstrap resampling
bootstrap_theta = pest.theta_est_bootstrap(50)
bootstrap_theta = pest.theta_est_bootstrap(50, seed=524)

# Plot results
parmest.graphics.pairwise_plot(bootstrap_theta, title="Bootstrap theta")
Original file line number Diff line number Diff line change
@@ -39,11 +39,11 @@ def main():
obj, theta = pest.theta_est()

# Bootstrapping
bootstrap_theta = pest.theta_est_bootstrap(10)
bootstrap_theta = pest.theta_est_bootstrap(10, seed=524)
print(bootstrap_theta)

# Confidence region test
CR = pest.confidence_region_test(bootstrap_theta, "MVN", [0.5, 0.75, 1.0])
CR = pest.confidence_region_test(bootstrap_theta, "MVN", [0.5, 0.75, 1.0], seed=524)
print(CR)


Original file line number Diff line number Diff line change
@@ -24,6 +24,9 @@ def main():
file_name = abspath(join(file_dirname, "reactor_data.csv"))
data = pd.read_csv(file_name)

seed = 524 # Set seed for reproducibility
np.random.seed(seed) # Set seed for reproducibility

# Create more data for the example
N = 50
df_std = data.std().to_frame().transpose()
@@ -50,10 +53,10 @@ def main():
### Parameter estimation with 'leave-N-out'
# Example use case: For each combination of data where one data point is left
# out, estimate theta
lNo_theta = pest.theta_est_leaveNout(1)
lNo_theta = pest.theta_est_leaveNout(1, seed=524)
print(lNo_theta.head())

parmest.graphics.pairwise_plot(lNo_theta, theta)
parmest.graphics.pairwise_plot(lNo_theta, theta, seed=524)

### Leave one out/boostrap analysis
# Example use case: leave 25 data points out, run 20 bootstrap samples with the
@@ -81,6 +84,8 @@ def main():
alpha,
["MVN"],
title="Alpha: " + str(alpha) + ", " + str(theta_est_N.loc[0, alpha]),
seed=524 + i, # setting the seed for testing repeatability,
# for typical use cases, this should not be set
)

# Extract the percent of points that are within the alpha region
25 changes: 19 additions & 6 deletions pyomo/contrib/parmest/graphics.py
Original file line number Diff line number Diff line change
@@ -218,6 +218,7 @@ def pairwise_plot(
add_obj_contour=True,
add_legend=True,
filename=None,
seed=None,
):
"""
Plot pairwise relationship for theta values, and optionally alpha-level
@@ -272,6 +273,9 @@ def pairwise_plot(
Add a legend to the plot
filename: string, optional
Filename used to save the figure
seed: int, optional
Random seed used to generate theta values if theta_values is a tuple.
If None, the seed is not set.
"""
assert isinstance(theta_values, (pd.DataFrame, tuple))
assert isinstance(theta_star, (type(None), dict, pd.Series, pd.DataFrame))
@@ -283,6 +287,9 @@ def pairwise_plot(
assert isinstance(add_obj_contour, bool)
assert isinstance(filename, (type(None), str))

if seed is not None:
np.random.seed(seed)

# If theta_values is a tuple containing (mean, cov, n), create a DataFrame of values
if isinstance(theta_values, tuple):
assert len(theta_values) == 3
@@ -292,7 +299,7 @@ def pairwise_plot(
if isinstance(mean, dict):
mean = pd.Series(mean)
theta_names = mean.index
mvn_dist = stats.multivariate_normal(mean, cov)
mvn_dist = stats.multivariate_normal(mean, cov, seed=seed)
theta_values = pd.DataFrame(
mvn_dist.rvs(n, random_state=1), columns=theta_names
)
@@ -402,7 +409,7 @@ def pairwise_plot(
)

elif dist == "MVN":
mvn_dist = fit_mvn_dist(thetas)
mvn_dist = fit_mvn_dist(thetas, seed=seed)
Z = mvn_dist.pdf(thetas)
score = stats.scoreatpercentile(Z, (1 - alpha) * 100)
g.map_offdiag(
@@ -419,7 +426,7 @@ def pairwise_plot(
)

elif dist == "KDE":
kde_dist = fit_kde_dist(thetas)
kde_dist = fit_kde_dist(thetas, seed=seed)
Z = kde_dist.pdf(thetas.transpose())
score = stats.scoreatpercentile(Z, (1 - alpha) * 100)
g.map_offdiag(
@@ -512,7 +519,7 @@ def fit_rect_dist(theta_values, alpha):
return lower_bound, upper_bound


def fit_mvn_dist(theta_values):
def fit_mvn_dist(theta_values, seed=None):
"""
Fit a multivariate normal distribution to theta values
@@ -527,13 +534,17 @@ def fit_mvn_dist(theta_values):
"""
assert isinstance(theta_values, pd.DataFrame)

if seed is not None:
np.random.seed(seed)

dist = stats.multivariate_normal(
theta_values.mean(), theta_values.cov(), allow_singular=True
theta_values.mean(), theta_values.cov(), allow_singular=True, seed=seed
)

return dist


def fit_kde_dist(theta_values):
def fit_kde_dist(theta_values, seed=None):
"""
Fit a Gaussian kernel-density distribution to theta values
@@ -547,6 +558,8 @@ def fit_kde_dist(theta_values):
scipy.stats.gaussian_kde distribution
"""
assert isinstance(theta_values, pd.DataFrame)
if seed is not None:
np.random.seed(seed)

dist = stats.gaussian_kde(theta_values.transpose().values)

13 changes: 8 additions & 5 deletions pyomo/contrib/parmest/parmest.py
Original file line number Diff line number Diff line change
@@ -407,7 +407,6 @@ def _create_parmest_model(self, experiment_number):

# Add objective function (optional)
if self.obj_function:

# Check for component naming conflicts
reserved_names = [
'Total_Cost_Objective',
@@ -1123,13 +1122,14 @@ def leaveNout_bootstrap_test(

obj, theta = self.theta_est()

bootstrap_theta = self.theta_est_bootstrap(bootstrap_samples)
bootstrap_theta = self.theta_est_bootstrap(bootstrap_samples, seed=seed)

training, test = self.confidence_region_test(
bootstrap_theta,
distribution=distribution,
alphas=alphas,
test_theta_values=theta,
seed=seed,
)

results.append((sample, test, training))
@@ -1287,7 +1287,7 @@ def likelihood_ratio_test(
return LR

def confidence_region_test(
self, theta_values, distribution, alphas, test_theta_values=None
self, theta_values, distribution, alphas, test_theta_values=None, seed=None
):
"""
Confidence region test to determine if theta values are within a
@@ -1341,6 +1341,9 @@ def confidence_region_test(
if test_theta_values is not None:
test_result = test_theta_values.copy()

if seed is not None:
np.random.seed(seed)

for a in alphas:
if distribution == 'Rect':
lb, ub = graphics.fit_rect_dist(theta_values, a)
@@ -1355,7 +1358,7 @@ def confidence_region_test(
).all(axis=1)

elif distribution == 'MVN':
dist = graphics.fit_mvn_dist(theta_values)
dist = graphics.fit_mvn_dist(theta_values, seed=seed)
Z = dist.pdf(theta_values)
score = scipy.stats.scoreatpercentile(Z, (1 - a) * 100)
training_results[a] = Z >= score
@@ -1366,7 +1369,7 @@ def confidence_region_test(
test_result[a] = Z >= score

elif distribution == 'KDE':
dist = graphics.fit_kde_dist(theta_values)
dist = graphics.fit_kde_dist(theta_values, seed=seed)
Z = dist.pdf(theta_values.transpose())
score = scipy.stats.scoreatpercentile(Z, (1 - a) * 100)
training_results[a] = Z >= score
10 changes: 9 additions & 1 deletion pyomo/contrib/parmest/tests/test_graphics.py
Original file line number Diff line number Diff line change
@@ -31,6 +31,8 @@
import pyomo.contrib.parmest.parmest as parmest
import pyomo.contrib.parmest.graphics as graphics

from pyomo.contrib.parmest.tests.test_parmest import _RANDOM_SEED_FOR_TESTING

testdir = os.path.dirname(os.path.abspath(__file__))


@@ -47,6 +49,7 @@
)
class TestGraphics(unittest.TestCase):
def setUp(self):
np.random.seed(_RANDOM_SEED_FOR_TESTING)
self.A = pd.DataFrame(
np.random.randint(0, 100, size=(100, 4)), columns=list('ABCD')
)
@@ -55,7 +58,12 @@ def setUp(self):
)

def test_pairwise_plot(self):
graphics.pairwise_plot(self.A, alpha=0.8, distributions=['Rect', 'MVN', 'KDE'])
graphics.pairwise_plot(
self.A,
alpha=0.8,
distributions=['Rect', 'MVN', 'KDE'],
seed=_RANDOM_SEED_FOR_TESTING,
)

def test_grouped_boxplot(self):
graphics.grouped_boxplot(self.A, self.B, normalize=True, group_names=['A', 'B'])
28 changes: 22 additions & 6 deletions pyomo/contrib/parmest/tests/test_parmest.py
Original file line number Diff line number Diff line change
@@ -33,6 +33,9 @@
pynumero_ASL_available = AmplInterface.available()
testdir = this_file_dir()

# Set the global seed for random number generation in tests
_RANDOM_SEED_FOR_TESTING = 524


@unittest.skipIf(
not parmest.parmest_available,
@@ -45,6 +48,8 @@ def setUp(self):
RooneyBieglerExperiment,
)

np.random.seed(_RANDOM_SEED_FOR_TESTING) # Set seed for reproducibility

# Note, the data used in this test has been corrected to use
# data.loc[5,'hour'] = 7 (instead of 6)
data = pd.DataFrame(
@@ -93,7 +98,9 @@ def test_bootstrap(self):
objval, thetavals = self.pest.theta_est()

num_bootstraps = 10
theta_est = self.pest.theta_est_bootstrap(num_bootstraps, return_samples=True)
theta_est = self.pest.theta_est_bootstrap(
num_bootstraps, return_samples=True, seed=_RANDOM_SEED_FOR_TESTING
)

num_samples = theta_est["samples"].apply(len)
self.assertEqual(len(theta_est.index), 10)
@@ -109,9 +116,15 @@ def test_bootstrap(self):
self.assertEqual(CR[0.75].sum(), 7)
self.assertEqual(CR[1.0].sum(), 10) # all true

graphics.pairwise_plot(theta_est)
graphics.pairwise_plot(theta_est, thetavals)
graphics.pairwise_plot(theta_est, thetavals, 0.8, ["MVN", "KDE", "Rect"])
graphics.pairwise_plot(theta_est, seed=_RANDOM_SEED_FOR_TESTING)
graphics.pairwise_plot(theta_est, thetavals, seed=_RANDOM_SEED_FOR_TESTING)
graphics.pairwise_plot(
theta_est,
thetavals,
0.8,
["MVN", "KDE", "Rect"],
seed=_RANDOM_SEED_FOR_TESTING,
)

@unittest.skipIf(
not graphics.imports_available, "parmest.graphics imports are unavailable"
@@ -140,7 +153,7 @@ def test_leaveNout(self):
self.assertTrue(lNo_theta.shape == (6, 2))

results = self.pest.leaveNout_bootstrap_test(
1, None, 3, "Rect", [0.5, 1.0], seed=5436
1, None, 3, "Rect", [0.5, 1.0], seed=_RANDOM_SEED_FOR_TESTING
)
self.assertEqual(len(results), 6) # 6 lNo samples
i = 1
@@ -335,6 +348,7 @@ def setUp(self):
RooneyBieglerExperiment,
)

np.random.seed(_RANDOM_SEED_FOR_TESTING) # Set seed for reproducibility
self.data = pd.DataFrame(
data=[[1, 8.3], [2, 10.3], [3, 19.0], [4, 16.0], [5, 15.6], [7, 19.8]],
columns=["hour", "y"],
@@ -710,6 +724,8 @@ def ABC_model(data):
cb_meas = data["cb"]
cc_meas = data["cc"]

np.random.seed(_RANDOM_SEED_FOR_TESTING) # Set seed for reproducibility

if isinstance(data, pd.DataFrame):
meas_t = data.index # time index
else: # dictionary
@@ -1140,7 +1156,7 @@ def test_leaveNout(self):
self.assertTrue(lNo_theta.shape == (6, 2))

results = self.pest.leaveNout_bootstrap_test(
1, None, 3, "Rect", [0.5, 1.0], seed=5436
1, None, 3, "Rect", [0.5, 1.0], seed=_RANDOM_SEED_FOR_TESTING
)
self.assertTrue(len(results) == 6) # 6 lNo samples
i = 1