-
Notifications
You must be signed in to change notification settings - Fork 577
[Depends on #3789] Implementing multistart version of theta_est using multiple sampling methods #3575
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[Depends on #3789] Implementing multistart version of theta_est using multiple sampling methods #3575
Changes from 8 commits
cdd7d52
eca0ba8
2160aec
266beea
b877ada
9f1ffe5
43f1ab3
ea067c8
3b839ef
3a7aa1d
c688f2d
50c36bc
8e5f078
f4c7018
4444e6d
e788000
4429caf
f071718
9b1545d
80079cb
1695519
0634014
a959346
04a9096
06e0a72
6b3ee40
5cadfac
922fd57
1be2d9e
56800f5
05381c5
5b4f9c1
07ae1e8
33d838f
65a9cff
90093df
e7b2df1
10f6b37
b0b7e2c
3e95e91
8d3a4d5
3982e1b
e829344
e464097
dc5ee76
6355818
099f541
76ee05e
d91ce3f
1aea99f
7d93cc0
d7d2214
7f21344
32d8d41
4e8c3ee
1e802ba
4b89bb3
4777253
c58d5f3
ea734b7
490abea
1074fb0
9543456
af4df1a
d4c4125
9d396fa
a97b21e
0afb5ba
191b131
2c0760e
bbe994b
2c2e024
2333a4b
b9af9f1
6cd3b4b
b46e1a7
3261798
fc478be
acba985
145c2d8
337095d
837192c
4b46c30
b9cf010
062a9ee
5baaa2f
c8194ac
26d70e3
4aa027d
26ba2ea
dd926f8
7b70d1d
43cbaa4
07798c9
b325f0d
8b47430
aac1476
66a1396
3957dc9
382ea20
0da606f
935b700
1fc71ee
56ac15d
60dc796
6bf439e
a68906b
f31a35f
b124def
2908c78
58435d3
db646ba
d222c4f
9d829c4
7ffab76
e267983
e3ae6e6
345c3f2
6c3d5a0
49b787a
8cf624e
c248c74
b975089
a63e4fc
b92aa7d
471dbe7
98d91fc
a6821ae
f0ef6d6
1f86b03
72b4345
c495e0f
82ee4ce
559a900
65067d5
db26533
c9b19d7
7bba006
2cd2614
5ba4fab
ee57ec9
7cffb34
ebbd279
0c1e605
92a2dd6
854366b
c55143d
8f3a902
3c87d7a
4e09d33
4f94329
12d0af1
72204d9
0a1167d
9ccfba4
78f65f3
43a67ab
ed2cf54
2b41ba5
cc0513a
2e5ac42
813a981
44a0b83
9c0d0a3
f9c8e3a
4df5b48
12a3c1b
4fc0336
b372edd
41e8e98
9fe600f
48ba6a6
ba4091c
eba10b3
c997944
d2b5d74
ae9808f
9fa6528
cb1ecea
261a78f
664d687
5530e9d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -235,6 +235,9 @@ def SSE(model): | |
| return expr | ||
|
|
||
|
|
||
| '''Adding pseudocode for draft implementation of the estimator class, | ||
| incorporating multistart. | ||
| ''' | ||
| class Estimator(object): | ||
| """ | ||
| Parameter estimation class | ||
|
|
@@ -275,6 +278,11 @@ def __init__( | |
| solver_options=None, | ||
| ): | ||
|
|
||
| '''first theta would be provided by the user in the initialization of | ||
| the Estimator class through the unknown parameter variables. Additional | ||
| would need to be generated using the sampling method provided by the user. | ||
| ''' | ||
|
|
||
| # check that we have a (non-empty) list of experiments | ||
| assert isinstance(experiment_list, list) | ||
| self.exp_list = experiment_list | ||
|
|
@@ -447,6 +455,130 @@ def TotalCost_rule(model): | |
| parmest_model = utils.convert_params_to_vars(model, theta_names, fix_vars=False) | ||
|
|
||
| return parmest_model | ||
|
|
||
| # Make new private method, _generate_initial_theta: | ||
| # This method will be used to generate the initial theta values for multistart | ||
| # optimization. It will take the theta names and the initial theta values | ||
| # and return a dictionary of theta names and their corresponding values. | ||
| def _generate_initial_theta(self, parmest_model, seed=None, n_restarts=None, multistart_sampling_method=None, user_provided=None): | ||
| if n_restarts == 1: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I like just sending a warning and not returning. For example, n_restarts might be 1 by default. You should check if n_restarts is an […]
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Alex had initially said to just return a message, but I will ask him again. |
||
| # If only one restart, return an empty list | ||
| return print("No multistart optimization needed. Please use normal theta_est()") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should raise a warning/log something here instead of using a print statement. That way you can use a debugger to control whether the message is displayed. |
||
|
|
||
| # Get the theta names and initial theta values | ||
| theta_names = self._return_theta_names() | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
| initial_theta = [parmest_model.find_component(name)() for name in theta_names] | ||
|
sscini marked this conversation as resolved.
Outdated
sscini marked this conversation as resolved.
Outdated
|
||
|
|
||
| # Get the lower and upper bounds for the theta values | ||
| lower_bound = np.array([parmest_model.find_component(name).lb for name in theta_names]) | ||
| upper_bound = np.array([parmest_model.find_component(name).ub for name in theta_names]) | ||
| # Check if the lower and upper bounds are defined | ||
| if any(bound is None for bound in lower_bound) and any(bound is None for bound in upper_bound): | ||
| raise ValueError( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You probably already know this, but you will need to check that all the errors are raised when expected. |
||
| "The lower and upper bounds for the theta values must be defined." | ||
| ) | ||
|
|
||
| # Check the length of theta_names and initial_theta, and make sure bounds are defined | ||
| if len(theta_names) != len(initial_theta): | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
| raise ValueError( | ||
| "The length of theta_names and initial_theta must be the same." | ||
| ) | ||
|
|
||
| if multistart_sampling_method == "random": | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
| np.random.seed(seed) | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
| # Generate random theta values | ||
| theta_vals_multistart = np.random.uniform(lower_bound, upper_bound, size=len(theta_names)) | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
|
|
||
| # Generate theta values using Latin hypercube sampling or Sobol sampling | ||
|
|
||
| elif multistart_sampling_method == "latin_hypercube": | ||
| # Generate theta values using Latin hypercube sampling | ||
| sampler = scipy.stats.qmc.LatinHypercube(d=len(theta_names), seed=seed) | ||
| samples = sampler.random(n=n_restarts) | ||
| theta_vals_multistart = np.array([lower_bound + (upper_bound - lower_bound) * theta for theta in samples]) | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
|
|
||
|
|
||
| elif multistart_sampling_method == "sobol": | ||
| sampler = scipy.stats.qmc.Sobol(d=len(theta_names), seed=seed) | ||
| # Generate theta values using Sobol sampling | ||
| # The first value of the Sobol sequence is 0, so we skip it | ||
| samples = sampler.random(n=n_restarts+1)[1:] | ||
| theta_vals_multistart = np.array([lower_bound + (upper_bound - lower_bound) * theta for theta in samples]) | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
|
|
||
| elif multistart_sampling_method == "user_provided": | ||
|
sscini marked this conversation as resolved.
Outdated
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think maybe […]. Probably should use […]. Also, this should have some comments at the beginning describing what the method does, just like the other options. |
||
| # Add user provided dataframe option | ||
| if user_provided is not None: | ||
|
|
||
| if isinstance(user_provided, np.ndarray): | ||
| # Check if the user provided numpy array has the same number of rows as the number of restarts | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Make sure comments are not too long. Break them up over multiple lines, like you did above.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Running black may help? |
||
| if user_provided.shape[0] != n_restarts: | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
| raise ValueError( | ||
| "The user provided numpy array must have the same number of rows as the number of restarts." | ||
| ) | ||
| # Check if the user provided numpy array has the same number of columns as the number of theta names | ||
| if user_provided.shape[1] != len(theta_names): | ||
| raise ValueError( | ||
| "The user provided numpy array must have the same number of columns as the number of theta names." | ||
| ) | ||
| # Check if the user provided numpy array has the same theta names as the model | ||
| # if not, raise an error | ||
| # if not all(theta in theta_names for theta in user_provided.columns): | ||
| raise ValueError( | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
| "The user provided numpy array must have the same theta names as the model." | ||
| ) | ||
| # If all checks pass, return the user provided numpy array | ||
| theta_vals_multistart = user_provided | ||
| elif isinstance(user_provided, pd.DataFrame): | ||
| # Check if the user provided dataframe has the same number of rows as the number of restarts | ||
| if user_provided.shape[0] != n_restarts: | ||
| raise ValueError( | ||
| "The user provided dataframe must have the same number of rows as the number of restarts." | ||
| ) | ||
| # Check if the user provided dataframe has the same number of columns as the number of theta names | ||
| if user_provided.shape[1] != len(theta_names): | ||
| raise ValueError( | ||
| "The user provided dataframe must have the same number of columns as the number of theta names." | ||
| ) | ||
| # Check if the user provided dataframe has the same theta names as the model | ||
| # if not, raise an error | ||
| # if not all(theta in theta_names for theta in user_provided.columns): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should be uncommented... |
||
| raise ValueError( | ||
| "The user provided dataframe must have the same theta names as the model." | ||
| ) | ||
| # If all checks pass, return the user provided dataframe | ||
| theta_vals_multistart = user_provided.iloc[0: len(initial_theta)].values | ||
| else: | ||
| raise ValueError( | ||
| "The user must provide a numpy array or pandas dataframe from a previous attempt to use the 'user_provided' method." | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same thing with all these long output messages, make sure they don't exceed one line (break them up). |
||
| ) | ||
|
|
||
| else: | ||
| raise ValueError( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This would probably be more consistent with other code (and other suggestions) if the options were using an Enum object. You can check the DoE code, or Shammah's PR. It just makes it so that the strings are attached to an object instead (safer).
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added an Enum class above; working to implement it here. Making a note to talk to Shammah about this. |
||
| "Invalid sampling method. Choose 'random', 'latin_hypercube', 'sobol' or 'user_provided'." | ||
| ) | ||
|
|
||
| # Make an output dataframe with the theta names and their corresponding values for each restart, | ||
| # and nan for the output info values | ||
| df_multistart = pd.DataFrame( | ||
| theta_vals_multistart, columns=theta_names | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
| ) | ||
|
|
||
|
|
||
| # Add the initial theta values to the first row of the dataframe | ||
| for i in range(1, n_restarts): | ||
| df_multistart.iloc[i, :] = theta_vals_multistart[i, :] | ||
| df_multistart.iloc[0, :] = initial_theta | ||
|
|
||
|
|
||
| # Add the output info values to the dataframe, starting values as nan | ||
| for i in range(len(theta_names)): | ||
| df_multistart[f'converged_{theta_names[i]}'] = np.nan | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
| df_multistart["initial objective"] = np.nan | ||
| df_multistart["final objective"] = np.nan | ||
| df_multistart["solver termination"] = np.nan | ||
| df_multistart["solve_time"] = np.nan | ||
|
|
||
| return df_multistart | ||
|
|
||
| def _instance_creation_callback(self, experiment_number=None, cb_data=None): | ||
| model = self._create_parmest_model(experiment_number) | ||
|
|
@@ -921,6 +1053,136 @@ def theta_est( | |
| cov_n=cov_n, | ||
| ) | ||
|
|
||
| def theta_est_multistart( | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
| self, | ||
| n_restarts=20, | ||
| buffer=10, | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
| multistart_sampling_method="random", | ||
| user_provided=None, | ||
| seed=None, | ||
| save_results=False, | ||
| theta_vals=None, | ||
| solver="ef_ipopt", | ||
| file_name = "multistart_results.csv", | ||
| return_values=[], | ||
| ): | ||
| """ | ||
| Parameter estimation using multistart optimization | ||
|
|
||
| Parameters | ||
| ---------- | ||
| n_restarts: int, optional | ||
| Number of restarts for multistart. Default is 1. | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
| theta_sampling_method: string, optional | ||
| Method used to sample theta values. Options are "random", "latin_hypercube", or "sobol". | ||
| Default is "random". | ||
| solver: string, optional | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
| Currently only "ef_ipopt" is supported. Default is "ef_ipopt". | ||
| return_values: list, optional | ||
| List of Variable names, used to return values from the model for data reconciliation | ||
|
|
||
|
|
||
| Returns | ||
| ------- | ||
| objectiveval: float | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
| The objective function value | ||
| thetavals: pd.Series | ||
| Estimated values for theta | ||
| variable values: pd.DataFrame | ||
| Variable values for each variable name in return_values (only for solver='ef_ipopt') | ||
|
|
||
| """ | ||
|
|
||
| # check if we are using deprecated parmest | ||
| if self.pest_deprecated is not None: | ||
| return print( | ||
| "Multistart is not supported in the deprecated parmest interface" | ||
| ) | ||
|
|
||
| assert isinstance(n_restarts, int) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also check that this is > 1
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please look at other Pyomo code for examples of throwing exceptions.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree with @adowling2 here, you need to throw an exception so you can test the exception is caught. |
||
| assert isinstance(multistart_sampling_method, str) | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
| assert isinstance(solver, str) | ||
| assert isinstance(return_values, list) | ||
|
|
||
| if n_restarts > 1 and multistart_sampling_method is not None: | ||
|
|
||
| # Find the initialized values of theta from the labeled parmest model | ||
| # and the theta names from the estimator object | ||
| parmest_model = self._create_parmest_model(experiment_number=0) | ||
| theta_names = self._return_theta_names() | ||
| initial_theta = [parmest_model.find_component(name)() for name in theta_names] | ||
|
|
||
| # Generate theta values using the sampling method | ||
| results_df = self._generate_initial_theta(parmest_model, seed=seed, n_restarts=n_restarts, | ||
| multistart_sampling_method=multistart_sampling_method, user_provided=user_provided) | ||
| results_df = pd.DataFrame(results_df) | ||
| # Extract theta_vals from the dataframe | ||
| theta_vals = results_df.iloc[:, :len(theta_names)] | ||
| converged_theta_vals = np.zeros((n_restarts, len(theta_names))) | ||
|
|
||
| # make empty list to store results | ||
| for i in range(n_restarts): | ||
| # for number of restarts, call the self._Q_opt method | ||
| # with the theta values generated using the _generalize_initial_theta method | ||
|
|
||
| # set the theta values in the model | ||
| theta_vals_current = theta_vals.iloc[i, :] | ||
|
|
||
|
|
||
| # Call the _Q_opt method with the generated theta values | ||
| objectiveval, converged_theta, variable_values = self._Q_opt( | ||
| ThetaVals=theta_vals_current, | ||
| solver=solver, | ||
| return_values=return_values, | ||
| ) | ||
|
|
||
| # Check if the solver terminated successfully | ||
| if variable_values.solver.termination_condition != pyo.TerminationCondition.optimal: | ||
| # If not, set the objective value to NaN | ||
| solver_termination = variable_values.solver.termination_condition | ||
| solve_time = variable_values.solver.time | ||
| thetavals = np.nan | ||
|
sscini marked this conversation as resolved.
Outdated
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is never used? |
||
|
|
||
| else: | ||
|
|
||
| # If the solver terminated successfully, set the objective value | ||
| converged_theta_vals[i, :] = converged_theta.values() | ||
| init_objectiveval = objectiveval | ||
| final_objectiveval = variable_values.solver.objective() | ||
| solver_termination = variable_values.solver.termination_condition | ||
| solve_time = variable_values.solver.time | ||
|
|
||
| # Check if the objective value is better than the best objective value | ||
| if final_objectiveval < best_objectiveval: | ||
| best_objectiveval = objectiveval | ||
| best_theta = thetavals | ||
|
|
||
| # Store the results in a list or DataFrame | ||
| # depending on the number of restarts | ||
| results_df.iloc[i, len(theta_names):len(theta_names) + len(theta_names)] = converged_theta_vals[i, :] | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
| results_df.iloc[i, -4] = init_objectiveval | ||
| results_df.iloc[i, -3] = objectiveval | ||
| results_df.iloc[i, -2] = variable_values.solver.termination_condition | ||
| results_df.iloc[i, -1] = variable_values.solver.time | ||
|
|
||
| # Add buffer to save the dataframe dynamically, if save_results is True | ||
| if save_results and (i + 1) % buffer == 0: | ||
| mode = 'w' if i + 1 == buffer else 'a' | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
| header = i + 1 == buffer | ||
| results_df.to_csv( | ||
| file_name, mode=mode, header=header, index=False | ||
| ) | ||
| print(f"Intermediate results saved after {i + 1} iterations.") | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
|
|
||
|
sscini marked this conversation as resolved.
|
||
| # Final save after all iterations | ||
| if save_results: | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
| results_df.to_csv(file_name, mode='a', header=False, index=False) | ||
| print("Final results saved.") | ||
|
sscini marked this conversation as resolved.
Outdated
|
||
|
|
||
| return results_df, best_theta, best_objectiveval | ||
|
|
||
|
|
||
|
|
||
| def theta_est_bootstrap( | ||
| self, | ||
| bootstrap_samples, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.