diff --git a/pyomo/contrib/parmest/examples/reaction_kinetics/simple_reaction_parmest_example.py b/pyomo/contrib/parmest/examples/reaction_kinetics/simple_reaction_parmest_example.py index 72eeb606fc2..301c2bebb30 100644 --- a/pyomo/contrib/parmest/examples/reaction_kinetics/simple_reaction_parmest_example.py +++ b/pyomo/contrib/parmest/examples/reaction_kinetics/simple_reaction_parmest_example.py @@ -52,23 +52,6 @@ def simple_reaction_model(data): # fix all of the regressed parameters model.k.fix() - # =================================================================== - # Stage-specific cost computations - def ComputeFirstStageCost_rule(model): - return 0 - - model.FirstStageCost = Expression(rule=ComputeFirstStageCost_rule) - - def AllMeasurements(m): - return (float(data['y']) - m.y) ** 2 - - model.SecondStageCost = Expression(rule=AllMeasurements) - - def total_cost_rule(m): - return m.FirstStageCost + m.SecondStageCost - - model.Total_Cost_Objective = Objective(rule=total_cost_rule, sense=minimize) - return model @@ -90,6 +73,8 @@ def label_model(self): m.experiment_outputs.update( [(m.x1, self.data['x1']), (m.x2, self.data['x2']), (m.y, self.data['y'])] ) + m.measurement_error = pyo.Suffix(direction=pyo.Suffix.LOCAL) + m.measurement_error.update([(m.y, None), (m.x1, None), (m.x2, None)]) return m @@ -161,7 +146,7 @@ def main(): # Only estimate the parameter k[1]. The parameter k[2] will remain fixed # at its initial value - pest = parmest.Estimator(exp_list) + pest = parmest.Estimator(exp_list, obj_function="SSE") obj, theta = pest.theta_est() print(obj) print(theta) @@ -174,9 +159,11 @@ def main(): # ======================================================================= # Estimate both k1 and k2 and compute the covariance matrix - pest = parmest.Estimator(exp_list) - n = 15 # total number of data points used in the objective (y in 15 scenarios) - obj, theta, cov = pest.theta_est(calc_cov=True, cov_n=n) + pest = parmest.Estimator(exp_list, obj_function="SSE") + # Calculate the objective value and estimated parameters + obj, theta = pest.theta_est() + # Compute the covariance matrix using the reduced Hessian method + cov = pest.cov_est(method="reduced_hessian") print(obj) print(theta) print(cov) diff --git a/pyomo/contrib/parmest/examples/reactor_design/parameter_estimation_example.py b/pyomo/contrib/parmest/examples/reactor_design/parameter_estimation_example.py index 630802ddba9..a5a644a4c7b 100644 --- a/pyomo/contrib/parmest/examples/reactor_design/parameter_estimation_example.py +++ b/pyomo/contrib/parmest/examples/reactor_design/parameter_estimation_example.py @@ -33,11 +33,17 @@ def main(): pest = parmest.Estimator(exp_list, obj_function='SSE') - # Parameter estimation with covariance - obj, theta, cov = pest.theta_est(calc_cov=True, cov_n=17) - print(obj) + # Parameter estimation + obj, theta = pest.theta_est() + print("Least squares objective value:", obj) + print("Estimated parameters (theta):\n") print(theta) + # Compute the covariance matrix at the estimated parameter + cov = pest.cov_est() + print("Covariance matrix:\n") + print(cov) + if __name__ == "__main__": main() diff --git a/pyomo/contrib/parmest/examples/reactor_design/reactor_design.py b/pyomo/contrib/parmest/examples/reactor_design/reactor_design.py index 1b3360a2d93..6172096e1ad 100644 --- a/pyomo/contrib/parmest/examples/reactor_design/reactor_design.py +++ b/pyomo/contrib/parmest/examples/reactor_design/reactor_design.py @@ -22,15 +22,11 @@ def reactor_design_model(): # Create the concrete model model = pyo.ConcreteModel() - # Rate constants - model.k1 = pyo.Param( - initialize=5.0 / 6.0, within=pyo.PositiveReals, mutable=True - ) # min^-1 - model.k2 = pyo.Param( - initialize=5.0 / 3.0, within=pyo.PositiveReals, mutable=True - ) # min^-1 - model.k3 = pyo.Param( - initialize=1.0 / 6000.0, within=pyo.PositiveReals, mutable=True + # Rate constants, make unknown parameters variables + model.k1 = pyo.Var(initialize=5.0 / 6.0, within=pyo.PositiveReals) # min^-1 + model.k2 = pyo.Var(initialize=5.0 / 3.0, within=pyo.PositiveReals) # min^-1 + model.k3 = pyo.Var( + initialize=1.0 / 6000.0, within=pyo.PositiveReals ) # m^3/(gmol min) # Inlet concentration of A, gmol/m^3 @@ -119,6 +115,11 @@ def label_model(self): (k, pyo.ComponentUID(k)) for k in [m.k1, m.k2, m.k3] ) + m.measurement_error = pyo.Suffix(direction=pyo.Suffix.LOCAL) + m.measurement_error.update( + [(m.ca, None), (m.cb, None), (m.cc, None), (m.cd, None)] + ) + return m def get_labeled_model(self): diff --git a/pyomo/contrib/parmest/parmest.py b/pyomo/contrib/parmest/parmest.py index 21138d472da..816d88bf6de 100644 --- a/pyomo/contrib/parmest/parmest.py +++ b/pyomo/contrib/parmest/parmest.py @@ -11,6 +11,7 @@ #### Wrapping mpi-sppy functionality and local option Jan 2021, Feb 2021 #### Redesign with Experiment class Dec 2023 +# Options for using mpi-sppy or local EF only used in the deprecatedEstimator class # TODO: move use_mpisppy to a Pyomo configuration option # False implies always use the EF that is local to parmest use_mpisppy = True # Use it if we can but use local if not. @@ -44,6 +45,7 @@ from collections.abc import Callable from itertools import combinations from functools import singledispatchmethod +from collections import defaultdict from pyomo.common.dependencies import ( attempt_import, @@ -80,156 +82,6 @@ logger = logging.getLogger(__name__) -def ef_nonants(ef): - # Wrapper to call someone's ef_nonants - # (the function being called is very short, but it might be changed) - if use_mpisppy: - return sputils.ef_nonants(ef) - else: - return local_ef.ef_nonants(ef) - - -def _experiment_instance_creation_callback( - scenario_name, node_names=None, cb_data=None -): - """ - This is going to be called by mpi-sppy or the local EF and it will call into - the user's model's callback. - - Parameters: - ----------- - scenario_name: `str` Scenario name should end with a number - node_names: `None` ( Not used here ) - cb_data : dict with ["callback"], ["BootList"], - ["theta_names"], ["cb_data"], etc. - "cb_data" is passed through to user's callback function - that is the "callback" value. - "BootList" is None or bootstrap experiment number list. - (called cb_data by mpisppy) - - - Returns: - -------- - instance: `ConcreteModel` - instantiated scenario - - Note: - ---- - There is flexibility both in how the function is passed and its signature. - """ - assert cb_data is not None - outer_cb_data = cb_data - scen_num_str = re.compile(r'(\d+)$').search(scenario_name).group(1) - scen_num = int(scen_num_str) - basename = scenario_name[: -len(scen_num_str)] # to reconstruct name - - CallbackFunction = outer_cb_data["callback"] - - if callable(CallbackFunction): - callback = CallbackFunction - else: - cb_name = CallbackFunction - - if "CallbackModule" not in outer_cb_data: - raise RuntimeError( - "Internal Error: need CallbackModule in parmest callback" - ) - else: - modname = outer_cb_data["CallbackModule"] - - if isinstance(modname, str): - cb_module = im.import_module(modname, package=None) - elif isinstance(modname, types.ModuleType): - cb_module = modname - else: - print("Internal Error: bad CallbackModule") - raise - - try: - callback = getattr(cb_module, cb_name) - except: - print("Error getting function=" + cb_name + " from module=" + str(modname)) - raise - - if "BootList" in outer_cb_data: - bootlist = outer_cb_data["BootList"] - # print("debug in callback: using bootlist=",str(bootlist)) - # assuming bootlist itself is zero based - exp_num = bootlist[scen_num] - else: - exp_num = scen_num - - scen_name = basename + str(exp_num) - - cb_data = outer_cb_data["cb_data"] # cb_data might be None. - - # at least three signatures are supported. The first is preferred - try: - instance = callback(experiment_number=exp_num, cb_data=cb_data) - except TypeError: - raise RuntimeError( - "Only one callback signature is supported: " - "callback(experiment_number, cb_data) " - ) - """ - try: - instance = callback(scenario_tree_model, scen_name, node_names) - except TypeError: # deprecated signature? - try: - instance = callback(scen_name, node_names) - except: - print("Failed to create instance using callback; TypeError+") - raise - except: - print("Failed to create instance using callback.") - raise - """ - if hasattr(instance, "_mpisppy_node_list"): - raise RuntimeError(f"scenario for experiment {exp_num} has _mpisppy_node_list") - nonant_list = [ - instance.find_component(vstr) for vstr in outer_cb_data["theta_names"] - ] - if use_mpisppy: - instance._mpisppy_node_list = [ - scenario_tree.ScenarioNode( - name="ROOT", - cond_prob=1.0, - stage=1, - cost_expression=instance.FirstStageCost, - nonant_list=nonant_list, - scen_model=instance, - ) - ] - else: - instance._mpisppy_node_list = [ - scenario_tree.ScenarioNode( - name="ROOT", - cond_prob=1.0, - stage=1, - cost_expression=instance.FirstStageCost, - scen_name_list=None, - nonant_list=nonant_list, - scen_model=instance, - ) - ] - - if "ThetaVals" in outer_cb_data: - thetavals = outer_cb_data["ThetaVals"] - - # dlw august 2018: see mea code for more general theta - for name, val in thetavals.items(): - theta_cuid = ComponentUID(name) - theta_object = theta_cuid.find_component_on(instance) - if val is not None: - # print("Fixing",vstr,"at",str(thetavals[vstr])) - theta_object.fix(val) - else: - # print("Freeing",vstr) - theta_object.unfix() - - return instance - - def SSE(model): """ Returns an expression that is used to compute the sum of squared errors @@ -342,10 +194,113 @@ def _get_labeled_model(experiment): raise RuntimeError(f"Failed to clone labeled model: {exc}") +def _check_index_is_none_or_local(index): + """ + Checks if experiment outputs are scalars (i.e., not indexed) or their indices + are local (i.e., strings), e.g., `m.y1`, `m.y2`, `m.C["A"]`, `m.C["B"]` + """ + return index is None or isinstance(index, str) + + +def _format_outputs_index_as_tuple(index): + """ + Formats the indices of indexed experiment outputs + (e.g., `m.CA[t]`, `m.CB[t]`, `m.C[t, "A"]`, `m.C[t, "B"]`) as a + tuple + """ + if isinstance(index, tuple): + return index + return (index,) + + +def validate_experiment_outputs(output_vars): + """ + Checks that: + 1. All variables in the `experiment_outputs` attribute have the same + number of indices + 2. All variables in the `experiment_outputs` attribute share the same + index set + + Parameters + ---------- + output_vars : pyomo.core.base.suffix.Suffix + Experiment output variables + """ + + grouped_indices = defaultdict(list) + + # group indices by parent component + for comp in output_vars: + index = comp.index() + + # check if the output variable is a scalar (e.g., m.y1, m.y2) + # or has a local index (e.g., m.C["A"], m.C["B"]) + if _check_index_is_none_or_local(index): + pass + else: + # format the indices of output variables + # (e.g., m.CA[t], m.CB[t], m.C[t, "A"], m.C[t, "B"]) as a tuple + index_tuple = _format_outputs_index_as_tuple(index) + + # get the data-point index which is assumed to be the first entry + data_point = index_tuple[0] + + assert isinstance( + data_point, (int, float) + ), "The first index of experiment outputs must be the data point" + + parent = comp.parent_component().name + grouped_indices[parent].append(index) + + # convert to a sorted unique tuples + grouped_indices = { + name: tuple(sorted(indices)) for name, indices in grouped_indices.items() + } + + # reference index set + names = list(grouped_indices.keys()) + + if len(names) <= 1: # only one output variable exist + pass + else: + ref_name = names[0] + ref_indices = grouped_indices[ref_name] + + for name in names[1:]: + assert len(grouped_indices[name]) == len( + ref_indices + ), "Experiment outputs must have the same number of indices or data points" + + assert ( + grouped_indices[name] == ref_indices + ), "Experiment outputs must share the same indices or data points" + + def _count_total_experiments(experiment_list): """ Counts the number of data points in the list of experiments + This function has been updated to avoid double counting data points in + cases where the "experiment_outputs" suffix contain keys that belong + to different output variables (e.g., `m.y1`, `m.y2`) which are measured at + the same data point + + Assumptions: + + Experiment outputs can be scaler or local variables + (e.g., `m.y1`, `m.y2`, `m.C["A"]`, `m.C["B"]`) + + Experiment output variables can be indexed by a single index + (e.g., `m.CA[t]`, `m.CB[t]`) or by two or more indices + (e.g., `m.C[t, "A"]`, `m.C[t, "B"]`). In both cases, the data-point variable + (e.g., `t` as in time) is stored in the first index. Within each experiment, + the output families are expected to share the same time points. Across + experiments, the output families are expected to contain the same number of + time points. + + Future versions will allow for heterogeneity in the number of data points across + experiments and will require changes to this function. + Parameters ---------- experiment_list : list @@ -354,14 +309,52 @@ def _count_total_experiments(experiment_list): Returns ------- - total_number_data : int + total_data_points : int The total number of data points in the list of experiments """ - total_number_data = 0 + + total_data_points = 0 + for experiment in experiment_list: - total_number_data += len(experiment.get_labeled_model().experiment_outputs) + model = _get_labeled_model(experiment) + output_vars = model.experiment_outputs + + # check if the experiment outputs are defined correctly + validate_experiment_outputs(output_vars) + + # store the indices of the experiment outputs + indices = [] + + for output_var in output_vars.keys(): + index = output_var.index() + indices.append(index) + + # Case 1 and 2: + # scalar outputs such as m.y1, m.y2,... + # local outputs such as m.C["A"], m.C["B"],... + if all(_check_index_is_none_or_local(index) for index in indices): + total_data_points += 1 + else: + # Case 3: + # outputs with one index, e.g., m.CA[t], m.CB[t],... + # outputs with two or more indices, e.g., m.C[t, "A"], m.C[t, "B"],... + # first index must be the data-point variable + # count unique time/sample indices within this experiment + experiment_data_points = set() - return total_number_data + for index in indices: + index_tuple = _format_outputs_index_as_tuple(index) + + # if the output has one index, this gives index itself + # if the output has two or more index, assume that the first + # entry is the data-point variable + data_point = index_tuple[0] + + experiment_data_points.add(data_point) + + total_data_points += len(experiment_data_points) + + return total_data_points class CovarianceMethod(Enum): @@ -832,6 +825,11 @@ def __init__( # boolean to indicate if model is initialized using a square solve self.model_initialized = False + # If self.diagnostic mode is true, then set the logging level to INFO to print + # diagnostics from the solver + if self.diagnostic_mode: + logger.setLevel(logging.DEBUG) + # The deprecated Estimator constructor # This works by checking the type of the first argument passed to # the class constructor. If it matches the old interface (i.e. is @@ -894,20 +892,30 @@ def _return_theta_names(self): # default theta_names, created when Estimator object is created return self.estimator_theta_names - def _expand_indexed_unknowns(self, model_temp): + def _expanded_theta_info(self, model): """ - Expand indexed variables to get full list of thetas + Return scalar theta names and ComponentUIDs for all unknown parameters. + + The unknown_parameters suffix may contain either scalar ComponentData + objects or indexed components. Indexed components are expanded to their + scalar data objects. + + Names are intended for user-facing labels and DataFrame columns. + ComponentUIDs are intended for locating equivalent theta components on + cloned/transferred models. """ + theta_components = [] - model_theta_list = [] - for c in model_temp.unknown_parameters.keys(): + for c in model.unknown_parameters: if c.is_indexed(): - for _, ci in c.items(): - model_theta_list.append(ci.name) + theta_components.extend(c.values()) else: - model_theta_list.append(c.name) + theta_components.append(c) - return model_theta_list + theta_names = tuple(c.name for c in theta_components) + theta_cuids = tuple(pyo.ComponentUID(c) for c in theta_components) + + return theta_names, theta_cuids def _create_parmest_model(self, experiment_number): """ @@ -969,197 +977,274 @@ def _instance_creation_callback(self, experiment_number=None, cb_data=None): model = self._create_parmest_model(experiment_number) return model - def _Q_opt( - self, - ThetaVals=None, - solver="ef_ipopt", - return_values=[], - bootlist=None, - calc_cov=NOTSET, - cov_n=NOTSET, - ): + def _create_scenario_blocks(self, bootlist=None, theta_vals=None, fix_theta=False): """ - Set up all thetas as first stage Vars, return resulting theta - values as well as the objective function value. - + Create scenario blocks for parameter estimation. """ - if solver == "k_aug": - raise RuntimeError("k_aug no longer supported.") + model = pyo.ConcreteModel() - # (Bootstrap scenarios will use indirection through the bootlist) - if bootlist is None: - scenario_numbers = list(range(len(self.exp_list))) - scen_names = ["Scenario{}".format(i) for i in scenario_numbers] - else: - scen_names = ["Scenario{}".format(i) for i in range(len(bootlist))] + template_model = self._create_parmest_model(0) + expanded_theta_names, expanded_theta_cuids = self._expanded_theta_info( + template_model + ) - # get the probability constant that is applied to the objective function - # parmest solves the estimation problem by applying equal probabilities to - # the objective function of all the scenarios from the experiment list - self.obj_probability_constant = len(scen_names) + model._parmest_theta_names = tuple(expanded_theta_names) + model._parmest_theta_cuids = tuple(expanded_theta_cuids) - # tree_model.CallbackModule = None - outer_cb_data = dict() - outer_cb_data["callback"] = self._instance_creation_callback - if ThetaVals is not None: - outer_cb_data["ThetaVals"] = ThetaVals - if bootlist is not None: - outer_cb_data["BootList"] = bootlist - outer_cb_data["cb_data"] = None # None is OK - outer_cb_data["theta_names"] = self.estimator_theta_names + # Parent/global EF theta variables. These are indexed by names because + # they serve as user-facing labels and result keys. + model.parmest_theta = pyo.Var(model._parmest_theta_names) - options = {"solver": "ipopt"} - scenario_creator_options = {"cb_data": outer_cb_data} - if use_mpisppy: - ef = sputils.create_EF( - scen_names, - _experiment_instance_creation_callback, - EF_name="_Q_opt", - suppress_warnings=True, - scenario_creator_kwargs=scenario_creator_options, - ) - else: - ef = local_ef.create_EF( - scen_names, - _experiment_instance_creation_callback, - EF_name="_Q_opt", - suppress_warnings=True, - scenario_creator_kwargs=scenario_creator_options, - ) - self.ef_instance = ef + # Initialize parent/global theta values from the template model. + for name, cuid in zip(expanded_theta_names, expanded_theta_cuids): + template_theta_var = cuid.find_component_on(template_model) - # Solve the extensive form with ipopt - if solver == "ef_ipopt": - if calc_cov is NOTSET or not calc_cov: - # Do not calculate the reduced hessian + if template_theta_var is None: + raise RuntimeError( + f"Could not find theta variable for CUID {cuid} " + "in the template model." + ) - solver = SolverFactory('ipopt') - if self.solver_options is not None: - for key in self.solver_options: - solver.options[key] = self.solver_options[key] + parent_theta_var = model.parmest_theta[name] - solve_result = solver.solve(self.ef_instance, tee=self.tee) - assert_optimal_termination(solve_result) - elif calc_cov is not NOTSET and calc_cov: - # parmest makes the fitted parameters stage 1 variables - ind_vars = [] - for nd_name, Var, sol_val in ef_nonants(ef): - ind_vars.append(Var) - # calculate the reduced hessian - solve_result, inv_red_hes = ( - inverse_reduced_hessian.inv_reduced_hessian_barrier( - self.ef_instance, - independent_variables=ind_vars, - solver_options=self.solver_options, - tee=self.tee, - ) - ) + if theta_vals is not None and name in theta_vals: + parent_theta_var.set_value(theta_vals[name]) + else: + parent_theta_var.set_value(pyo.value(template_theta_var)) - if self.diagnostic_mode: - print( - ' Solver termination condition = ', - str(solve_result.solver.termination_condition), - ) + if fix_theta: + parent_theta_var.fix() + else: + parent_theta_var.unfix() - # assume all first stage are thetas... - theta_vals = {} - for nd_name, Var, sol_val in ef_nonants(ef): - # process the name - # the scenarios are blocks, so strip the scenario name - var_name = Var.name[Var.name.find(".") + 1 :] - theta_vals[var_name] = sol_val + scenario_numbers = ( + bootlist if bootlist is not None else list(range(len(self.exp_list))) + ) + + # get the probability constant that is applied to the objective function + # parmest solves the estimation problem by applying equal probabilities to + # the objective function of all the scenarios from the experiment list + self.obj_probability_constant = len(scenario_numbers) + + # Index scenario blocks by position, not experiment number, so bootstrap + # samples with repeated experiments are preserved. + model.scenario_indices = pyo.RangeSet(0, self.obj_probability_constant - 1) + + model.scenario_number = pyo.Param( + model.scenario_indices, + initialize={ + i: experiment_number + for i, experiment_number in enumerate(scenario_numbers) + }, + within=pyo.NonNegativeIntegers, + mutable=False, + ) - obj_val = pyo.value(ef.EF_Obj) - self.obj_value = obj_val - self.estimated_theta = theta_vals + model.exp_scenarios = pyo.Block(model.scenario_indices) - if calc_cov is not NOTSET and calc_cov: - # Calculate the covariance matrix + for i in model.scenario_indices: + experiment_number = pyo.value(model.scenario_number[i]) + parmest_model = self._create_parmest_model(experiment_number) + model.exp_scenarios[i].transfer_attributes_from(parmest_model) + + model.theta_link_constraints = pyo.ConstraintList() - if not isinstance(cov_n, int): - raise TypeError( - f"Expected an integer for the 'cov_n' argument. " - f"Got {type(cov_n)}." + # Link scenario-local theta variables to the parent/global EF theta vars. + # CUIDs are used for component lookup. Names are only used to index the + # parent EF theta variable. + for name, cuid in zip(expanded_theta_names, expanded_theta_cuids): + parent_theta_var = model.parmest_theta[name] + theta_value = pyo.value(parent_theta_var) + + for i in model.scenario_indices: + child_theta_var = cuid.find_component_on(model.exp_scenarios[i]) + + if child_theta_var is None: + raise RuntimeError( + f"Could not find theta variable for CUID {cuid} " + f"in scenario block {i}." ) - num_unknowns = max( - [ - len(experiment.get_labeled_model().unknown_parameters) - for experiment in self.exp_list - ] - ) - assert cov_n > num_unknowns, ( - "The number of datapoints must be greater than the " - "number of parameters to estimate." + + child_theta_var.set_value(theta_value) + + if fix_theta: + child_theta_var.fix() + else: + child_theta_var.unfix() + model.theta_link_constraints.add( + child_theta_var == parent_theta_var + ) + + for block in model.exp_scenarios.values(): + for obj in block.component_objects(pyo.Objective): + obj.deactivate() + + def total_obj(m): + return ( + sum( + block.Total_Cost_Objective.expr + for block in m.exp_scenarios.values() ) + / self.obj_probability_constant + ) - # Number of data points considered - n = cov_n + model.Obj = pyo.Objective(rule=total_obj, sense=pyo.minimize) - # Extract number of fitted parameters - l = len(theta_vals) + self.ef_instance = model + return model - # Assumption: Objective value is sum of squared errors - sse = obj_val + def _Q_opt( + self, + return_values=None, + bootlist=None, + solver="ef_ipopt", + theta_vals=None, + fix_theta=False, + ): + """ + _Q_opt method for parameter estimation using an extended form + optimization problem with scenario blocks for each experiment. - '''Calculate covariance assuming experimental observation errors - are independent and follow a Gaussian distribution - with constant variance. + Scenario blocks are created by cloning the original model for + each experiment and linking the parameter variables across blocks. + The objective is defined as the average of the individual scenario + objectives, which allows for simultaneous optimization across + all experiments. - The formula used in parmest was verified against equations - (7-5-15) and (7-5-16) in "Nonlinear Parameter Estimation", - Y. Bard, 1974. - This formula is also applicable if the objective is scaled by a - constant; the constant cancels out. - (was scaled by 1/n because it computes an expected value.) - ''' - cov = 2 * sse / (n - l) * inv_red_hes - cov = pd.DataFrame( - cov, index=theta_vals.keys(), columns=theta_vals.keys() - ) + Steps: + 1. Load model - parmest model should be labeled + 2. Create scenario blocks - clone model to have one per experiment + 3. Define objective and constraints for the block + 4. Solve the block as a single problem + 5. Analyze results and extract parameter estimates - theta_vals = pd.Series(theta_vals) + Parameters + ---------- + return_values : list, optional + List of variable names to return values for. Default is None. + bootlist : list, optional + List of bootstrap experiment numbers to use. If None, use all experiments in exp_list. + Default is None. + theta_vals : dict, optional + Dictionary of theta values to set in the model. If None, use default values from experiment class. + Default is None. + solver : str, optional + Solver to use for optimization. Default is "ef_ipopt". + fix_theta : bool, optional + If True, fix the theta values in the model. If False, leave them free. + Default is False. + Returns + ------- + obj_value : float + Objective value of the solved model. + If fix_theta is True, this is the value of the objective at the fixed theta values. + If fix_theta is False, this is the optimal value of the objective. + theta_estimates : dict + Dictionary of estimated theta values. If fix_theta is True, this will be the same as + the input theta_vals (or default values if theta_vals is None). If fix_theta is False, + this will be the estimated parameter values that optimize the objective. + var_values : pd.DataFrame, optional + DataFrame of variable values for the variables specified in return_values. + Only returned if return_values is not None and contains valid variable names. + The DataFrame will have one row per scenario block (experiment) and columns + corresponding to the variable names in return_values. - if len(return_values) > 0: - var_values = [] - if len(scen_names) > 1: # multiple scenarios - block_objects = self.ef_instance.component_objects( - Block, descend_into=False - ) - else: # single scenario - block_objects = [self.ef_instance] - for exp_i in block_objects: - vals = {} - for var in return_values: - exp_i_var = exp_i.find_component(str(var)) - if ( - exp_i_var is None - ): # we might have a block such as _mpisppy_data - continue - # if value to return is ContinuousSet - if type(exp_i_var) == ContinuousSet: - temp = list(exp_i_var) - else: - temp = [pyo.value(_) for _ in exp_i_var.values()] - if len(temp) == 1: - vals[var] = temp[0] - else: - vals[var] = temp - if len(vals) > 0: - var_values.append(vals) - var_values = pd.DataFrame(var_values) - if calc_cov is not NOTSET and calc_cov: - return obj_val, theta_vals, var_values, cov - elif calc_cov is NOTSET or not calc_cov: - return obj_val, theta_vals, var_values + """ + # Create extended form model with scenario blocks + model = self._create_scenario_blocks( + bootlist=bootlist, theta_vals=theta_vals, fix_theta=fix_theta + ) + expanded_theta_names = list(model._parmest_theta_names) + + logger.debug("Parmest _Q_opt model with scenario blocks:") - if calc_cov is not NOTSET and calc_cov: - return obj_val, theta_vals, cov - elif calc_cov is NOTSET or not calc_cov: - return obj_val, theta_vals + if logger.isEnabledFor(logging.DEBUG): + model.pprint() + # Check solver and set options + if solver == "k_aug": + raise RuntimeError("k_aug no longer supported.") + if solver == "ef_ipopt": + sol = SolverFactory('ipopt') else: raise RuntimeError("Unknown solver in Q_Opt=" + solver) + # Currently, parmest is only tested with ipopt via ef_ipopt + # No other pyomo solvers have been verified to work with parmest from current release + # to my knowledge. + + if self.solver_options is not None: + for key in self.solver_options: + sol.options[key] = self.solver_options[key] + + # Solve model without loading solution values until the termination + # condition has been checked. + solve_result = sol.solve(model, tee=self.tee, load_solutions=False) + termination_condition = solve_result.solver.termination_condition + + # Separate handling of termination conditions for _Q_at_theta vs _Q_opt + # If not fixing theta, ensure optimal termination before loading the result. + if not fix_theta: + assert_optimal_termination(solve_result) + model.solutions.load_from(solve_result) + else: + if ( + termination_condition == pyo.TerminationCondition.infeasible + or len(solve_result.solution) == 0 + ): + theta_payload = dict(theta_vals) if theta_vals is not None else {} + return None, theta_payload, termination_condition + model.solutions.load_from(solve_result) + + # Extract objective value + obj_value = pyo.value(model.Obj) + theta_estimates = {} + # Extract theta estimates from parent model + for name in expanded_theta_names: + theta_estimates[name] = pyo.value(model.parmest_theta[name]) + + self.obj_value = obj_value + self.estimated_theta = theta_estimates + + # If fixing theta, return objective value, theta estimates, and solver status + if fix_theta: + return obj_value, theta_estimates, termination_condition + + # Extract return values if requested + # Assumes the model components are named the same in each block, and are pyo.Vars. + if return_values is not None and len(return_values) > 0: + var_values = [] + # In the scenario blocks structure, exp_scenarios is an IndexedBlock + exp_blocks = model.exp_scenarios.values() + # Loop over each experiment block and extract requested variable values + for exp_i in exp_blocks: + # In each block, extract requested variables + vals = {} + for var in return_values: + # Find the variable in the block + exp_i_var = exp_i.find_component(str(var)) + # Check if variable exists in the block + if exp_i_var is None: + continue + # Extract value(s) from variable + if type(exp_i_var) == ContinuousSet: + temp = list(exp_i_var) + else: + temp = [pyo.value(_) for _ in exp_i_var.values()] + if len(temp) == 1: + vals[var] = temp[0] + else: + vals[var] = temp + # Only append if vals is not empty + if len(vals) > 0: + var_values.append(vals) + # Convert to DataFrame + var_values = pd.DataFrame(var_values) + + if return_values is not None and len(return_values) > 0: + return obj_value, theta_estimates, var_values + else: + return obj_value, theta_estimates def _cov_at_theta(self, method, solver, step): """ @@ -1181,14 +1266,22 @@ def _cov_at_theta(self, method, solver, step): cov : pd.DataFrame Covariance matrix of the estimated parameters """ + if hasattr(self.ef_instance, "exp_scenarios"): + # The EF now indexes scenario blocks by scenario position. Pull the + # first position from the model metadata instead of hard-coding 0 so + # future indexing changes stay localized to scenario_indices. + scenario_index = next(iter(self.ef_instance.scenario_indices)) + ref_model = self.ef_instance.exp_scenarios[scenario_index] + else: + ref_model = self.ef_instance + if method == CovarianceMethod.reduced_hessian.value: # compute the inverse reduced hessian to be used # in the "reduced_hessian" method - # parmest makes the fitted parameters stage 1 variables - ind_vars = [] - for nd_name, Var, sol_val in ef_nonants(self.ef_instance): - ind_vars.append(Var) - # calculate the reduced hessian + + # retrieve the independent variables (i.e., estimated parameters) + ind_vars = list(self.ef_instance.parmest_theta.values()) + solve_result, inv_red_hes = ( inverse_reduced_hessian.inv_reduced_hessian_barrier( self.ef_instance, @@ -1199,6 +1292,43 @@ def _cov_at_theta(self, method, solver, step): ) self.inv_red_hes = inv_red_hes + else: + # if not using the 'reduced_hessian' method, calculate the sum of squared errors + # using 'finite_difference' method or 'automatic_differentiation_kaug' + sse_vals = [] + for experiment in self.exp_list: + model = _get_labeled_model(experiment) + + # fix the value of the unknown parameters to the estimated values + for param in model.unknown_parameters: + param.fix(self.estimated_theta[param.name]) + + # re-solve the model with the estimated parameters + results = pyo.SolverFactory(solver).solve(model, tee=self.tee) + assert_optimal_termination(results) + + # choose and evaluate the sum of squared errors expression + if self.obj_function == ObjectiveType.SSE: + sse_expr = SSE(model) + elif self.obj_function == ObjectiveType.SSE_weighted: + sse_expr = SSE_weighted(model) + else: + raise ValueError( + f"Invalid objective function for covariance calculation. " + f"The covariance matrix can only be calculated using the built-in " + f"objective functions: {[e.value for e in ObjectiveType]}. Supply " + f"the Estimator object one of these built-in objectives and " + f"re-run the code." + ) + + # evaluate the numerical SSE and store it + sse_val = pyo.value(sse_expr) + sse_vals.append(sse_val) + + sse = sum(sse_vals) + logger.info( + f"The sum of squared errors at the estimated parameter(s) is: {sse}" + ) # Number of data points considered n = self.number_exp @@ -1206,42 +1336,6 @@ def _cov_at_theta(self, method, solver, step): # Extract the number of fitted parameters l = len(self.estimated_theta) - # calculate the sum of squared errors at the estimated parameter values - sse_vals = [] - for experiment in self.exp_list: - model = _get_labeled_model(experiment) - - # fix the value of the unknown parameters to the estimated values - for param in model.unknown_parameters: - param.fix(self.estimated_theta[param.name]) - - # re-solve the model with the estimated parameters - results = pyo.SolverFactory(solver).solve(model, tee=self.tee) - assert_optimal_termination(results) - - # choose and evaluate the sum of squared errors expression - if self.obj_function == ObjectiveType.SSE: - sse_expr = SSE(model) - elif self.obj_function == ObjectiveType.SSE_weighted: - sse_expr = SSE_weighted(model) - else: - raise ValueError( - f"Invalid objective function for covariance calculation. " - f"The covariance matrix can only be calculated using the built-in " - f"objective functions: {[e.value for e in ObjectiveType]}. Supply " - f"the Estimator object one of these built-in objectives and " - f"re-run the code." - ) - - # evaluate the numerical SSE and store it - sse_val = pyo.value(sse_expr) - sse_vals.append(sse_val) - - sse = sum(sse_vals) - logger.info( - f"The sum of squared errors at the estimated parameter(s) is: {sse}" - ) - """Calculate covariance assuming experimental observation errors are independent and follow a Gaussian distribution with constant variance. @@ -1264,11 +1358,11 @@ def _cov_at_theta(self, method, solver, step): # check if the user specified 'SSE' or 'SSE_weighted' as the objective function if self.obj_function == ObjectiveType.SSE: # check if the user defined the 'measurement_error' attribute - if hasattr(model, "measurement_error"): + if hasattr(ref_model, "measurement_error"): # get the measurement errors meas_error = [ - model.measurement_error[y_hat] - for y_hat, y in model.experiment_outputs.items() + ref_model.measurement_error[y_hat] + for y_hat, y in ref_model.experiment_outputs.items() ] # check if the user supplied the values of the measurement errors @@ -1340,10 +1434,10 @@ def _cov_at_theta(self, method, solver, step): ) elif self.obj_function == ObjectiveType.SSE_weighted: # check if the user defined the 'measurement_error' attribute - if hasattr(model, "measurement_error"): + if hasattr(ref_model, "measurement_error"): meas_error = [ - model.measurement_error[y_hat] - for y_hat, y in model.experiment_outputs.items() + ref_model.measurement_error[y_hat] + for y_hat, y in ref_model.experiment_outputs.items() ] # check if the user supplied the values for the measurement errors @@ -1362,218 +1456,24 @@ def _cov_at_theta(self, method, solver, step): ) else: cov = compute_covariance_matrix( - self.exp_list, - method, - obj_function=self.covariance_objective, - theta_vals=self.estimated_theta, - step=step, - solver=solver, - tee=self.tee, - ) - else: - raise ValueError( - 'One or more values of the measurement errors have not been ' - 'supplied. All values of the measurement errors are required ' - 'for the "SSE_weighted" objective.' - ) - else: - raise AttributeError( - 'Experiment model does not have suffix "measurement_error".' - ) - - return cov - - def _Q_at_theta(self, thetavals, initialize_parmest_model=False): - """ - Return the objective function value with fixed theta values. - - Parameters - ---------- - thetavals: dict - A dictionary of theta values. - - initialize_parmest_model: boolean - If True: Solve square problem instance, build extensive form of the model for - parameter estimation, and set flag model_initialized to True. Default is False. - - Returns - ------- - objectiveval: float - The objective function value. - thetavals: dict - A dictionary of all values for theta that were input. - solvertermination: Pyomo TerminationCondition - Tries to return the "worst" solver status across the scenarios. - pyo.TerminationCondition.optimal is the best and - pyo.TerminationCondition.infeasible is the worst. - """ - - optimizer = pyo.SolverFactory('ipopt') - - if len(thetavals) > 0: - dummy_cb = { - "callback": self._instance_creation_callback, - "ThetaVals": thetavals, - "theta_names": self._return_theta_names(), - "cb_data": None, - } - else: - dummy_cb = { - "callback": self._instance_creation_callback, - "theta_names": self._return_theta_names(), - "cb_data": None, - } - - if self.diagnostic_mode: - if len(thetavals) > 0: - print(' Compute objective at theta = ', str(thetavals)) - else: - print(' Compute objective at initial theta') - - # start block of code to deal with models with no constraints - # (ipopt will crash or complain on such problems without special care) - instance = _experiment_instance_creation_callback("FOO0", None, dummy_cb) - try: # deal with special problems so Ipopt will not crash - first = next(instance.component_objects(pyo.Constraint, active=True)) - active_constraints = True - except: - active_constraints = False - # end block of code to deal with models with no constraints - - WorstStatus = pyo.TerminationCondition.optimal - totobj = 0 - scenario_numbers = list(range(len(self.exp_list))) - if initialize_parmest_model: - # create dictionary to store pyomo model instances (scenarios) - scen_dict = dict() - - for snum in scenario_numbers: - sname = "scenario_NODE" + str(snum) - instance = _experiment_instance_creation_callback(sname, None, dummy_cb) - model_theta_names = self._expand_indexed_unknowns(instance) - - if initialize_parmest_model: - # list to store fitted parameter names that will be unfixed - # after initialization - theta_init_vals = [] - # use appropriate theta_names member - theta_ref = model_theta_names - - for i, theta in enumerate(theta_ref): - # Use parser in ComponentUID to locate the component - var_cuid = ComponentUID(theta) - var_validate = var_cuid.find_component_on(instance) - if var_validate is None: - logger.warning( - "theta_name %s was not found on the model", (theta) - ) - else: - try: - if len(thetavals) == 0: - var_validate.fix() - else: - var_validate.fix(thetavals[theta]) - theta_init_vals.append(var_validate) - except: - logger.warning( - 'Unable to fix model parameter value for %s (not a Pyomo model Var)', - (theta), - ) - - if active_constraints: - if self.diagnostic_mode: - print(' Experiment = ', snum) - print(' First solve with special diagnostics wrapper') - status_obj, solved, iters, time, regu = ( - utils.ipopt_solve_with_stats( - instance, optimizer, max_iter=500, max_cpu_time=120 - ) - ) - print( - " status_obj, solved, iters, time, regularization_stat = ", - str(status_obj), - str(solved), - str(iters), - str(time), - str(regu), - ) - - results = optimizer.solve(instance) - if self.diagnostic_mode: - print( - 'standard solve solver termination condition=', - str(results.solver.termination_condition), - ) - - if ( - results.solver.termination_condition - != pyo.TerminationCondition.optimal - ): - # DLW: Aug2018: not distinguishing "middlish" conditions - if WorstStatus != pyo.TerminationCondition.infeasible: - WorstStatus = results.solver.termination_condition - if initialize_parmest_model: - if self.diagnostic_mode: - print( - "Scenario {:d} infeasible with initialized parameter values".format( - snum - ) - ) - else: - if initialize_parmest_model: - if self.diagnostic_mode: - print( - "Scenario {:d} initialization successful with initial parameter values".format( - snum - ) - ) - if initialize_parmest_model: - # unfix parameters after initialization - for theta in theta_init_vals: - theta.unfix() - scen_dict[sname] = instance - else: - if initialize_parmest_model: - # unfix parameters after initialization - for theta in theta_init_vals: - theta.unfix() - scen_dict[sname] = instance - - objobject = getattr(instance, self._second_stage_cost_exp) - objval = pyo.value(objobject) - totobj += objval - - retval = totobj / len(scenario_numbers) # -1?? - if initialize_parmest_model and not hasattr(self, 'ef_instance'): - # create extensive form of the model using scenario dictionary - if len(scen_dict) > 0: - for scen in scen_dict.values(): - scen._mpisppy_probability = 1 / len(scen_dict) - - if use_mpisppy: - EF_instance = sputils._create_EF_from_scen_dict( - scen_dict, - EF_name="_Q_at_theta", - # suppress_warnings=True - ) - else: - EF_instance = local_ef._create_EF_from_scen_dict( - scen_dict, EF_name="_Q_at_theta", nonant_for_fixed_vars=True - ) - - self.ef_instance = EF_instance - # set self.model_initialized flag to True to skip extensive form model - # creation using theta_est() - self.model_initialized = True - - # return initialized theta values - if len(thetavals) == 0: - # use appropriate theta_names member - theta_ref = self._return_theta_names() - for i, theta in enumerate(theta_ref): - thetavals[theta] = theta_init_vals[i]() + self.exp_list, + method, + obj_function=self.covariance_objective, + theta_vals=self.estimated_theta, + step=step, + solver=solver, + tee=self.tee, + ) + else: + raise ValueError( + f"Invalid objective function for covariance calculation. " + f"The covariance matrix can only be calculated using the built-in " + f"objective functions: {[e.value for e in ObjectiveType]}. Supply " + f"the Estimator object one of these built-in objectives and " + f"re-run the code." + ) - return retval, thetavals, WorstStatus + return cov def _get_sample_list(self, samplesize, num_samples, replacement=True): samplelist = list() @@ -1602,9 +1502,11 @@ def _get_sample_list(self, samplesize, num_samples, replacement=True): attempts += 1 if attempts > num_samples: # arbitrary timeout limit - raise RuntimeError("""Internal error: timeout constructing - a sample, the dim of theta may be too - close to the samplesize""") + raise RuntimeError( + "Internal error: timeout constructing " + "a sample, the dim of theta may be too " + "close to the samplesize" + ) samplelist.append((i, sample)) @@ -1671,13 +1573,7 @@ def theta_est( solver=solver, return_values=return_values ) - return self._Q_opt( - solver=solver, - return_values=return_values, - bootlist=None, - calc_cov=calc_cov, - cov_n=cov_n, - ) + return self._Q_opt(solver=solver, return_values=return_values, bootlist=None) def cov_est(self, method="finite_difference", solver="ipopt", step=1e-3): """ @@ -1944,98 +1840,119 @@ def leaveNout_bootstrap_test( return results - def objective_at_theta(self, theta_values=None, initialize_parmest_model=False): + def _normalize_theta_dataframe(self, theta_values): """ - Objective value for each theta + Validate and normalize user-provided theta_values columns. - Parameters - ---------- - theta_values: pd.DataFrame, columns=theta_names - Values of theta used to compute the objective + Returns a copy whose columns are the canonical expanded theta names from + the model. - initialize_parmest_model: boolean - If True: Solve square problem instance, build extensive form - of the model for parameter estimation, and set flag - model_initialized to True. Default is False. + This allows quote-insensitive matching while preserving canonical model + names internally. + """ + assert isinstance(theta_values, pd.DataFrame) + template_model = self._create_parmest_model(0) + expected_theta_names, _ = self._expanded_theta_info(template_model) + expected_theta_names = list(expected_theta_names) - Returns - ------- - obj_at_theta: pd.DataFrame - Objective value for each theta (infeasible solutions are - omitted). + def clean(name): + return str(name).replace("'", "") + + provided_theta_names = list(theta_values.columns) + + clean_provided = [clean(name) for name in provided_theta_names] + clean_expected = [clean(name) for name in expected_theta_names] + + if len(clean_provided) != len(set(clean_provided)): + raise ValueError(f"Duplicate theta names are not allowed: {clean_provided}") + + if len(clean_expected) != len(set(clean_expected)): + raise RuntimeError( + "Quote-insensitive theta names are ambiguous. " + f"Expanded theta names are {expected_theta_names}." + ) + + if set(clean_provided) != set(clean_expected): + raise ValueError( + f"Provided theta values {clean_provided} do not match " + f"expected theta names {clean_expected}." + ) + + canonical_name_by_clean_name = { + clean_name: canonical_name + for clean_name, canonical_name in zip(clean_expected, expected_theta_names) + } + + canonical_columns = [ + canonical_name_by_clean_name[clean_name] for clean_name in clean_provided + ] + + theta_values = theta_values.copy() + theta_values.columns = canonical_columns + + # Put columns in model order. + return theta_values[expected_theta_names] + + def objective_at_theta(self, theta_values=None, initialize_parmest_model=False): + """ + Objective value for each theta, solving extensive form problem with + fixed theta values. """ - # check if we are using deprecated parmest if self.pest_deprecated is not None: return self.pest_deprecated.objective_at_theta( theta_values=theta_values, initialize_parmest_model=initialize_parmest_model, ) - if len(self.estimator_theta_names) == 0: - pass # skip assertion if model has no fitted parameters - else: - # create a local instance of the pyomo model to access model variables and parameters - model_temp = self._create_parmest_model(0) - model_theta_list = self._expand_indexed_unknowns(model_temp) - - # if self.estimator_theta_names is not the same as temp model_theta_list, - # create self.theta_names_updated - if set(self.estimator_theta_names) == set(model_theta_list) and len( - self.estimator_theta_names - ) == len(set(model_theta_list)): - pass - else: - self.theta_names_updated = model_theta_list + if initialize_parmest_model: + deprecation_warning( + "The `initialize_parmest_model` option in `objective_at_theta()` is " + "deprecated and will be removed in future releases. Please ensure the " + "model is initialized within the Experiment class definition.", + version="6.10.1.dev0", + ) if theta_values is None: - all_thetas = {} # dictionary to store fitted variables - # use appropriate theta names member - theta_names = model_theta_list + template_model = self._create_parmest_model(0) + theta_names, _ = self._expanded_theta_info(template_model) + theta_names = list(theta_names) + all_thetas = [] else: - assert isinstance(theta_values, pd.DataFrame) - # for parallel code we need to use lists and dicts in the loop - theta_names = theta_values.columns - # # check if theta_names are in model - for theta in list(theta_names): - theta_temp = theta.replace("'", "") # cleaning quotes from theta_names - assert theta_temp in [ - t.replace("'", "") for t in model_theta_list - ], "Theta name {} in 'theta_values' not in 'theta_names' {}".format( - theta_temp, model_theta_list - ) + theta_values = self._normalize_theta_dataframe(theta_values) + theta_names = list(theta_values.columns) + all_thetas = theta_values.to_dict("records") - assert len(list(theta_names)) == len(model_theta_list) + num_tasks = len(all_thetas) if all_thetas else 1 + task_mgr = utils.ParallelTaskManager(num_tasks) - all_thetas = theta_values.to_dict('records') + local_thetas = task_mgr.global_to_local_data(all_thetas) if all_thetas else [] - if all_thetas: - task_mgr = utils.ParallelTaskManager(len(all_thetas)) - local_thetas = task_mgr.global_to_local_data(all_thetas) - else: - if initialize_parmest_model: - task_mgr = utils.ParallelTaskManager( - 1 - ) # initialization performed using just 1 set of theta values - # walk over the mesh, return objective function all_obj = list() + if len(all_thetas) > 0: for Theta in local_thetas: - obj, thetvals, worststatus = self._Q_at_theta( - Theta, initialize_parmest_model=initialize_parmest_model + obj, thetvals, worststatus = self._Q_opt( + theta_vals=Theta, fix_theta=True ) - if worststatus != pyo.TerminationCondition.infeasible: - all_obj.append(list(Theta.values()) + [obj]) - # DLW, Aug2018: should we also store the worst solver status? + + if worststatus == pyo.TerminationCondition.infeasible or obj is None: + all_obj.append(None) + else: + all_obj.append([Theta[name] for name in theta_names] + [obj]) + else: - obj, thetvals, worststatus = self._Q_at_theta( - thetavals={}, initialize_parmest_model=initialize_parmest_model - ) - if worststatus != pyo.TerminationCondition.infeasible: - all_obj.append(list(thetvals.values()) + [obj]) + obj, thetvals, worststatus = self._Q_opt(theta_vals=None, fix_theta=True) + + if worststatus == pyo.TerminationCondition.infeasible or obj is None: + all_obj.append(None) + else: + all_obj.append([thetvals[name] for name in theta_names] + [obj]) global_all_obj = task_mgr.allgather_global_data(all_obj) + global_all_obj = [row for row in global_all_obj if row is not None] + dfcols = list(theta_names) + ['obj'] obj_at_theta = pd.DataFrame(data=global_all_obj, columns=dfcols) return obj_at_theta @@ -2198,6 +2115,158 @@ def confidence_region_test( ################################ +# Only used in the deprecatedEstimator class after 6.10.1dev0 +def ef_nonants(ef): + # Wrapper to call someone's ef_nonants + # (the function being called is very short, but it might be changed) + if use_mpisppy: + return sputils.ef_nonants(ef) + else: + return local_ef.ef_nonants(ef) + + +# Only used in the deprecatedEstimator class +def _experiment_instance_creation_callback( + scenario_name, node_names=None, cb_data=None +): + """ + This is going to be called by mpi-sppy or the local EF and it will call into + the user's model's callback. + + Parameters: + ----------- + scenario_name: `str` Scenario name should end with a number + node_names: `None` ( Not used here ) + cb_data : dict with ["callback"], ["BootList"], + ["theta_names"], ["cb_data"], etc. + "cb_data" is passed through to user's callback function + that is the "callback" value. + "BootList" is None or bootstrap experiment number list. + (called cb_data by mpisppy) + + + Returns: + -------- + instance: `ConcreteModel` + instantiated scenario + + Note: + ---- + There is flexibility both in how the function is passed and its signature. + """ + assert cb_data is not None + outer_cb_data = cb_data + scen_num_str = re.compile(r'(\d+)$').search(scenario_name).group(1) + scen_num = int(scen_num_str) + basename = scenario_name[: -len(scen_num_str)] # to reconstruct name + + CallbackFunction = outer_cb_data["callback"] + + if callable(CallbackFunction): + callback = CallbackFunction + else: + cb_name = CallbackFunction + + if "CallbackModule" not in outer_cb_data: + raise RuntimeError( + "Internal Error: need CallbackModule in parmest callback" + ) + else: + modname = outer_cb_data["CallbackModule"] + + if isinstance(modname, str): + cb_module = im.import_module(modname, package=None) + elif isinstance(modname, types.ModuleType): + cb_module = modname + else: + print("Internal Error: bad CallbackModule") + raise + + try: + callback = getattr(cb_module, cb_name) + except: + print("Error getting function=" + cb_name + " from module=" + str(modname)) + raise + + if "BootList" in outer_cb_data: + bootlist = outer_cb_data["BootList"] + # print("debug in callback: using bootlist=",str(bootlist)) + # assuming bootlist itself is zero based + exp_num = bootlist[scen_num] + else: + exp_num = scen_num + + scen_name = basename + str(exp_num) + + cb_data = outer_cb_data["cb_data"] # cb_data might be None. + + # at least three signatures are supported. The first is preferred + try: + instance = callback(experiment_number=exp_num, cb_data=cb_data) + except TypeError: + raise RuntimeError( + "Only one callback signature is supported: " + "callback(experiment_number, cb_data) " + ) + """ + try: + instance = callback(scenario_tree_model, scen_name, node_names) + except TypeError: # deprecated signature? + try: + instance = callback(scen_name, node_names) + except: + print("Failed to create instance using callback; TypeError+") + raise + except: + print("Failed to create instance using callback.") + raise + """ + if hasattr(instance, "_mpisppy_node_list"): + raise RuntimeError(f"scenario for experiment {exp_num} has _mpisppy_node_list") + nonant_list = [ + instance.find_component(vstr) for vstr in outer_cb_data["theta_names"] + ] + if use_mpisppy: + instance._mpisppy_node_list = [ + scenario_tree.ScenarioNode( + name="ROOT", + cond_prob=1.0, + stage=1, + cost_expression=instance.FirstStageCost, + nonant_list=nonant_list, + scen_model=instance, + ) + ] + else: + instance._mpisppy_node_list = [ + scenario_tree.ScenarioNode( + name="ROOT", + cond_prob=1.0, + stage=1, + cost_expression=instance.FirstStageCost, + scen_name_list=None, + nonant_list=nonant_list, + scen_model=instance, + ) + ] + + if "ThetaVals" in outer_cb_data: + thetavals = outer_cb_data["ThetaVals"] + + # dlw august 2018: see mea code for more general theta + for name, val in thetavals.items(): + theta_cuid = ComponentUID(name) + theta_object = theta_cuid.find_component_on(instance) + if val is not None: + # print("Fixing",vstr,"at",str(thetavals[vstr])) + theta_object.fix(val) + else: + # print("Freeing",vstr) + theta_object.unfix() + + return instance + + @deprecated(version='6.7.2') def group_data(data, groupby_column_name, use_mean=None): """ @@ -2803,9 +2872,11 @@ def _get_sample_list(self, samplesize, num_samples, replacement=True): attempts += 1 if attempts > num_samples: # arbitrary timeout limit - raise RuntimeError("""Internal error: timeout constructing - a sample, the dim of theta may be too - close to the samplesize""") + raise RuntimeError( + "Internal error: timeout constructing " + "a sample, the dim of theta may be too " + "close to the samplesize" + ) samplelist.append((i, sample)) diff --git a/pyomo/contrib/parmest/tests/test_parmest.py b/pyomo/contrib/parmest/tests/test_parmest.py index dfeab769e71..c682e31c252 100644 --- a/pyomo/contrib/parmest/tests/test_parmest.py +++ b/pyomo/contrib/parmest/tests/test_parmest.py @@ -11,7 +11,6 @@ import os import subprocess from itertools import product - from pyomo.common.unittest import pytest from parameterized import parameterized, parameterized_class import pyomo.common.unittest as unittest @@ -347,9 +346,29 @@ def test_custom_covariance_exception(self): ): cov = self.pest.cov_est() - def test_parmest_exception(self): + def test_k_aug_solver_exception(self): + """ + Tests the error message raised when a user passes + the solver option as "k_aug" + """ + + # estimate the parameters + with pytest.raises(RuntimeError, match=r"k_aug no longer supported."): + obj_val, theta_vals = self.pest.theta_est(solver="k_aug") + + def test_unknown_solver_exception(self): + """ + Tests the error message raised when a user passes an + unsupported solver option + """ + + # estimate the parameters + with pytest.raises(RuntimeError, match=r"Unknown solver in Q_Opt=random"): + obj_val, theta_vals = self.pest.theta_est(solver="random") + + def test_exp_outputs_exception(self): """ - Test the exception raised by parmest when the "experiment_outputs" + Tests the exception raised by parmest when the "experiment_outputs" attribute is not defined in the model """ from pyomo.contrib.parmest.examples.rooney_biegler.rooney_biegler import ( @@ -508,40 +527,6 @@ def test_parallel_parmest(self): retcode = subprocess.call(rlist) self.assertEqual(retcode, 0) - @unittest.skipIf(not pynumero_ASL_available, "pynumero_ASL is not available") - def test_theta_est_cov(self): - objval, thetavals, cov = self.pest.theta_est(calc_cov=True, cov_n=6) - - self.assertAlmostEqual(objval, 4.3317112, places=2) - self.assertAlmostEqual( - thetavals["asymptote"], 19.1426, places=2 - ) # 19.1426 from the paper - self.assertAlmostEqual( - thetavals["rate_constant"], 0.5311, places=2 - ) # 0.5311 from the paper - - # Covariance matrix - self.assertAlmostEqual( - cov["asymptote"]["asymptote"], 6.155892, places=2 - ) # 6.22864 from paper - self.assertAlmostEqual( - cov["asymptote"]["rate_constant"], -0.425232, places=2 - ) # -0.4322 from paper - self.assertAlmostEqual( - cov["rate_constant"]["asymptote"], -0.425232, places=2 - ) # -0.4322 from paper - self.assertAlmostEqual( - cov["rate_constant"]["rate_constant"], 0.040571, places=2 - ) # 0.04124 from paper - - """ Why does the covariance matrix from parmest not match the paper? Parmest is - calculating the exact reduced Hessian. The paper (Rooney and Bielger, 2001) likely - employed the first order approximation common for nonlinear regression. The paper - values were verified with Scipy, which uses the same first order approximation. - The formula used in parmest was verified against equations (7-5-15) and (7-5-16) in - "Nonlinear Parameter Estimation", Y. Bard, 1974. - """ - def test_cov_scipy_least_squares_comparison(self): """ Scipy results differ in the 3rd decimal place from the paper. It is possible @@ -655,20 +640,27 @@ def setUp(self): columns=["hour", "y"], ) + # Updated models to use Vars for experiment output, and Constraints def rooney_biegler_params(data): model = pyo.ConcreteModel() model.asymptote = pyo.Param(initialize=15, mutable=True) model.rate_constant = pyo.Param(initialize=0.5, mutable=True) - model.hour = pyo.Param(within=pyo.PositiveReals, mutable=True) - model.y = pyo.Param(within=pyo.PositiveReals, mutable=True) + # Add the experiment inputs + model.h = pyo.Var(initialize=data["hour"].iloc[0], bounds=(0, 10)) - def response_rule(m, h): - expr = m.asymptote * (1 - pyo.exp(-m.rate_constant * h)) - return expr + # Fix the experiment inputs + model.h.fix() - model.response_function = pyo.Expression(data.hour, rule=response_rule) + # Add experiment outputs + model.y = pyo.Var(initialize=data['y'].iloc[0], within=pyo.PositiveReals) + + # Define the model equations + def response_rule(m): + return m.y == m.asymptote * (1 - pyo.exp(-m.rate_constant * m.h)) + + model.response_con = pyo.Constraint(rule=response_rule) return model @@ -683,14 +675,14 @@ def label_model(self): m = self.model m.experiment_outputs = pyo.Suffix(direction=pyo.Suffix.LOCAL) - m.experiment_outputs.update( - [(m.hour, self.data["hour"]), (m.y, self.data["y"])] - ) + m.experiment_outputs.update([(m.y, self.data["y"])]) m.unknown_parameters = pyo.Suffix(direction=pyo.Suffix.LOCAL) m.unknown_parameters.update( (k, pyo.ComponentUID(k)) for k in [m.asymptote, m.rate_constant] ) + m.measurement_error = pyo.Suffix(direction=pyo.Suffix.LOCAL) + m.measurement_error.update([(m.y, None)]) rooney_biegler_params_exp_list = [] for i in range(self.data.shape[0]): @@ -701,24 +693,30 @@ def label_model(self): def rooney_biegler_indexed_params(data): model = pyo.ConcreteModel() + # Define the indexed parameters model.param_names = pyo.Set(initialize=["asymptote", "rate_constant"]) model.theta = pyo.Param( model.param_names, initialize={"asymptote": 15, "rate_constant": 0.5}, mutable=True, ) + # Add the experiment inputs + model.h = pyo.Var(initialize=data["hour"].iloc[0], bounds=(0, 10)) - model.hour = pyo.Param(within=pyo.PositiveReals, mutable=True) - model.y = pyo.Param(within=pyo.PositiveReals, mutable=True) + # Fix the experiment inputs + model.h.fix() - def response_rule(m, h): - expr = m.theta["asymptote"] * ( - 1 - pyo.exp(-m.theta["rate_constant"] * h) - ) - return expr + # Add experiment outputs + model.y = pyo.Var(initialize=data['y'].iloc[0], within=pyo.PositiveReals) - model.response_function = pyo.Expression(data.hour, rule=response_rule) + # Define the model equations + def response_rule(m): + return m.y == m.theta["asymptote"] * ( + 1 - pyo.exp(-m.theta["rate_constant"] * m.h) + ) + # Add the model equations to the model + model.response_con = pyo.Constraint(rule=response_rule) return model class RooneyBieglerExperimentIndexedParams(RooneyBieglerExperiment): @@ -732,13 +730,14 @@ def label_model(self): m = self.model m.experiment_outputs = pyo.Suffix(direction=pyo.Suffix.LOCAL) - m.experiment_outputs.update( - [(m.hour, self.data["hour"]), (m.y, self.data["y"])] - ) + m.experiment_outputs.update([(m.y, self.data["y"])]) m.unknown_parameters = pyo.Suffix(direction=pyo.Suffix.LOCAL) m.unknown_parameters.update((k, pyo.ComponentUID(k)) for k in [m.theta]) + m.measurement_error = pyo.Suffix(direction=pyo.Suffix.LOCAL) + m.measurement_error.update([(m.y, None)]) + rooney_biegler_indexed_params_exp_list = [] for i in range(self.data.shape[0]): rooney_biegler_indexed_params_exp_list.append( @@ -753,14 +752,20 @@ def rooney_biegler_vars(data): model.asymptote.fixed = True # parmest will unfix theta variables model.rate_constant.fixed = True - model.hour = pyo.Param(within=pyo.PositiveReals, mutable=True) - model.y = pyo.Param(within=pyo.PositiveReals, mutable=True) + # Add the experiment inputs + model.h = pyo.Var(initialize=data["hour"].iloc[0], bounds=(0, 10)) - def response_rule(m, h): - expr = m.asymptote * (1 - pyo.exp(-m.rate_constant * h)) - return expr + # Fix the experiment inputs + model.h.fix() - model.response_function = pyo.Expression(data.hour, rule=response_rule) + # Add experiment outputs + model.y = pyo.Var(initialize=data['y'].iloc[0], within=pyo.PositiveReals) + + # Define the model equations + def response_rule(m): + return m.y == m.asymptote * (1 - pyo.exp(-m.rate_constant * m.h)) + + model.response_con = pyo.Constraint(rule=response_rule) return model @@ -775,14 +780,14 @@ def label_model(self): m = self.model m.experiment_outputs = pyo.Suffix(direction=pyo.Suffix.LOCAL) - m.experiment_outputs.update( - [(m.hour, self.data["hour"]), (m.y, self.data["y"])] - ) + m.experiment_outputs.update([(m.y, self.data["y"])]) m.unknown_parameters = pyo.Suffix(direction=pyo.Suffix.LOCAL) m.unknown_parameters.update( (k, pyo.ComponentUID(k)) for k in [m.asymptote, m.rate_constant] ) + m.measurement_error = pyo.Suffix(direction=pyo.Suffix.LOCAL) + m.measurement_error.update([(m.y, None)]) rooney_biegler_vars_exp_list = [] for i in range(self.data.shape[0]): @@ -802,16 +807,22 @@ def rooney_biegler_indexed_vars(data): ) model.theta["rate_constant"].fixed = True - model.hour = pyo.Param(within=pyo.PositiveReals, mutable=True) - model.y = pyo.Param(within=pyo.PositiveReals, mutable=True) + # Add the experiment inputs + model.h = pyo.Var(initialize=data["hour"].iloc[0], bounds=(0, 10)) - def response_rule(m, h): - expr = m.theta["asymptote"] * ( - 1 - pyo.exp(-m.theta["rate_constant"] * h) + # Fix the experiment inputs + model.h.fix() + + # Add experiment outputs + model.y = pyo.Var(initialize=data['y'].iloc[0], within=pyo.PositiveReals) + + # Define the model equations + def response_rule(m): + return m.y == m.theta["asymptote"] * ( + 1 - pyo.exp(-m.theta["rate_constant"] * m.h) ) - return expr - model.response_function = pyo.Expression(data.hour, rule=response_rule) + model.response_con = pyo.Constraint(rule=response_rule) return model @@ -826,28 +837,21 @@ def label_model(self): m = self.model m.experiment_outputs = pyo.Suffix(direction=pyo.Suffix.LOCAL) - m.experiment_outputs.update( - [(m.hour, self.data["hour"]), (m.y, self.data["y"])] - ) + m.experiment_outputs.update([(m.y, self.data["y"])]) m.unknown_parameters = pyo.Suffix(direction=pyo.Suffix.LOCAL) m.unknown_parameters.update((k, pyo.ComponentUID(k)) for k in [m.theta]) + m.measurement_error = pyo.Suffix(direction=pyo.Suffix.LOCAL) + m.measurement_error.update([(m.y, None)]) + rooney_biegler_indexed_vars_exp_list = [] for i in range(self.data.shape[0]): rooney_biegler_indexed_vars_exp_list.append( RooneyBieglerExperimentIndexedVars(self.data.loc[i, :]) ) - # Sum of squared error function - def SSE(model): - expr = ( - model.experiment_outputs[model.y] - - model.response_function[model.experiment_outputs[model.hour]] - ) ** 2 - return expr - - self.objective_function = SSE + self.objective_function = 'SSE' theta_vals = pd.DataFrame([20, 1], index=["asymptote", "rate_constant"]).T theta_vals_index = pd.DataFrame( @@ -899,16 +903,16 @@ def check_rooney_biegler_results(self, objval, cov): self.assertAlmostEqual(objval, 4.3317112, places=2) self.assertAlmostEqual( - cov.iloc[asymptote_index, asymptote_index], 6.30579403, places=2 + cov.iloc[asymptote_index, asymptote_index], 6.155892, places=2 ) # 6.22864 from paper self.assertAlmostEqual( - cov.iloc[asymptote_index, rate_constant_index], -0.4395341, places=2 + cov.iloc[asymptote_index, rate_constant_index], -0.425232, places=2 ) # -0.4322 from paper self.assertAlmostEqual( - cov.iloc[rate_constant_index, asymptote_index], -0.4395341, places=2 + cov.iloc[rate_constant_index, asymptote_index], -0.425232, places=2 ) # -0.4322 from paper self.assertAlmostEqual( - cov.iloc[rate_constant_index, rate_constant_index], 0.04193591, places=2 + cov.iloc[rate_constant_index, rate_constant_index], 0.040571, places=2 ) # 0.04124 from paper @unittest.skipUnless(pynumero_ASL_available, 'pynumero_ASL is not available') @@ -918,8 +922,13 @@ def test_parmest_basics(self): pest = parmest.Estimator( parmest_input["exp_list"], obj_function=self.objective_function ) - - objval, thetavals, cov = pest.theta_est(calc_cov=True, cov_n=6) + # estimate the parameters and covariance matrix + objval, thetavals = pest.theta_est() + # For covariance, using reduced_hessian method since finite difference + # and automatic differentiation may differ from paper results in the + # 3rd decimal place, likely due to differences in finite difference + # approximation of the Jacobian + cov = pest.cov_est(method="reduced_hessian") self.check_rooney_biegler_results(objval, cov) obj_at_theta = pest.objective_at_theta(parmest_input["theta_vals"]) @@ -933,7 +942,8 @@ def test_parmest_basics_with_initialize_parmest_model_option(self): parmest_input["exp_list"], obj_function=self.objective_function ) - objval, thetavals, cov = pest.theta_est(calc_cov=True, cov_n=6) + objval, thetavals = pest.theta_est() + cov = pest.cov_est(method="reduced_hessian") self.check_rooney_biegler_results(objval, cov) obj_at_theta = pest.objective_at_theta( @@ -954,7 +964,8 @@ def test_parmest_basics_with_square_problem_solve(self): parmest_input["theta_vals"], initialize_parmest_model=True ) - objval, thetavals, cov = pest.theta_est(calc_cov=True, cov_n=6) + objval, thetavals = pest.theta_est() + cov = pest.cov_est(method="reduced_hessian") self.check_rooney_biegler_results(objval, cov) self.assertAlmostEqual(obj_at_theta["obj"][0], 16.531953, places=2) @@ -970,7 +981,8 @@ def test_parmest_basics_with_square_problem_solve_no_theta_vals(self): obj_at_theta = pest.objective_at_theta(initialize_parmest_model=True) - objval, thetavals, cov = pest.theta_est(calc_cov=True, cov_n=6) + objval, thetavals = pest.theta_est() + cov = pest.cov_est(method="reduced_hessian") self.check_rooney_biegler_results(objval, cov) @@ -1107,7 +1119,8 @@ def _dccrate(m, t): def ComputeFirstStageCost_rule(m): return 0 - m.FirstStageCost = pyo.Expression(rule=ComputeFirstStageCost_rule) + # Model objective component names adjusted to prevent reserved name error. + m.FirstStage = pyo.Expression(rule=ComputeFirstStageCost_rule) def ComputeSecondStageCost_rule(m): return sum( @@ -1117,14 +1130,12 @@ def ComputeSecondStageCost_rule(m): for t in meas_t ) - m.SecondStageCost = pyo.Expression(rule=ComputeSecondStageCost_rule) + m.SecondStage = pyo.Expression(rule=ComputeSecondStageCost_rule) def total_cost_rule(model): - return model.FirstStageCost + model.SecondStageCost + return model.FirstStage + model.SecondStage - m.Total_Cost_Objective = pyo.Objective( - rule=total_cost_rule, sense=pyo.minimize - ) + m.Total_Cost = pyo.Objective(rule=total_cost_rule, sense=pyo.minimize) disc = pyo.TransformationFactory("dae.collocation") disc.apply_to(m, nfe=20, ncp=2) @@ -1165,6 +1176,10 @@ def label_model(self): m.unknown_parameters.update( (k, pyo.ComponentUID(k)) for k in [m.k1, m.k2] ) + m.measurement_error = pyo.Suffix(direction=pyo.Suffix.LOCAL) + m.measurement_error.update((m.ca[t], None) for t in meas_time_points) + m.measurement_error.update((m.cb[t], None) for t in meas_time_points) + m.measurement_error.update((m.cc[t], None) for t in meas_time_points) def get_labeled_model(self): self.create_model() @@ -1210,8 +1225,8 @@ def get_labeled_model(self): exp_list_df = [ReactorDesignExperimentDAE(data_df)] exp_list_dict = [ReactorDesignExperimentDAE(data_dict)] - self.pest_df = parmest.Estimator(exp_list_df) - self.pest_dict = parmest.Estimator(exp_list_dict) + self.pest_df = parmest.Estimator(exp_list_df, obj_function="SSE") + self.pest_dict = parmest.Estimator(exp_list_dict, obj_function="SSE") # Estimator object with multiple scenarios exp_list_df_multiple = [ @@ -1223,8 +1238,12 @@ def get_labeled_model(self): ReactorDesignExperimentDAE(data_dict), ] - self.pest_df_multiple = parmest.Estimator(exp_list_df_multiple) - self.pest_dict_multiple = parmest.Estimator(exp_list_dict_multiple) + self.pest_df_multiple = parmest.Estimator( + exp_list_df_multiple, obj_function="SSE" + ) + self.pest_dict_multiple = parmest.Estimator( + exp_list_dict_multiple, obj_function="SSE" + ) # Create an instance of the model self.m_df = ABC_model(data_df) @@ -1260,7 +1279,7 @@ def label_model(self): self.exp_list_df_no_params = exp_list_df_no_params self.exp_list_dict_no_params = exp_list_dict_no_params - def test_parmest_exception(self): + def test_unknown_parameters_exception(self): """ Test the exception raised by parmest when the "unknown_parameters" attribute is not defined in the model @@ -1312,13 +1331,14 @@ def test_covariance(self): ) # Number of datapoints. - # 3 data components (ca, cb, cc), 20 timesteps, 1 scenario = 60 - # In this example, this is the number of data points in data_df, but that's - # only because the data is indexed by time and contains no additional information. - n = 60 + # In this example, there are 20 time points and 1 experiment = 20 data points + # The data is indexed by time, so we do not consider the number of experimental + # outputs. + n = 20 # Compute covariance using parmest - obj, theta, cov = self.pest_df.theta_est(calc_cov=True, cov_n=n) + obj, theta = self.pest_df.theta_est() + cov = self.pest_df.cov_est(method="reduced_hessian") # Compute covariance using interior_point vars_list = [self.m_df.k1, self.m_df.k2] @@ -1416,6 +1436,432 @@ def test_theta_est_with_square_initialization_diagnostic_mode_true(self): self.pest.diagnostic_mode = False +class LinearThetaExperiment(Experiment): + def __init__(self, x, y, include_second_output=False): + self.x_data = x + self.y_data = y + self.include_second_output = include_second_output + self.model = None + + def create_model(self): + m = pyo.ConcreteModel() + + m.theta = pyo.Var(initialize=0.0, bounds=(-10.0, 10.0)) + m.x = pyo.Param(initialize=float(self.x_data), mutable=False) + m.y = pyo.Var(initialize=float(self.y_data)) + + m.y_link = pyo.Constraint(expr=m.y == m.theta + m.x) + + if self.include_second_output: + m.z = pyo.Var(initialize=2.0 * float(self.y_data)) + m.z_link = pyo.Constraint(expr=m.z == 2.0 * m.theta + m.x) + + self.model = m + + def label_model(self): + m = self.model + + m.experiment_outputs = pyo.Suffix(direction=pyo.Suffix.LOCAL) + m.experiment_outputs.update([(m.y, float(self.y_data))]) + + if self.include_second_output: + m.experiment_outputs.update([(m.z, float(2.0 * self.y_data))]) + + m.unknown_parameters = pyo.Suffix(direction=pyo.Suffix.LOCAL) + m.unknown_parameters.update([(m.theta, pyo.ComponentUID(m.theta))]) + + m.measurement_error = pyo.Suffix(direction=pyo.Suffix.LOCAL) + m.measurement_error.update([(m.y, None)]) + + if self.include_second_output: + m.measurement_error.update([(m.z, None)]) + + def get_labeled_model(self): + self.create_model() + self.label_model() + return self.model + + +class IndexedThetaExperiment(Experiment): + def __init__(self, x, y): + self.x_data = x + self.y_data = y + self.model = None + + def create_model(self): + m = pyo.ConcreteModel() + + m.theta_index = pyo.Set(initialize=["a", "b"]) + m.theta = pyo.Var( + m.theta_index, initialize={"a": 0.0, "b": 1.0}, bounds=(-10.0, 10.0) + ) + + m.x = pyo.Param(initialize=float(self.x_data), mutable=False) + m.y = pyo.Var(initialize=float(self.y_data)) + + m.y_link = pyo.Constraint(expr=m.y == m.theta["a"] + m.theta["b"] * m.x) + + self.model = m + + def label_model(self): + m = self.model + + m.experiment_outputs = pyo.Suffix(direction=pyo.Suffix.LOCAL) + m.experiment_outputs.update([(m.y, float(self.y_data))]) + + m.unknown_parameters = pyo.Suffix(direction=pyo.Suffix.LOCAL) + m.unknown_parameters.update([(m.theta, pyo.ComponentUID(m.theta))]) + + m.measurement_error = pyo.Suffix(direction=pyo.Suffix.LOCAL) + m.measurement_error.update([(m.y, None)]) + + def get_labeled_model(self): + self.create_model() + self.label_model() + return self.model + + +class BoundedLinearThetaExperiment(Experiment): + def __init__(self, x, y): + self.x_data = x + self.y_data = y + self.model = None + + def create_model(self): + m = pyo.ConcreteModel() + + m.theta = pyo.Var(initialize=0.0, bounds=(-10.0, 10.0)) + m.x = pyo.Param(initialize=float(self.x_data), mutable=False) + m.y = pyo.Var(initialize=float(self.y_data)) + + m.y_link = pyo.Constraint(expr=m.y == m.theta + m.x) + + # This allows fixed-theta tests to create a real infeasible model. + # For example, theta=2 and x=1 implies y=3, violating y <= 2. + m.upper_limit = pyo.Constraint(expr=m.y <= 2.0) + + self.model = m + + def label_model(self): + m = self.model + + m.experiment_outputs = pyo.Suffix(direction=pyo.Suffix.LOCAL) + m.experiment_outputs.update([(m.y, float(self.y_data))]) + + m.unknown_parameters = pyo.Suffix(direction=pyo.Suffix.LOCAL) + m.unknown_parameters.update([(m.theta, pyo.ComponentUID(m.theta))]) + + m.measurement_error = pyo.Suffix(direction=pyo.Suffix.LOCAL) + m.measurement_error.update([(m.y, None)]) + + def get_labeled_model(self): + self.create_model() + self.label_model() + return self.model + + +class IndexedOutputExperiment(Experiment): + def __init__(self, y_points, z_points): + self.y_points = list(y_points) + self.z_points = list(z_points) + self.model = None + + def create_model(self): + m = pyo.ConcreteModel() + + m.theta = pyo.Var(initialize=0.0, bounds=(-10.0, 10.0)) + + m.y_index = pyo.Set(dimen=2, ordered=True, initialize=self.y_points) + m.z_index = pyo.Set(dimen=2, ordered=True, initialize=self.z_points) + + m.y = pyo.Var(m.y_index, initialize=0.0) + m.z = pyo.Var(m.z_index, initialize=0.0) + + self.model = m + + def label_model(self): + m = self.model + + m.experiment_outputs = pyo.Suffix(direction=pyo.Suffix.LOCAL) + + m.experiment_outputs.update( + (m.y[idx], float(i)) for i, idx in enumerate(self.y_points, start=1) + ) + + m.experiment_outputs.update( + (m.z[idx], float(i)) for i, idx in enumerate(self.z_points, start=1) + ) + + m.unknown_parameters = pyo.Suffix(direction=pyo.Suffix.LOCAL) + m.unknown_parameters.update([(m.theta, pyo.ComponentUID(m.theta))]) + + m.measurement_error = pyo.Suffix(direction=pyo.Suffix.LOCAL) + + m.measurement_error.update((m.y[idx], None) for idx in self.y_points) + m.measurement_error.update((m.z[idx], None) for idx in self.z_points) + + def get_labeled_model(self): + self.create_model() + self.label_model() + return self.model + + +def _build_estimator(data, include_second_output=False): + exp_list = [ + LinearThetaExperiment(x=x, y=y, include_second_output=include_second_output) + for x, y in data + ] + + return parmest.Estimator(exp_list, obj_function="SSE") + + +def _build_indexed_theta_estimator(data): + exp_list = [IndexedThetaExperiment(x=x, y=y) for x, y in data] + + return parmest.Estimator(exp_list, obj_function="SSE") + + +def _build_bounded_estimator(data): + exp_list = [BoundedLinearThetaExperiment(x=x, y=y) for x, y in data] + + return parmest.Estimator(exp_list, obj_function="SSE") + + +@unittest.skipIf( + not parmest.parmest_available, + "Cannot test parmest: required dependencies are missing", +) +class TestParmestBlockEF(unittest.TestCase): + + def test_block_ef_structure_counts(self): + pest = _build_estimator([(1.0, 2.0), (2.0, 4.0)]) + model = pest._create_scenario_blocks() + + theta_names = model._parmest_theta_names + self.assertEqual(list(model.scenario_indices), [0, 1]) + self.assertEqual( + [pyo.value(model.scenario_number[i]) for i in model.scenario_indices], + [0, 1], + ) + self.assertEqual(list(model.exp_scenarios.keys()), list(model.scenario_indices)) + self.assertEqual(len(list(model.exp_scenarios.keys())), 2) + self.assertEqual(len(model.theta_link_constraints), 2 * len(theta_names)) + self.assertTrue(hasattr(model, "Obj")) + + for block in model.exp_scenarios.values(): + self.assertFalse(block.Total_Cost_Objective.active) + self.assertFalse(block.theta.fixed) + self.assertAlmostEqual( + pyo.value(block.theta), + pyo.value(model.parmest_theta["theta"]), + places=10, + ) + + def test_fix_theta_sets_all_scenario_theta_values(self): + pest = _build_estimator([(1.0, 2.0), (2.0, 4.0)]) + model = pest._create_scenario_blocks(theta_vals={"theta": 1.0}, fix_theta=True) + + self.assertTrue(model.parmest_theta["theta"].fixed) + self.assertAlmostEqual(pyo.value(model.parmest_theta["theta"]), 1.0, places=10) + self.assertEqual(len(model.theta_link_constraints), 0) + + for block in model.exp_scenarios.values(): + self.assertTrue(block.theta.fixed) + self.assertAlmostEqual(pyo.value(block.theta), 1.0, places=10) + + def test_duplicate_bootlist_preserves_scenario_mapping(self): + pest = _build_estimator([(1.0, 2.0), (2.0, 4.0)]) + model = pest._create_scenario_blocks(bootlist=[0, 1, 1]) + + self.assertEqual(pest.obj_probability_constant, 3) + self.assertEqual(list(model.scenario_indices), [0, 1, 2]) + self.assertEqual(list(model.exp_scenarios.keys()), [0, 1, 2]) + self.assertEqual( + [pyo.value(model.scenario_number[i]) for i in model.scenario_indices], + [0, 1, 1], + ) + self.assertIsNot(model.exp_scenarios[1], model.exp_scenarios[2]) + self.assertAlmostEqual(pyo.value(model.exp_scenarios[1].x), 2.0, places=10) + self.assertAlmostEqual(pyo.value(model.exp_scenarios[2].x), 2.0, places=10) + + def test_indexed_unknown_parameters_are_expanded_and_fixed(self): + pest = _build_indexed_theta_estimator([(1.0, 2.0), (2.0, 4.0)]) + + model = pest._create_scenario_blocks( + theta_vals={"theta[a]": 1.0, "theta[b]": 2.0}, fix_theta=True + ) + + self.assertEqual(list(model._parmest_theta_names), ["theta[a]", "theta[b]"]) + self.assertEqual(len(model.theta_link_constraints), 0) + + for block in model.exp_scenarios.values(): + self.assertTrue(block.theta["a"].fixed) + self.assertTrue(block.theta["b"].fixed) + self.assertAlmostEqual(pyo.value(block.theta["a"]), 1.0, places=10) + self.assertAlmostEqual(pyo.value(block.theta["b"]), 2.0, places=10) + + @unittest.skipIf(not ipopt_available, "The 'ipopt' solver is not available") + def test_q_opt_solves_block_ef_and_returns_theta(self): + pest = _build_estimator([(1.0, 2.0), (2.0, 4.0)]) + + obj, theta = pest._Q_opt() + + self.assertAlmostEqual(theta["theta"], 1.5, places=7) + self.assertAlmostEqual(obj, 0.25, places=7) + + @unittest.skipIf(not ipopt_available, "The 'ipopt' solver is not available") + def test_q_opt_returns_requested_values(self): + pest = _build_estimator([(1.0, 2.0), (2.0, 4.0)]) + + obj, theta, var_values = pest._Q_opt(return_values=["y"]) + + self.assertAlmostEqual(theta["theta"], 1.5, places=7) + self.assertIsInstance(var_values, pd.DataFrame) + self.assertEqual(list(var_values.columns), ["y"]) + self.assertEqual(len(var_values), 2) + self.assertAlmostEqual(var_values.loc[0, "y"], 2.5, places=7) + self.assertAlmostEqual(var_values.loc[1, "y"], 3.5, places=7) + + @unittest.skipIf(not ipopt_available, "The 'ipopt' solver is not available") + def test_q_opt_fixed_theta_returns_objective_theta_and_status(self): + pest = _build_estimator([(1.0, 2.0), (2.0, 4.0)]) + + obj, theta, status = pest._Q_opt(theta_vals={"theta": 1.0}, fix_theta=True) + + self.assertEqual(status, pyo.TerminationCondition.optimal) + self.assertEqual(theta, {"theta": 1.0}) + self.assertAlmostEqual(obj, 0.5, places=8) + + @unittest.skipIf(not ipopt_available, "The 'ipopt' solver is not available") + def test_q_opt_fixed_theta_infeasible_returns_none(self): + pest = _build_bounded_estimator([(1.0, 2.0), (2.0, 3.0)]) + + obj, theta, status = pest._Q_opt(theta_vals={"theta": 2.0}, fix_theta=True) + + self.assertIsNone(obj) + self.assertEqual(theta, {"theta": 2.0}) + self.assertEqual(status, pyo.TerminationCondition.infeasible) + + @unittest.skipIf(not ipopt_available, "The 'ipopt' solver is not available") + def test_objective_at_theta_fixed_value(self): + pest = _build_estimator([(1.0, 2.0), (2.0, 4.0)]) + + theta_values = pd.DataFrame([[1.0]], columns=["theta"]) + obj_at_theta = pest.objective_at_theta(theta_values=theta_values) + + self.assertAlmostEqual(obj_at_theta.loc[0, "obj"], 0.5, places=8) + + @unittest.skipIf(not ipopt_available, "The 'ipopt' solver is not available") + def test_objective_at_theta_none_uses_initial_theta(self): + pest = _build_estimator([(1.0, 2.0), (2.0, 3.0)]) + + obj_at_theta = pest.objective_at_theta() + + self.assertAlmostEqual(obj_at_theta.loc[0, "obj"], 1.0, places=8) + self.assertAlmostEqual(obj_at_theta.loc[0, "theta"], 0.0, places=8) + + @unittest.skipIf(not ipopt_available, "The 'ipopt' solver is not available") + def test_objective_at_theta_omits_infeasible_rows(self): + pest = _build_bounded_estimator([(1.0, 2.0), (2.0, 3.0)]) + + theta_values = pd.DataFrame([[0.0], [2.0]], columns=["theta"]) + + obj_at_theta = pest.objective_at_theta(theta_values=theta_values) + + self.assertEqual(len(obj_at_theta), 1) + self.assertEqual(list(obj_at_theta.columns), ["theta", "obj"]) + self.assertAlmostEqual(obj_at_theta.loc[0, "theta"], 0.0, places=8) + self.assertAlmostEqual(obj_at_theta.loc[0, "obj"], 1.0, places=8) + + def test_invalid_solver_name_raises_runtimeerror(self): + pest = _build_estimator([(1.0, 2.0), (2.0, 4.0)]) + + with self.assertRaisesRegex( + RuntimeError, "Unknown solver in Q_Opt=not_a_solver" + ): + pest.theta_est(solver="not_a_solver") + + def test_theta_values_duplicate_columns_rejected(self): + pest = _build_estimator([(1.0, 2.0), (2.0, 4.0)]) + + duplicate_cols = pd.DataFrame([[1.0, 2.0]], columns=["theta", "theta"]) + + with self.assertRaisesRegex( + ValueError, "Duplicate theta names are not allowed" + ): + pest.objective_at_theta(theta_values=duplicate_cols) + + +@unittest.skipIf( + not parmest.parmest_available, + "Cannot test parmest: required dependencies are missing", +) +class TestCountTotalExperiments(unittest.TestCase): + def test_count_total_experiments_multi_output(self): + exp_list = [ + LinearThetaExperiment(1.0, 2.0, include_second_output=True), + LinearThetaExperiment(2.0, 4.0, include_second_output=True), + ] + + total_points = parmest._count_total_experiments(exp_list) + + # The current parmest convention counts datapoints for one output family. + self.assertEqual(total_points, 2) + + def test_count_total_experiments_tuple_index_multi_output(self): + exp_list = [ + IndexedOutputExperiment( + y_points=[(0.0, "A"), (1.0, "A")], z_points=[(0.0, "A"), (1.0, "A")] + ), + IndexedOutputExperiment( + y_points=[(0.5, "A"), (1.5, "A")], z_points=[(0.5, "A"), (1.5, "A")] + ), + ] + + total_points = parmest._count_total_experiments(exp_list) + + self.assertEqual(total_points, 4) + + def test_count_total_experiments_rejects_mismatched_output_lengths(self): + exp_list = [ + IndexedOutputExperiment( + y_points=[(0.0, "A"), (1.0, "A")], z_points=[(0.0, "A")] + ) + ] + + with self.assertRaisesRegex( + AssertionError, + "Experiment outputs must have the same number of indices or data points", + ): + parmest._count_total_experiments(exp_list) + + def test_count_total_experiments_rejects_mismatched_time_points(self): + exp_list = [ + IndexedOutputExperiment( + y_points=[(0.0, "A"), (1.0, "A")], z_points=[(0.0, "A"), (2.0, "A")] + ) + ] + + with self.assertRaisesRegex( + AssertionError, + "Experiment outputs must share the same indices or data points", + ): + parmest._count_total_experiments(exp_list) + + def test_count_total_experiments_rejects_time_not_in_first_index(self): + exp_list = [ + IndexedOutputExperiment( + y_points=[(0.0, "A"), (1.0, "A")], z_points=[("A", 0.0), ("A", 1.0)] + ) + ] + + with self.assertRaisesRegex( + AssertionError, + "The first index of experiment outputs must be the data point", + ): + parmest._count_total_experiments(exp_list) + + ########################### # tests for deprecated UI # ########################### diff --git a/pyomo/contrib/parmest/utils/create_ef.py b/pyomo/contrib/parmest/utils/create_ef.py index 56048fced10..17336114668 100644 --- a/pyomo/contrib/parmest/utils/create_ef.py +++ b/pyomo/contrib/parmest/utils/create_ef.py @@ -21,6 +21,7 @@ from pyomo.core import Objective +# File no longer used in parmest; retained for possible future use. def get_objs(scenario_instance): """return the list of objective functions for scenario_instance""" scenario_objs = scenario_instance.component_data_objects( diff --git a/pyomo/contrib/parmest/utils/mpi_utils.py b/pyomo/contrib/parmest/utils/mpi_utils.py index 82376062c45..30560a6a77d 100644 --- a/pyomo/contrib/parmest/utils/mpi_utils.py +++ b/pyomo/contrib/parmest/utils/mpi_utils.py @@ -10,6 +10,8 @@ from collections import OrderedDict import importlib +# ParallelTaskManager is used, MPI Interface is not. + """ This module is a collection of classes that provide a friendlier interface to MPI (through mpi4py). They help