diff --git a/gtep/gtep_data.py b/gtep/gtep_data.py
index 75baa4eb..251fcbc6 100644
--- a/gtep/gtep_data.py
+++ b/gtep/gtep_data.py
@@ -20,50 +20,107 @@
 from pyomo.environ import *
 from prescient.simulator.config import PrescientConfig
 from prescient.data.providers import gmlc_data_provider
-import datetime
 import pandas as pd
+import os
+from pathlib import Path
 
 
 class ExpansionPlanningData:
     """Standard data storage class for the IDAES GTEP model."""
 
-    def __init__(self):
-        pass
+    def __init__(
+        self,
+        stages=2,
+        num_reps=4,
+        len_reps=1,
+        num_commit=24,
+        num_dispatch=1,
+        duration_dispatch=60,
+    ):
+        """Initialize generation & expansion planning data object.
+
-    def load_prescient(self, data_path, options_dict=None):
+        :param stages: integer number of investment periods, defaults to 2
+        :param num_reps: integer number of representative periods per investment period, defaults to 4
+        :param len_reps: (for now integer) length of each representative period (in hours), defaults to 1
+        :param num_commit: integer number of commitment periods per representative period, defaults to 24
+        :param num_dispatch: integer number of dispatch periods per commitment period, defaults to 1
+        :param duration_dispatch: (for now integer) duration of each dispatch period (in minutes), defaults to 60
+        """
+        self.stages = stages
+        self.num_reps = num_reps
+        self.len_reps = len_reps
+        self.num_commit = num_commit
+        self.num_dispatch = num_dispatch
+        self.duration_dispatch = duration_dispatch
+
+    def load_prescient(
+        self,
+        data_path,
+        representative_dates=None,
+        representative_weights=None,
+        options_dict=None,
+    ):
         """Loads data structured via Prescient data loader.
 
         :param data_path: Folder containing the data to be loaded
+        :param representative_dates: List of time points to include, defaults to None (four built-in 2020 dates are used). Note: Change the last date for whatever extreme day is needed based on the given run(s)
+        :param representative_weights: dictionary of weights for each representative date, defaults to None (equal weights are computed)
         :param options_dict: Options dictionary to pass to the Prescient data loader, defaults to None
+
         """
         self.data_type = "prescient"
-        options_dict = {
-            "data_path": data_path,
-            "input_format": "rts-gmlc",
-            "start_date": "01-01-2020",
-            "num_days": 365,
-            "sced_horizon": 1,
-            "sced_frequency_minutes": 60,
-            "ruc_horizon": 36,
-        }
-
+        # create prescient config object with defaults
         prescient_options = PrescientConfig()
+
+        # work around for prescient throwing an error with Path objects
+        if isinstance(data_path, Path):
+            data_path = str(data_path)
+
+        if options_dict is None:
+            # set basic configurations that do not match prescient defaults
+            options_dict = {
+                "data_path": data_path,
+                "num_days": 365,
+                "ruc_horizon": 36,
+            }
+
+        else:
+            # ensure data path is included without mutating the caller's dictionary
+            options_dict = {**options_dict, "data_path": data_path}
+
+        # update configuration values based on options dictionary
         prescient_options.set_value(options_dict)
+
         # Use prescient data provider to load in sequential data for representative periods
         data_list = []
 
-        x = datetime.datetime(2020, 1, 1)
         data_provider = gmlc_data_provider.GmlcDataProvider(options=prescient_options)
+
+        # grab details from simulation objects file (data provider above throws error if no simulation_objects.csv exists)
+        metadata_path = os.path.join(data_path, "simulation_objects.csv")
+        metadata_df = pd.read_csv(metadata_path, index_col=0)
+
+        # save to variable for easy calling
+        sced_freq_min = prescient_options.sced_frequency_minutes
+
+        # This step is grabbing DAY_AHEAD information for now
+        # (in the future we may want to update to grab the "REAL_TIME" data if the data has reliable data since the actuals model is looking for real time data info)
+        period_per_step = int(metadata_df.loc["Periods_per_Step"]["DAY_AHEAD"])
+        total_num_steps = prescient_options.num_days * period_per_step
+
         # populate an egret model data with the basic stuff
         self.md = data_provider.get_initial_actuals_model(
-            options=prescient_options, num_time_steps=24 * 365, minutes_per_timestep=60
+            options=prescient_options,
+            num_time_steps=total_num_steps,
+            minutes_per_timestep=sced_freq_min,
         )
-        # fill in renewable actuals and maybe demand idk
+
+        # fill in renewable actuals
         data_provider.populate_with_actuals(
             options=prescient_options,
-            num_time_periods=24 * 365,
-            time_period_length_minutes=60,
-            start_time=x,
+            num_time_periods=total_num_steps,
+            time_period_length_minutes=sced_freq_min,
+            start_time=data_provider._start_time,
             model=self.md,
         )
 
@@ -89,40 +146,59 @@ def load_prescient(self, data_path, options_dict=None):
         ## of modelData objects, not just a single modelData object
         # Arbitrary time points and lengths picked for representative periods
         # default here allows up to 24 hours for periods
+        if representative_dates is None:
+            representative_dates = [
+                "2020-01-28 00:00",
+                "2020-04-23 00:00",
+                "2020-07-05 00:00",
+                "2020-10-14 00:00",  ## Change the last date for whatever extreme day is needed based on the given run(s)
+            ]
+        self.representative_dates = representative_dates
 
-        ## RMA:
-        ## Change the last date for whatever extreme day is needed based on the given run(s)
+        if not representative_weights:
+            # default: total weight (days x stages) split evenly over the dates
+            total_weight = prescient_options.num_days * self.stages
+            even_weight = int(total_weight / len(representative_dates))
+            representative_weights = dict.fromkeys(representative_dates, even_weight)
+        # store the weights whether they were passed in or defaulted above; before,
+        # caller-supplied weights were dropped and the attribute was never set
+        self.representative_weights = representative_weights
 
         time_keys = self.md.data["system"]["time_keys"]
-        self.representative_dates = [
-            "2020-01-28 00:00",
-            "2020-04-23 00:00",
-            "2020-07-05 00:00",
-            "2020-10-14 00:00",
-        ]
 
-        ## FIXME:
-        ## RESIL WEEK ONLY
-        ## but we'll want something similar just less insane in the future
-        if len(self.representative_dates) == 5:
-            self.representative_weights = {1: 91, 2: 91, 3: 91, 4: 91, 5: 1}
-        else:
-            self.representative_weights = {1: 91, 2: 91, 3: 91, 4: 91}
-        # self.representative_weights = {1:1}
         for date in self.representative_dates:
             key_idx = time_keys.index(date)
-            time_key_set = time_keys[key_idx : key_idx + 24]
+            time_key_set = time_keys[key_idx : key_idx + period_per_step]
             data_list.append(self.md.clone_at_time_keys(time_key_set))
 
         self.representative_data = data_list
 
-    def import_load_scaling(self, load_file_name):
+    def import_load_scaling(self, load_file_name, forecast_years=None):
+        """Imports load scaling data for forecast years.
+
+        :param load_file_name: filepath for adjusted forecast excel file
+        :param forecast_years: list of years to forecast, defaults to [2025, 2030, 2035]
+
+        """
         adjusted_forecast = pd.read_excel(load_file_name)
+
+        if forecast_years is None:
+            forecast_years = [2025, 2030, 2035]
+
+        # check years are valid
+        if len(forecast_years) < self.stages:
+            raise ValueError(
+                "Not enough forecast years for the number of stages of investment"
+            )
+        elif any(year < 2020 or year > 2050 for year in forecast_years):
+            raise ValueError(
+                "The list of years includes a year before 2020 or after 2050."
+            )
+
         adjusted_forecast_by_period = adjusted_forecast[
-            (adjusted_forecast["year"] == 2025)
-            | (adjusted_forecast["year"] == 2030)
-            | (adjusted_forecast["year"] == 2035)
+            adjusted_forecast["year"].isin(forecast_years)
         ]
+
         base_zones = [
             "base_economic_coast",
             "base_economic_east",
@@ -173,11 +249,15 @@ def import_load_scaling(self, load_file_name):
         self.load_scaling = load_scaling_df
 
     def import_outage_data(self, load_file_name):
+        """Imports outage data.
+
+        :param load_file_name: filepath for the outage data csv file (must include the case_4b_prob and lim_timestamp columns)
+
+        """
         outage_list = pd.read_csv(load_file_name)
         percentile_threshold = 0.9
         threshold_value = outage_list["case_4b_prob"].quantile(percentile_threshold)
         filtered_outages = outage_list[outage_list["case_4b_prob"] >= threshold_value]
-        import re
 
         filtered_outages["hour"] = filtered_outages["lim_timestamp"].str.extract(
             r" (\d+):"
@@ -205,37 +285,67 @@ def import_outage_data(self, load_file_name):
         self.bus_hours = self.bus_hours.astype(int)
 
     def load_default_data_settings(self):
-        ## TODO: too many of these are hard coded; everything should check if it exists too.
+        # NOTE: many of these defaults are hard coded; per the original author, nothing later in the process sets them as of now.
         """Fills in necessary but unspecified data information."""
-        for gen in self.md.data["elements"]["generator"]:
-            if self.md.data["elements"]["generator"][gen]["fuel"] == "C":
-                if self.md.data["elements"]["generator"][gen]["in_service"] == False:
-                    self.md.data["elements"]["generator"][gen]["lifetime"] = 1
-                else:
-                    self.md.data["elements"]["generator"][gen]["lifetime"] = 2
-            else:
-                self.md.data["elements"]["generator"][gen]["lifetime"] = 3
-                self.md.data["elements"]["generator"][gen]["lifetime"] = 3
-            self.md.data["elements"]["generator"][gen]["spinning_reserve_frac"] = 0.1
-            self.md.data["elements"]["generator"][gen]["quickstart_reserve_frac"] = 0.1
-            self.md.data["elements"]["generator"][gen]["capital_multiplier"] = 1
-            self.md.data["elements"]["generator"][gen]["extension_multiplier"] = 0
-            self.md.data["elements"]["generator"][gen]["max_operating_reserve"] = 1
-            self.md.data["elements"]["generator"][gen]["max_spinning_reserve"] = 1
-            self.md.data["elements"]["generator"][gen]["max_quickstart_reserve"] = 1
-            self.md.data["elements"]["generator"][gen]["ramp_up_rate"] = 0.1
-            self.md.data["elements"]["generator"][gen]["ramp_down_rate"] = 0.1
-            self.md.data["elements"]["generator"][gen]["emissions_factor"] = 1
-            self.md.data["elements"]["generator"][gen]["start_fuel"] = 1
-            self.md.data["elements"]["generator"][gen]["investment_cost"] = 1
-        for branch in self.md.data["elements"]["branch"]:
-            self.md.data["elements"]["branch"][branch]["loss_rate"] = 0
-            self.md.data["elements"]["branch"][branch]["distance"] = 1
-            self.md.data["elements"]["branch"][branch]["capital_cost"] = 10000000
-        self.md.data["system"]["min_operating_reserve"] = 0.1
-        self.md.data["system"]["min_spinning_reserve"] = 0.1
+        if "elements" in self.md.data.keys():
+            if "generator" in self.md.data["elements"].keys():
+                for gen in self.md.data["elements"]["generator"]:
+                    # set lifetime value to default first
+                    self.md.data["elements"]["generator"][gen]["lifetime"] = 3
+                    if "fuel" in self.md.data["elements"]["generator"][gen].keys():
+                        if self.md.data["elements"]["generator"][gen]["fuel"] == "C":
+                            if (
+                                self.md.data["elements"]["generator"][gen]["in_service"]
+                                == False
+                            ):
+                                self.md.data["elements"]["generator"][gen][
+                                    "lifetime"
+                                ] = 1
+                            else:
+                                self.md.data["elements"]["generator"][gen][
+                                    "lifetime"
+                                ] = 2
+
+                    self.md.data["elements"]["generator"][gen][
+                        "spinning_reserve_frac"
+                    ] = 0.1
+                    self.md.data["elements"]["generator"][gen][
+                        "quickstart_reserve_frac"
+                    ] = 0.1
+                    self.md.data["elements"]["generator"][gen]["capital_multiplier"] = 1
+                    self.md.data["elements"]["generator"][gen][
+                        "extension_multiplier"
+                    ] = 0
+                    self.md.data["elements"]["generator"][gen][
+                        "max_operating_reserve"
+                    ] = 1
+                    self.md.data["elements"]["generator"][gen][
+                        "max_spinning_reserve"
+                    ] = 1
+                    self.md.data["elements"]["generator"][gen][
+                        "max_quickstart_reserve"
+                    ] = 1
+                    self.md.data["elements"]["generator"][gen]["ramp_up_rate"] = 0.1
+                    self.md.data["elements"]["generator"][gen]["ramp_down_rate"] = 0.1
+                    self.md.data["elements"]["generator"][gen]["emissions_factor"] = 1
+                    self.md.data["elements"]["generator"][gen]["start_fuel"] = 1
+                    self.md.data["elements"]["generator"][gen]["investment_cost"] = 1
+            if "branch" in self.md.data["elements"].keys():
+                for branch in self.md.data["elements"]["branch"]:
+                    self.md.data["elements"]["branch"][branch]["loss_rate"] = 0
+                    self.md.data["elements"]["branch"][branch]["distance"] = 1
+                    self.md.data["elements"]["branch"][branch][
+                        "capital_cost"
+                    ] = 10000000
+        if "system" in self.md.data.keys():
+            self.md.data["system"]["min_operating_reserve"] = 0.1
+            self.md.data["system"]["min_spinning_reserve"] = 0.1
 
     def load_storage_csv(self, data_path):
+        """Imports storage data.
+
+        :param data_path: folder containing the storage.csv file (a string; "/storage.csv" is appended to it)
+        """
         try:
             storage_path = data_path + "/storage.csv"
             storage_df = pd.read_csv(storage_path)
@@ -253,6 +363,14 @@ def load_storage_csv(self, data_path):
             self.md.data["elements"]["storage"] = {}
 
     def texas_case_study_updates(self, data_path):
+        """Imports generator data for texas case study.
+
+        :param data_path: folder containing gen.csv; must name "Texas" or "Coal", else ValueError is raised
+        """
+        # only accept paths naming "Texas" or "Coal" (assumed marker of a texas case study)
+        if "Texas" not in str(data_path) and "Coal" not in str(data_path):
+            raise ValueError("The data path provided is not a Texas case study")
+
         generator_update_path = data_path + "/gen.csv"
         generator_df = pd.read_csv(generator_update_path)
         bonus_feature_list = [
diff --git a/gtep/gtep_model.py b/gtep/gtep_model.py
index 6292bc40..321ee19e 100644
--- a/gtep/gtep_model.py
+++ b/gtep/gtep_model.py
@@ -86,39 +86,28 @@ class ExpansionPlanningModel:
     def __init__(
         self,
         config=None,
-        stages=1,
         formulation=None,
         data=None,
         cost_data=None,
-        num_reps=3,
-        len_reps=24,
-        num_commit=24,
-        num_dispatch=4,
-        duration_dispatch=15,
     ):
         """Initialize generation & expansion planning model object.
 
-        :param stages: integer number of investment periods
         :param formulation: Egret stuff, to be filled
         :param data: full set of model data
         :param cost_data: full set of cost data for all generators
-        :param num_reps: integer number of representative periods per investment period
-        :param len_reps: (for now integer) length of each representative period (in hours)
-        :param num_commit: integer number of commitment periods per representative period
-        :param num_dispatch: integer number of dispatch periods per commitment period
-        :param duration_dispatch: (for now integer) duration of each dispatch period (in minutes)
+        :raises AttributeError: if data is None (the period settings are read from the data object)
         :return: Pyomo model for full GTEP
         """
 
-        self.stages = stages
+        self.stages = data.stages
         self.formulation = formulation
         self.data = data
         self.cost_data = cost_data
-        self.num_reps = num_reps
-        self.len_reps = len_reps
-        self.num_commit = num_commit
-        self.num_dispatch = num_dispatch
-        self.duration_dispatch = duration_dispatch
+        self.num_reps = data.num_reps
+        self.len_reps = data.len_reps
+        self.num_commit = data.num_commit
+        self.num_dispatch = data.num_dispatch
+        self.duration_dispatch = data.duration_dispatch
         self.config = _get_model_config()
         self.timer = TicTocTimer()
 
diff --git a/gtep/tests/unit/test_gtep_model.py b/gtep/tests/unit/test_gtep_model.py
index 7c5e59f9..58cf77d7 100644
--- a/gtep/tests/unit/test_gtep_model.py
+++ b/gtep/tests/unit/test_gtep_model.py
@@ -47,17 +47,45 @@ def patch_unit_handlers():
 
 
 # Helper functions
-def read_debug_model():
+def read_debug_model(
+    stages=1,
+    num_reps=3,
+    len_reps=24,
+    num_commit=24,
+    num_dispatch=4,
+    duration_dispatch=15,
+):
     curr_dir = dirname(abspath(__file__))
     debug_data_path = abspath(join(curr_dir, "..", "..", "data", "5bus"))
-    dataObject = ExpansionPlanningData()
+    dataObject = ExpansionPlanningData(
+        stages=stages,
+        num_reps=num_reps,
+        len_reps=len_reps,
+        num_commit=num_commit,
+        num_dispatch=num_dispatch,
+        duration_dispatch=duration_dispatch,
+    )
     dataObject.load_prescient(debug_data_path)
     return dataObject
 
 
-def prepare_model_and_cost_data():
+def prepare_model_and_cost_data(
+    stages=1,
+    num_reps=3,
+    len_reps=24,
+    num_commit=24,
+    num_dispatch=4,
+    duration_dispatch=15,
+):
     # Prepare model and cost data
-    dataObject = read_debug_model()
+    dataObject = read_debug_model(
+        stages=stages,
+        num_reps=num_reps,
+        len_reps=len_reps,
+        num_commit=num_commit,
+        num_dispatch=num_dispatch,
+        duration_dispatch=duration_dispatch,
+    )
     curr_dir = dirname(abspath(__file__))
     data_path = abspath(join(curr_dir, "..", "..", "data", "costs"))
     bus_data_path = abspath(join(data_path, "Bus_data_gen_weights_mappings.csv"))
@@ -88,7 +116,14 @@ class TestGTEP(unittest.TestCase):
     def test_model_init(self):
         # Test that the ExpansionPlanningModel object can read a default dataset and init
         # properly with default values, including building a Pyomo.ConcreteModel object
-        data_object = read_debug_model()
+        data_object = read_debug_model(
+            stages=1,
+            num_reps=3,
+            len_reps=24,
+            num_commit=24,
+            num_dispatch=4,
+            duration_dispatch=60,
+        )
         modObject = ExpansionPlanningModel(data=data_object)
         self.assertIsInstance(modObject, ExpansionPlanningModel)
         modObject.create_model()
@@ -100,16 +135,20 @@ def test_model_init(self):
         self.assertEqual(modObject.len_reps, 24)
         self.assertEqual(modObject.num_commit, 24)
         self.assertEqual(modObject.num_dispatch, 4)
+        self.assertEqual(modObject.duration_dispatch, 60)
 
         # Test that the ExpansionPlanningModel object can read a default dataset and init
         # properly with non-default values
-        modObject = ExpansionPlanningModel(
-            data=data_object,
+        data_object = read_debug_model(
             stages=2,
             num_reps=4,
             len_reps=16,
             num_commit=12,
             num_dispatch=12,
+            duration_dispatch=30,
+        )
+        modObject = ExpansionPlanningModel(
+            data=data_object,
         )
         self.assertIsInstance(modObject, ExpansionPlanningModel)
         modObject.create_model()
@@ -121,6 +160,7 @@ def test_model_init(self):
         self.assertEqual(modObject.len_reps, 16)
         self.assertEqual(modObject.num_commit, 12)
         self.assertEqual(modObject.num_dispatch, 12)
+        self.assertEqual(modObject.duration_dispatch, 30)
 
         # We have expansion blocks and they are where and what we think they are
         expansion_blocks = modObject.model.component("investmentStage")
@@ -153,15 +193,16 @@ def test_model_init(self):
     def test_model_unit_consistency(self):
         # Test that the ExpansionPlanningModel has consistent units and spot check that
         # components have their expected units
-        data_object = read_debug_model()
-        modObject = ExpansionPlanningModel(
-            data=data_object,
+        data_object = read_debug_model(
             stages=2,
             num_reps=2,
             len_reps=2,
             num_commit=2,
             num_dispatch=2,
         )
+        modObject = ExpansionPlanningModel(
+            data=data_object,
+        )
         modObject.create_model()
         m = modObject.model
 
@@ -215,10 +256,10 @@ def test_model_unit_consistency(self):
 
     def test_solve_bigm(self):
         # Solve the debug model as is
-        data_object = read_debug_model()
-        modObject = ExpansionPlanningModel(
-            data=data_object, num_reps=1, len_reps=1, num_commit=1, num_dispatch=1
+        data_object = read_debug_model(
+            num_reps=1, len_reps=1, num_commit=1, num_dispatch=1
         )
+        modObject = ExpansionPlanningModel(data=data_object)
         modObject.create_model()
 
         # Check for consistent units
@@ -242,10 +283,17 @@ def test_solve_bigm(self):
 
     def test_no_investment(self):
         # Solve the debug model with no investment
-        data_object = read_debug_model()
+        data_object = read_debug_model(
+            num_reps=1,
+            len_reps=1,
+            num_commit=1,
+            num_dispatch=1,
+        )
         modObject = ExpansionPlanningModel(
-            data=data_object, num_reps=1, len_reps=1, num_commit=1, num_dispatch=1
+            data=data_object,
         )
+
+        # disable investment decisions before the model is built
         modObject.config["include_investment"] = False
         modObject.create_model()
 
@@ -272,13 +320,8 @@ def test_no_investment(self):
     def test_with_cost_data_and_commitment(self):
         # Test ExpansionPlanningModel with cost data
         # This model originated from driver_esr.py
-        dataObject, dataProcessingObject = prepare_model_and_cost_data()
-
-        # Populate and create GTEP model
-        modObject = ExpansionPlanningModel(
+        dataObject, dataProcessingObject = prepare_model_and_cost_data(
             stages=2,
-            data=dataObject,
-            cost_data=dataProcessingObject,
             num_reps=2,
             len_reps=1,
             num_commit=6,
@@ -286,6 +329,12 @@ def test_with_cost_data_and_commitment(self):
             duration_dispatch=15,
         )
 
+        # Populate and create GTEP model
+        modObject = ExpansionPlanningModel(
+            data=dataObject,
+            cost_data=dataProcessingObject,
+        )
+
         modObject.config["include_investment"] = True
         modObject.config["include_commitment"] = True
         modObject.config["include_redispatch"] = True
@@ -319,13 +368,8 @@ def test_with_cost_data_and_commitment(self):
     def test_with_cost_data_and_no_commitment(self):
         # Test ExpansionPlanningModel with cost data and no commitment
         # This model originated from driver_esr.py
-        dataObject, dataProcessingObject = prepare_model_and_cost_data()
-
-        # Populate and create GTEP model
-        modObject = ExpansionPlanningModel(
+        dataObject, dataProcessingObject = prepare_model_and_cost_data(
             stages=2,
-            data=dataObject,
-            cost_data=dataProcessingObject,
             num_reps=2,
             len_reps=1,
             num_commit=6,
@@ -333,6 +377,12 @@ def test_with_cost_data_and_no_commitment(self):
             duration_dispatch=15,
         )
 
+        # Populate and create GTEP model
+        modObject = ExpansionPlanningModel(
+            data=dataObject,
+            cost_data=dataProcessingObject,
+        )
+
         modObject.config["include_investment"] = True
         modObject.config["include_commitment"] = False
         modObject.config["include_redispatch"] = True
diff --git a/gtep/tests/unit/test_validation.py b/gtep/tests/unit/test_validation.py
index 45fb458f..9cda178c 100644
--- a/gtep/tests/unit/test_validation.py
+++ b/gtep/tests/unit/test_validation.py
@@ -41,17 +41,18 @@
 
 
 def get_solution_object():
-    data_object = ExpansionPlanningData()
-    data_object.load_prescient(str(input_data_source))
-
-    mod_object = ExpansionPlanningModel(
+    data_object = ExpansionPlanningData(
         stages=2,
-        data=data_object,
         num_reps=2,
         len_reps=1,
         num_commit=6,
         num_dispatch=4,
     )
+    data_object.load_prescient(str(input_data_source))
+
+    mod_object = ExpansionPlanningModel(
+        data=data_object,
+    )
     mod_object.create_model()
     TransformationFactory("gdp.bound_pretransformation").apply_to(mod_object.model)
     TransformationFactory("gdp.bigm").apply_to(mod_object.model)