google · gabriel-trigo · Mar 9, 2025 · Mar 10, 2025 · Mar 23, 2025 · Mar 23, 2025
diff --git a/.gitignore b/.gitignore
@@ -9,3 +9,10 @@ smart_control/refactor/data/
 smart_control/refactor/experiment_results/
 smart_control/learning/
 .vscode/
+smart_control/old/
+smart_control/configs/resources/sb1/generated_configs/
+smart_control/reinforcement_learning/data/
+smart_control/reinforcement_learning/experiment_results/
+smart_control/reinforcement_learning/eval_results/
+smart_control/reinforcement_learning/test.py
+smart_control/reinforcement_learning/test.ipynb
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -28,6 +28,7 @@ ipykernel = "^6.29.5"
 typing-extensions = "^4.12.2"
 ipython = "^8.27.0"
 pytest = "^8.3.5"
+tqdm = "^4.67.1"
 
 
 [build-system]

diff --git a/smart_control/configs/resources/sb1/train_sim_configs/sim_config_1_day.gin b/smart_control/configs/resources/sb1/train_sim_configs/sim_config_1_day.gin
diff --git a/smart_control/environment/environment.py b/smart_control/environment/environment.py
@@ -20,34 +20,30 @@
 
 import collections
 import copy
+import logging as log
 import os
 import time
-from typing import Final, Mapping, NewType, Optional, Sequence, Tuple
+from typing import Final, Mapping, NewType, Optional, Sequence, Tuple, Union
 
-from absl import logging
 import bidict
 import gin
 import numpy as np
 import pandas as pd
-from smart_control.models import base_building
-from smart_control.models import base_normalizer
-from smart_control.models import base_reward_function
-from smart_control.proto import smart_control_building_pb2
-from smart_control.proto import smart_control_reward_pb2
-from smart_control.utils import building_image_generator
-from smart_control.utils import constants
-from smart_control.utils import conversion_utils
-from smart_control.utils import histogram_reducer
-from smart_control.utils import plot_utils
-from smart_control.utils import regression_building_utils
-from smart_control.utils import run_command_predictor
-from smart_control.utils import writer_lib
 import tensorflow as tf
+from absl import logging
 from tf_agents.environments import py_environment
 from tf_agents.specs import array_spec
 from tf_agents.trajectories import time_step as ts
 from tf_agents.typing import types
 
+from smart_control.models import (base_building, base_normalizer,
+                                  base_reward_function)
+from smart_control.proto import (smart_control_building_pb2,
+                                 smart_control_reward_pb2)
+from smart_control.utils import (building_image_generator, constants,
+                                 conversion_utils, histogram_reducer,
+                                 plot_utils, regression_building_utils,
+                                 run_command_predictor, writer_lib)
 
 ACTION_REJECTION_REWARD: Final[float] = -np.inf
 
@@ -78,6 +74,8 @@
 DeviceActionTuple = Tuple[DeviceCode, Setpoint]
 DeviceMeasurementTuple = Tuple[DeviceCode, MeasurementName]
 
+logger = log.getLogger(__name__)
+
 
 def all_actions_accepted(
     action_response: smart_control_building_pb2.ActionResponse,
@@ -378,7 +376,6 @@ def __init__(
       image_generator: (
           building_image_generator.BuildingImageGenerator | None
       ) = None,
-      step_interval: pd.Timedelta = pd.Timedelta(5, unit="minutes"),
       writer_factory: writer_lib.BaseWriterFactory | None = None,
   ) -> None:
     """Environment constructor.
@@ -429,10 +426,12 @@ def __init__(
     self._end_timestamp: pd.Timestamp = self._start_timestamp + pd.Timedelta(
         num_days_in_episode, unit="days"
     )
-    self._step_interval = step_interval
+    self._step_interval = pd.Timedelta(self.building.time_step_sec, unit="s")
+    logger.info("Step Interval: %s", self._step_interval)
     self._num_timesteps_in_episode = int(
         (self._end_timestamp - self._start_timestamp) / self._step_interval
     )
+    logger.info("Num Timesteps in Episode: %s", self._num_timesteps_in_episode)
     self._metrics = plot_utils.init_metrics()
     logging.info(
         "Episode starts at %s and ends at %s; % d timesteps.",

diff --git a/smart_control/environment/environment_test.py b/smart_control/environment/environment_test.py
@@ -730,15 +730,13 @@ def __init__(
           obs_normalizer,
           action_config,
           discount_factor: float = 1,
-          step_interval: pd.Timedelta = pd.Timedelta(1, unit="minute"),
       ):
         super().__init__(
             building,
             reward_function,
             obs_normalizer,
             action_config,
             discount_factor,
-            step_interval=step_interval,
         )
         self.counter = 0
 
@@ -758,7 +756,6 @@ def _step(self, action) -> ts.TimeStep:
         reward_function,
         obs_normalizer,
         action_config,
-        step_interval=step_interval,
     )
 
     utils.validate_py_environment(env, episodes=5)

diff --git a/smart_control/reinforcement_learning/__init__.py b/smart_control/reinforcement_learning/__init__.py
diff --git a/smart_control/reinforcement_learning/agents/__init__.py b/smart_control/reinforcement_learning/agents/__init__.py
diff --git a/smart_control/reinforcement_learning/agents/ddpg_agent.py b/smart_control/reinforcement_learning/agents/ddpg_agent.py
@@ -0,0 +1,145 @@
+"""DDPG Agent implementation.
+
+This module provides a function to create a DDPG agent with customizable parameters.
+"""
+
+from typing import Optional, Sequence
+
+import tensorflow as tf
+from tf_agents.agents import tf_agent
+from tf_agents.agents.ddpg import ddpg_agent
+from tf_agents.networks import network
+from tf_agents.typing import types
+
+from smart_control.reinforcement_learning.agents.networks.ddpg_networks import (
+    create_sequential_actor_network,
+    create_sequential_critic_network,
+)
+
+
+def create_ddpg_agent(
+    time_step_spec: types.TimeStep,
+    action_spec: types.NestedTensorSpec,
+
+    # Actor network parameters
+    actor_fc_layers: Sequence[int] = (128, 128),
+    actor_network: Optional[network.Network] = None,
+
+    # Critic network parameters
+    critic_obs_fc_layers: Sequence[int] = (128, 64),
+    critic_action_fc_layers: Sequence[int] = (128, 64),
+    critic_joint_fc_layers: Sequence[int] = (128, 64),
+    critic_network: Optional[network.Network] = None,
+
+    # Optimizer parameters
+    actor_learning_rate: float = 3e-4,
+    critic_learning_rate: float = 3e-4,
+
+    # Agent parameters
+    ou_stddev: float = 1.0,
+    ou_damping: float = 1.0,
+    gamma: float = 0.99,
+    target_update_tau: float = 0.005,
+    target_update_period: int = 1,
+    reward_scale_factor: float = 1.0,
+
+    # Training parameters
+    gradient_clipping: Optional[float] = None,
+    debug_summaries: bool = False,
+    summarize_grads_and_vars: bool = False,
+    train_step_counter: Optional[tf.Variable] = None,
+) -> tf_agent.TFAgent:
+    """Creates and initializes a DDPG Agent.
+
+    Args:
+        time_step_spec: A `TimeStep` spec of the expected time_steps.
+
+        action_spec: A nest of BoundedTensorSpec representing the actions.
+
+        actor_fc_layers: Iterable of fully connected layer units for the actor network.
+
+        actor_network: Optional custom actor network; when given, actor_fc_layers is unused.
+
+        critic_obs_fc_layers: Iterable of fully connected layer units for the critic
+                              observation network.
+
+        critic_action_fc_layers: Iterable of fully connected layer units for the critic
+                                 action network.
+
+        critic_joint_fc_layers: Iterable of fully connected layer units for the joint
+                                part of the critic network.
+
+        critic_network: Optional custom critic network; when given, critic_*_fc_layers are unused.
+
+        actor_learning_rate: Actor network learning rate (Adam optimizer).
+
+        critic_learning_rate: Critic network learning rate (Adam optimizer).
+
+        ou_stddev: Standard deviation for the Ornstein-Uhlenbeck (OU) noise added for
+                   exploration.
+
+        ou_damping: Damping factor for the OU noise.
+
+        gamma: Discount factor for future rewards.
+
+        target_update_tau: Factor for soft update of target networks.
+
+        target_update_period: Period for soft update of target networks.
+
+        reward_scale_factor: Multiplicative scale for the reward.
+
+        gradient_clipping: Norm length to clip gradients.
+
+        debug_summaries: Whether to emit debug summaries.
+
+        summarize_grads_and_vars: Whether to summarize gradients and variables.
+
+        train_step_counter: An optional counter to increment every time the train
+                            op is run. Defaults to a new int64 variable set to 0.
+
+    Returns:
+        An initialized DdpgAgent (`initialize()` has already been called).
+    """
+    # Create a dedicated, non-trainable train step counter if not provided
+    if train_step_counter is None:
+        train_step_counter = tf.Variable(0, trainable=False, dtype=tf.int64)
+
+    # Create networks if not provided
+    if actor_network is None:
+        actor_network = create_sequential_actor_network(
+            actor_fc_layers=actor_fc_layers,
+            action_tensor_spec=action_spec
+        )
+
+    if critic_network is None:
+        critic_network = create_sequential_critic_network(
+            obs_fc_layer_units=critic_obs_fc_layers,
+            action_fc_layer_units=critic_action_fc_layers,
+            joint_fc_layer_units=critic_joint_fc_layers
+        )
+
+    # Create agent; named `agent` so the imported `tf_agent` module is not shadowed
+    agent = ddpg_agent.DdpgAgent(
+        time_step_spec=time_step_spec,
+        action_spec=action_spec,
+        actor_network=actor_network,
+        critic_network=critic_network,
+        actor_optimizer=tf.keras.optimizers.Adam(learning_rate=actor_learning_rate),
+        critic_optimizer=tf.keras.optimizers.Adam(learning_rate=critic_learning_rate),
+        ou_stddev=ou_stddev,
+        ou_damping=ou_damping,
+        target_update_tau=target_update_tau,
+        target_update_period=target_update_period,
+        td_errors_loss_fn=tf.math.squared_difference,
+        gamma=gamma,
+        reward_scale_factor=reward_scale_factor,
+        gradient_clipping=gradient_clipping,
+        debug_summaries=debug_summaries,
+        summarize_grads_and_vars=summarize_grads_and_vars,
+        train_step_counter=train_step_counter
+    )
+
+    # Initialize the agent
+    agent.initialize()
+
+    return agent
diff --git a/smart_control/reinforcement_learning/agents/networks/__init__.py b/smart_control/reinforcement_learning/agents/networks/__init__.py