Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
477 changes: 244 additions & 233 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ typing-extensions = "^4.12.2"
ipython = "^8.27.0"
importlib-resources = { version = "*", python = "<3.11" }
python-dotenv = "^1.1.0"
tqdm = "^4.67.1"

[tool.poetry.group.dev.dependencies]
pytest = "^8.3.5"
Expand Down Expand Up @@ -69,7 +70,6 @@ force-exclude = '''
)
'''


[tool.isort]
lexicographical = true
group_by_package = true
Expand Down
5 changes: 3 additions & 2 deletions smart_control/environment/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,6 @@ def __init__(
image_generator: (
building_image_generator.BuildingImageGenerator | None
) = None,
step_interval: pd.Timedelta = pd.Timedelta(5, unit="minutes"),
writer_factory: writer_lib.BaseWriterFactory | None = None,
) -> None:
"""Environment constructor.
Expand Down Expand Up @@ -427,7 +426,9 @@ def __init__(
self._end_timestamp: pd.Timestamp = self._start_timestamp + pd.Timedelta(
num_days_in_episode, unit="days"
)
self._step_interval = step_interval
self._step_interval = self.building.time_step_sec * pd.Timedelta(
1, unit="seconds"
)
self._num_timesteps_in_episode = int(
(self._end_timestamp - self._start_timestamp) / self._step_interval
)
Expand Down
5 changes: 1 addition & 4 deletions smart_control/environment/environment_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,7 +720,7 @@ def test_step(self):
(pd.Timedelta(1, unit="minute")),
(pd.Timedelta(1, unit="hour")),
)
def test_validate_environment(self, step_interval):
def test_validate_environment(self):
class TerminatingEnv(environment.Environment):
"""Environment that terminates after a fixed number of steps.

Expand All @@ -734,15 +734,13 @@ def __init__(
obs_normalizer,
action_config,
discount_factor: float = 1,
step_interval: pd.Timedelta = pd.Timedelta(1, unit="minute"),
):
super().__init__(
building,
reward_function,
obs_normalizer,
action_config,
discount_factor,
step_interval=step_interval,
)
self.counter = 0

Expand All @@ -762,7 +760,6 @@ def _step(self, action) -> ts.TimeStep:
reward_function,
obs_normalizer,
action_config,
step_interval=step_interval,
)

utils.validate_py_environment(env, episodes=5)
Expand Down
143 changes: 143 additions & 0 deletions smart_control/reinforcement_learning/agents/ddpg_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
"""DDPG Agent implementation.

This module provides a function to create a DDPG agent with customizable
parameters.
"""

from typing import Optional, Sequence

import tensorflow as tf
from tf_agents.agents import tf_agent
from tf_agents.agents.ddpg import ddpg_agent
from tf_agents.networks import network
from tf_agents.typing import types

from smart_control.reinforcement_learning.agents.networks.ddpg_networks import create_sequential_actor_network
from smart_control.reinforcement_learning.agents.networks.ddpg_networks import create_sequential_critic_network


def create_ddpg_agent(
    time_step_spec: types.TimeStep,
    action_spec: types.NestedTensorSpec,
    # Actor network parameters
    actor_fc_layers: Sequence[int] = (128, 128),
    actor_network: Optional[network.Network] = None,
    # Critic network parameters
    critic_obs_fc_layers: Sequence[int] = (128, 64),
    critic_action_fc_layers: Sequence[int] = (128, 64),
    critic_joint_fc_layers: Sequence[int] = (128, 64),
    critic_network: Optional[network.Network] = None,
    # Optimizer parameters
    actor_learning_rate: float = 3e-4,
    critic_learning_rate: float = 3e-4,
    # Agent parameters
    ou_stddev: float = 1.0,
    ou_damping: float = 1.0,
    gamma: float = 0.99,
    target_update_tau: float = 0.005,
    target_update_period: int = 1,
    reward_scale_factor: float = 1.0,
    # Training parameters
    gradient_clipping: Optional[float] = None,
    debug_summaries: bool = False,
    summarize_grads_and_vars: bool = False,
    train_step_counter: Optional[tf.Variable] = None,
) -> tf_agent.TFAgent:
  """Builds and initializes a DDPG agent.

  Any network that is not supplied is built with the sequential network
  factories of this package. Both optimizers are Adam, and TD errors are
  penalized with `tf.math.squared_difference`.

  Args:
    time_step_spec: A `TimeStep` spec of the expected time_steps.
    action_spec: A nest of BoundedTensorSpec representing the actions.
    actor_fc_layers: Hidden layer sizes of the default actor network. Only
      used when `actor_network` is None.
    actor_network: Optional pre-built actor network. When None, one is
      created from `actor_fc_layers` and `action_spec`.
    critic_obs_fc_layers: Hidden layer sizes of the observation branch of the
      default critic network. Only used when `critic_network` is None.
    critic_action_fc_layers: Hidden layer sizes of the action branch of the
      default critic network. Only used when `critic_network` is None.
    critic_joint_fc_layers: Hidden layer sizes of the joint part of the
      default critic network. Only used when `critic_network` is None.
    critic_network: Optional pre-built critic network. When None, one is
      created from the three `critic_*_fc_layers` arguments.
    actor_learning_rate: Learning rate of the actor's Adam optimizer.
    critic_learning_rate: Learning rate of the critic's Adam optimizer.
    ou_stddev: Standard deviation for the Ornstein-Uhlenbeck (OU) noise added
      for exploration.
    ou_damping: Damping factor for the OU noise.
    gamma: Discount factor for future rewards.
    target_update_tau: Factor for soft update of target networks.
    target_update_period: Period for soft update of target networks.
    reward_scale_factor: Multiplicative scale for the reward.
    gradient_clipping: Norm length to clip gradients.
    debug_summaries: Whether to emit debug summaries.
    summarize_grads_and_vars: Whether to summarize gradients and variables.
    train_step_counter: Optional counter incremented every time the train op
      is run. When None, a new non-trainable int64 `tf.Variable` starting at
      0 is created.

  Returns:
    The DDPG agent, already initialized via `initialize()`.
  """
  # Fall back to a private step counter when the caller does not share one.
  if train_step_counter is None:
    train_step_counter = tf.Variable(0, trainable=False, dtype=tf.int64)

  # Build the default networks only for the ones the caller left out.
  if actor_network is None:
    actor_network = create_sequential_actor_network(
        actor_fc_layers=actor_fc_layers, action_tensor_spec=action_spec
    )
  if critic_network is None:
    critic_network = create_sequential_critic_network(
        obs_fc_layer_units=critic_obs_fc_layers,
        action_fc_layer_units=critic_action_fc_layers,
        joint_fc_layer_units=critic_joint_fc_layers,
    )

  # Actor and critic are optimized independently, each with its own Adam.
  actor_optimizer = tf.keras.optimizers.Adam(
      learning_rate=actor_learning_rate
  )
  critic_optimizer = tf.keras.optimizers.Adam(
      learning_rate=critic_learning_rate
  )

  agent = ddpg_agent.DdpgAgent(
      time_step_spec=time_step_spec,
      action_spec=action_spec,
      actor_network=actor_network,
      critic_network=critic_network,
      actor_optimizer=actor_optimizer,
      critic_optimizer=critic_optimizer,
      ou_stddev=ou_stddev,
      ou_damping=ou_damping,
      target_update_tau=target_update_tau,
      target_update_period=target_update_period,
      td_errors_loss_fn=tf.math.squared_difference,
      gamma=gamma,
      reward_scale_factor=reward_scale_factor,
      gradient_clipping=gradient_clipping,
      debug_summaries=debug_summaries,
      summarize_grads_and_vars=summarize_grads_and_vars,
      train_step_counter=train_step_counter,
  )
  agent.initialize()
  return agent
132 changes: 132 additions & 0 deletions smart_control/reinforcement_learning/agents/networks/ddpg_networks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""Network architectures for DDPG agent.

This module provides functions to create actor and critic networks for
DDPG agents.
"""

import functools
from typing import Sequence

import tensorflow as tf
from tf_agents.keras_layers import inner_reshape
from tf_agents.networks import nest_map
from tf_agents.networks import sequential
from tf_agents.typing import types
from tf_agents.utils import common

# Factory for the hidden Dense layers used throughout this module: ReLU
# activation and a uniform, fan-in variance-scaling kernel initializer
# (scale 1/3). Call it as `dense(num_units)`.
dense = functools.partial(
    tf.keras.layers.Dense,
    kernel_initializer=tf.compat.v1.variance_scaling_initializer(
        scale=1.0 / 3.0, mode='fan_in', distribution='uniform'
    ),
    activation=tf.keras.activations.relu,
)


def create_identity_layer() -> tf.keras.layers.Layer:
  """Builds a pass-through Keras layer.

  Returns:
    A `Lambda` layer whose output is its unmodified input.
  """
  return tf.keras.layers.Lambda(function=lambda tensor: tensor)


def create_fc_network(layer_units: Sequence[int]) -> sequential.Sequential:
  """Creates a stack of fully connected layers.

  Each entry of `layer_units` becomes one layer built with the module-level
  `dense` factory, in the given order.

  Args:
    layer_units: Number of units of each dense layer, in order. The callers in
      this module only pass a non-empty sequence and substitute an identity
      layer otherwise.

  Returns:
    A TF-Agents `sequential.Sequential` network wrapping the dense layers.
  """
  return sequential.Sequential([dense(num_units) for num_units in layer_units])


def create_sequential_actor_network(
    actor_fc_layers: Sequence[int],
    action_tensor_spec: types.NestedTensorSpec,
) -> sequential.Sequential:
  """Create a sequential actor network for DDPG.

  The network is a stack of hidden dense layers, followed by a tanh-activated
  dense layer with one output per action element, followed by a layer that
  rescales that output to the action spec with `common.scale_to_spec`.

  Args:
    actor_fc_layers: Units for actor network fully connected layers. May be
      empty, in which case the network has no hidden layers.
    action_tensor_spec: The action tensor spec. Must flatten to exactly one
      spec.

  Returns:
    A sequential actor network.

  Raises:
    ValueError: If `action_tensor_spec` does not contain exactly one action
      tensor spec.
  """
  flat_action_specs = tf.nest.flatten(action_tensor_spec)
  # Reject empty nests as well as multi-tensor nests: indexing `[0]` on an
  # empty list would otherwise surface as an unhelpful IndexError.
  if len(flat_action_specs) != 1:
    raise ValueError('Only a single action tensor is supported by this network')
  single_action_spec = flat_action_specs[0]

  fc_layers = [dense(num_units) for num_units in actor_fc_layers]
  num_actions = single_action_spec.shape.num_elements()
  # The output layer starts with small uniform weights in [-0.003, 0.003].
  action_fc_layer = tf.keras.layers.Dense(
      num_actions,
      activation=tf.keras.activations.tanh,
      kernel_initializer=tf.keras.initializers.RandomUniform(
          minval=-0.003, maxval=0.003
      ),
  )

  # Map the tanh output onto the range described by the action spec.
  scaling_layer = tf.keras.layers.Lambda(
      lambda x: common.scale_to_spec(x, single_action_spec)
  )
  return sequential.Sequential(fc_layers + [action_fc_layer, scaling_layer])


def create_sequential_critic_network(
    obs_fc_layer_units: Sequence[int],
    action_fc_layer_units: Sequence[int],
    joint_fc_layer_units: Sequence[int],
) -> sequential.Sequential:
  """Builds the sequential critic (Q-value) network for DDPG.

  The input is indexed as `(observation, action)`. Each element goes through
  its own branch, the branch outputs are concatenated, passed through the
  joint layers and reduced to a single linear output that is then reshaped by
  `InnerReshape([1], [])`. Any branch whose unit list is empty is replaced by
  an identity layer.

  Args:
    obs_fc_layer_units: Units for observation network layers.
    action_fc_layer_units: Units for action network layers.
    joint_fc_layer_units: Units for joint network layers.

  Returns:
    A sequential critic network.
  """

  def split_inputs(inputs):
    # Turn the positional (observation, action) input into a named nest so
    # that NestMap can route each entry to its own branch.
    return {'observation': inputs[0], 'action': inputs[1]}

  def fc_or_identity(layer_units):
    # A falsy unit list means "no hidden layers": pass the tensor through.
    if layer_units:
      return create_fc_network(layer_units)
    return create_identity_layer()

  # Branches are created in the order observation, action, joint, value.
  obs_network = fc_or_identity(obs_fc_layer_units)
  action_network = fc_or_identity(action_fc_layer_units)
  joint_network = fc_or_identity(joint_fc_layer_units)

  # Linear single-unit head with small uniform weights in [-0.003, 0.003].
  small_uniform = tf.keras.initializers.RandomUniform(
      minval=-0.003, maxval=0.003
  )
  value_fc_layer = tf.keras.layers.Dense(
      1, activation=None, kernel_initializer=small_uniform
  )

  branches = nest_map.NestMap(
      {'observation': obs_network, 'action': action_network}
  )
  critic_layers = [
      tf.keras.layers.Lambda(split_inputs),
      branches,
      nest_map.NestFlatten(),
      tf.keras.layers.Concatenate(),
      joint_network,
      value_fc_layer,
      inner_reshape.InnerReshape([1], []),
  ]
  return sequential.Sequential(critic_layers)
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ def call(self, inputs, **kwargs):
kwargs['outer_rank'] = self.predefined_outer_rank
if 'step_type' in kwargs:
del kwargs['step_type']
del kwargs[
'network_state'
] # was getting error saying that this argument was unexpected in
# the call below
return super(_TanhNormalProjectionNetworkWrapper, self).call(
inputs, **kwargs
)
Expand Down
Empty file.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"starter_buffer_path": "/app/smart_control/utils/../../smart_control/reinforcement_learning/replay_buffer_data/initial_exploration_buffer",
"experiment_name": "test_training_run_1day",
"agent_type": "sac",
"train_iterations": 10,
"collect_steps_per_iteration": 50,
"batch_size": 256,
"log_interval": 1,
"eval_interval": 10,
"num_eval_episodes": 1,
"checkpoint_interval": 10,
"learner_iterations": 200,
"scenario_config_path": "/tmp/gin_configs/config_timestepsec-300_numdaysinepisode-1_starttimestamp-2023-07-06.gin",
"timestamp": "2025_06_19-21:10:54"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Experiment Parameters:
=====================

starter_buffer_path: /app/smart_control/utils/../../smart_control/reinforcement_learning/replay_buffer_data/initial_exploration_buffer
experiment_name: test_training_run_1day
agent_type: sac
train_iterations: 10
collect_steps_per_iteration: 50
batch_size: 256
log_interval: 1
eval_interval: 10
num_eval_episodes: 1
checkpoint_interval: 10
learner_iterations: 200
scenario_config_path: /tmp/gin_configs/config_timestepsec-300_numdaysinepisode-1_starttimestamp-2023-07-06.gin
timestamp: 2025_06_19-21:10:54
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading
Loading