# ctm-dqn/environment.py
#
# 181 lines
# 6.9 KiB
# Python

"""
Training environment for DQN-based speed limit control.
"""
import numpy as np
from typing import Tuple, Dict, Optional
from ctm_model import CTMModel
from demand_loader import DemandLoader
class TrafficEnvironment:
    """Traffic environment for speed limit control.

    Wraps a ``CTMModel`` and exposes a ``reset`` / ``step`` interface in which
    each discrete action selects one speed limit that is applied to every
    cell of the model for that step.
    """

    # Outflow argument handed to ``CTMModel.step`` on every step.
    # NOTE(review): presumably the downstream discharge capacity in the same
    # units as the demand -- confirm against CTMModel.
    DEFAULT_OUTFLOW = 2000.0

    def __init__(self, config: dict):
        """Initialize environment.

        Args:
            config: Nested configuration. ``config["environment"]`` supplies
                the CTM parameters, the demand settings, the speed-limit
                range/action count and the episode length;
                ``config["reward"]`` supplies the four reward weights.

        Raises:
            KeyError: If a required configuration key is missing.
            ValueError: If ``demand_pattern`` is ``"csv"`` and no
                ``demand_csv_path`` is configured.
        """
        env_config = config["environment"]
        reward_config = config["reward"]

        self.ctm = CTMModel(
            num_cells=env_config["num_cells"],
            cell_length=env_config["cell_length"],
            free_flow_speed=env_config["free_flow_speed"],
            congestion_wave_speed=env_config["congestion_wave_speed"],
            max_density=env_config["max_density"],
            critical_density=env_config["critical_density"],
            jam_density=env_config["jam_density"],
            time_step=env_config["time_step"],
        )

        self.demand_mean = env_config["demand_mean"]
        self.demand_std = env_config["demand_std"]
        self.demand_pattern = env_config["demand_pattern"]

        # The demand loader is only created for the CSV demand pattern.
        self.demand_loader = None
        if self.demand_pattern == "csv":
            csv_path = env_config.get("demand_csv_path")
            csv_column = env_config.get("demand_csv_column", "demand")
            if csv_path is None:
                raise ValueError("demand_csv_path must be specified when demand_pattern is 'csv'")
            self.demand_loader = DemandLoader(
                csv_path=csv_path,
                time_step=env_config["time_step"],
                demand_column=csv_column,
            )
            print(f"Using CSV demand from: {csv_path}")
            stats = self.demand_loader.get_statistics()
            if stats:
                print(f"Demand statistics: mean={stats['mean']:.1f}, std={stats['std']:.1f}, "
                      f"min={stats['min']:.1f}, max={stats['max']:.1f}")

        self.min_speed_limit = env_config["min_speed_limit"]
        self.max_speed_limit = env_config["max_speed_limit"]
        self.num_speed_actions = env_config["num_speed_actions"]
        self.episode_length = env_config["episode_length"]
        self.time_step = env_config["time_step"]

        # Reward weights are used as-is; whether a term acts as a bonus or a
        # penalty is determined by the sign of its configured weight.
        self.throughput_weight = reward_config["throughput_weight"]
        self.speed_weight = reward_config["speed_weight"]
        self.density_weight = reward_config["density_weight"]
        self.action_change_weight = reward_config["action_change_weight"]

        # Action index -> speed limit, evenly spaced over [min, max].
        self.speed_actions = np.linspace(
            self.min_speed_limit, self.max_speed_limit, self.num_speed_actions
        )

        self.current_step = 0
        self.previous_action = None
        self.episode_metrics = []
        self.current_inflow = 0.0  # Track current inflow for reward calculation

    def reset(self) -> np.ndarray:
        """Reset environment to initial state.

        Resets the CTM model, the step counter, the previous action, the
        per-episode metrics and (if present) the CSV demand loader.

        Returns:
            The state reported by ``CTMModel.get_state()`` after the reset.
        """
        self.ctm.reset()
        self.current_step = 0
        self.previous_action = None
        self.episode_metrics = []
        self.current_inflow = 0.0
        # Reset demand loader if using CSV
        if self.demand_loader is not None:
            self.demand_loader.reset()
        return self.ctm.get_state()

    def _generate_demand(self) -> float:
        """Generate traffic demand based on pattern.

        Supported patterns are ``"constant"``, ``"sine"``, ``"random"`` and
        ``"csv"``; any other value falls back to the constant mean demand.

        Returns:
            The demand for the current step, clamped to be non-negative.

        Raises:
            RuntimeError: If the pattern is ``"csv"`` but no demand loader
                has been set up.
        """
        if self.demand_pattern == "constant":
            demand = self.demand_mean
        elif self.demand_pattern == "sine":
            # ``t`` is elapsed time scaled by 1/3600 (hours, if ``time_step``
            # is in seconds -- TODO confirm); the sine has a period of 2.0 in
            # units of ``t``.
            t = self.current_step * self.time_step / 3600.0
            demand = self.demand_mean + self.demand_std * np.sin(2 * np.pi * t / 2.0)
        elif self.demand_pattern == "random":
            demand = np.random.normal(self.demand_mean, self.demand_std)
        elif self.demand_pattern == "csv":
            if self.demand_loader is None:
                raise RuntimeError("Demand loader not initialized for CSV pattern")
            demand = self.demand_loader.get_demand(self.current_step)
        else:
            demand = self.demand_mean
        # Float literal so a clamped value is 0.0 rather than the int 0.
        return max(0.0, demand)

    def _average_speed(self) -> float:
        """Return the mean estimated speed over cells with positive density.

        Each occupied cell's speed is the smaller of its speed limit and the
        linear density-based estimate
        ``free_flow_speed * (1 - density / jam_density)``, floored at zero.
        Returns 0.0 when no cell has positive density.
        """
        ctm = self.ctm
        total_speed = 0.0
        occupied_cells = 0
        for i in range(ctm.num_cells):
            density = ctm.densities[i]
            if density > 0:
                density_speed = ctm.free_flow_speed * (1 - density / ctm.jam_density)
                # Floor at zero: a density above jam density would otherwise
                # give a negative speed, which the squared speed term in the
                # reward would turn into a positive contribution.
                total_speed += max(0.0, min(ctm.speed_limits[i], density_speed))
                occupied_cells += 1
        return total_speed / occupied_cells if occupied_cells else 0.0

    def _calculate_reward(self, info: Dict, action: int) -> float:
        """Calculate reward based on traffic metrics with non-linear normalization.

        Args:
            info: Step metrics; ``"throughput"`` and ``"average_density"``
                are read.
            action: Index of the action taken this step; compared with the
                previous action to apply the action-change term.

        Returns:
            Weighted sum of the throughput, speed and density terms, plus
            ``action_change_weight`` when the action differs from the
            previous one.
        """
        throughput = info["throughput"]
        avg_density = info["average_density"]
        avg_speed = self._average_speed()

        # Non-linear normalization using tanh, centered on current inflow.
        # This makes the reward adaptive to traffic conditions: when inflow is
        # high we expect high throughput; when low, lower throughput.
        inflow_target = self.current_inflow
        if inflow_target > 0:
            throughput_ratio = throughput / inflow_target
        else:
            # Demand is clamped at zero, so zero inflow is reachable. With no
            # vehicles entering there is nothing being held back, so treat the
            # step as perfect flow instead of dividing by zero.
            throughput_ratio = 1.0
        # throughput_ratio: 1.0 means perfect flow (output = input)
        # > 1.0 is impossible in steady state, < 1.0 means congestion
        throughput_norm = (np.tanh((throughput_ratio - 0.9) / 0.2) + 1.0) / 2.0

        # Speed term: squaring penalizes low speeds more heavily.
        speed_norm = (avg_speed / self.max_speed_limit) ** 2

        # Density term: (1 - exp(-ratio)) grows from 0 towards 1 as the
        # average density rises relative to the critical density.
        density_ratio = avg_density / self.ctm.critical_density
        density_norm = np.exp(-density_ratio)

        reward = (
            self.throughput_weight * throughput_norm
            + self.speed_weight * speed_norm
            + self.density_weight * (1.0 - density_norm)
        )
        if self.previous_action is not None and action != self.previous_action:
            reward += self.action_change_weight
        return reward

    def step(self, action: int) -> Tuple[np.ndarray, float, bool, Dict]:
        """Execute one step in the environment.

        Applies the speed limit selected by ``action`` to every cell,
        generates the demand for this step, advances the CTM model and
        computes the reward.

        Args:
            action: Index into ``self.speed_actions``.

        Returns:
            ``(next_state, reward, done, info)`` where ``info`` is the dict
            returned by ``CTMModel.step`` with ``"reward"`` and ``"step"``
            added, and ``done`` is True once ``episode_length`` steps have
            been taken.
        """
        speed_limit = self.speed_actions[action]
        for i in range(self.ctm.num_cells):
            self.ctm.set_speed_limit(i, speed_limit)

        inflow = self._generate_demand()
        self.current_inflow = inflow  # Store current inflow for reward calculation
        next_state, info = self.ctm.step(inflow, self.DEFAULT_OUTFLOW)

        reward = self._calculate_reward(info, action)

        self.current_step += 1
        done = self.current_step >= self.episode_length

        # The same dict object is stored and then annotated below, so the
        # recorded episode metrics also carry "reward" and "step".
        self.episode_metrics.append(info)
        self.previous_action = action
        info["reward"] = reward
        info["step"] = self.current_step
        return next_state, reward, done, info

    @property
    def state_dim(self) -> int:
        """Get state dimension.

        Computed as two values per cell.
        NOTE(review): presumably matches the length of
        ``CTMModel.get_state()`` -- confirm against CTMModel.
        """
        return self.ctm.num_cells * 2

    @property
    def action_dim(self) -> int:
        """Get action dimension (the number of discrete speed-limit actions)."""
        return self.num_speed_actions