181 lines
6.9 KiB
Python
181 lines
6.9 KiB
Python
"""
Training environment for DQN-based speed limit control.
"""
|
|
import numpy as np
|
|
from typing import Tuple, Dict, Optional
|
|
from ctm_model import CTMModel
|
|
from demand_loader import DemandLoader
|
|
|
|
|
|
class TrafficEnvironment:
    """Traffic environment for speed limit control.

    Wraps a ``CTMModel`` and exposes a ``reset`` / ``step`` interface in which
    each discrete action selects one speed limit that is applied to every
    cell of the model. The reward combines throughput, average speed, average
    density and an action-change term, weighted by the ``reward`` section of
    the configuration.
    """

    # Floor applied to the inflow before it is used as a denominator in the
    # reward. It only matters when the demand is (numerically) zero; for any
    # inflow at or above this value the reward is unchanged.
    _MIN_INFLOW = 1e-6

    def __init__(self, config: dict):
        """Initialize environment.

        Args:
            config: Mapping with two sections.
                ``config["environment"]`` must provide ``num_cells``,
                ``cell_length``, ``free_flow_speed``,
                ``congestion_wave_speed``, ``max_density``,
                ``critical_density``, ``jam_density``, ``time_step``,
                ``demand_mean``, ``demand_std``, ``demand_pattern``,
                ``min_speed_limit``, ``max_speed_limit``,
                ``num_speed_actions`` and ``episode_length``. When
                ``demand_pattern`` is ``"csv"`` it must also provide
                ``demand_csv_path`` and may provide ``demand_csv_column``
                (defaults to ``"demand"``).
                ``config["reward"]`` must provide ``throughput_weight``,
                ``speed_weight``, ``density_weight`` and
                ``action_change_weight``.

        Raises:
            KeyError: If a required configuration key is missing.
            ValueError: If ``demand_pattern`` is ``"csv"`` but no
                ``demand_csv_path`` is given.
        """
        env_config = config["environment"]
        reward_config = config["reward"]

        self.ctm = CTMModel(
            num_cells=env_config["num_cells"],
            cell_length=env_config["cell_length"],
            free_flow_speed=env_config["free_flow_speed"],
            congestion_wave_speed=env_config["congestion_wave_speed"],
            max_density=env_config["max_density"],
            critical_density=env_config["critical_density"],
            jam_density=env_config["jam_density"],
            time_step=env_config["time_step"],
        )

        self.demand_mean = env_config["demand_mean"]
        self.demand_std = env_config["demand_std"]
        self.demand_pattern = env_config["demand_pattern"]

        # The demand loader is only created for CSV-driven demand; every
        # other pattern is generated on the fly in _generate_demand().
        self.demand_loader = None
        if self.demand_pattern == "csv":
            csv_path = env_config.get("demand_csv_path")
            csv_column = env_config.get("demand_csv_column", "demand")
            if csv_path is None:
                raise ValueError("demand_csv_path must be specified when demand_pattern is 'csv'")
            self.demand_loader = DemandLoader(
                csv_path=csv_path,
                time_step=env_config["time_step"],
                demand_column=csv_column,
            )
            print(f"Using CSV demand from: {csv_path}")
            stats = self.demand_loader.get_statistics()
            if stats:
                print(f"Demand statistics: mean={stats['mean']:.1f}, std={stats['std']:.1f}, "
                      f"min={stats['min']:.1f}, max={stats['max']:.1f}")

        self.min_speed_limit = env_config["min_speed_limit"]
        self.max_speed_limit = env_config["max_speed_limit"]
        self.num_speed_actions = env_config["num_speed_actions"]
        self.episode_length = env_config["episode_length"]
        self.time_step = env_config["time_step"]

        self.throughput_weight = reward_config["throughput_weight"]
        self.speed_weight = reward_config["speed_weight"]
        self.density_weight = reward_config["density_weight"]
        self.action_change_weight = reward_config["action_change_weight"]

        # Action i maps to the i-th of num_speed_actions evenly spaced speed
        # limits between the minimum and maximum (both inclusive).
        self.speed_actions = np.linspace(
            self.min_speed_limit, self.max_speed_limit, self.num_speed_actions
        )

        self.current_step = 0
        self.previous_action: Optional[int] = None
        self.episode_metrics = []
        self.current_inflow = 0.0  # Most recent inflow, read by the reward

    def reset(self) -> np.ndarray:
        """Reset environment to initial state.

        Resets the CTM model, the step counter, the remembered previous
        action, the collected episode metrics, the stored inflow and, when
        CSV demand is in use, the demand loader.

        Returns:
            The state reported by ``self.ctm.get_state()`` after the reset.
        """
        self.ctm.reset()
        self.current_step = 0
        self.previous_action = None
        self.episode_metrics = []
        self.current_inflow = 0.0

        # Reset demand loader if using CSV
        if self.demand_loader is not None:
            self.demand_loader.reset()

        return self.ctm.get_state()

    def _generate_demand(self) -> float:
        """Generate traffic demand based on pattern.

        Supported patterns:
            * ``"constant"``: always ``demand_mean``.
            * ``"sine"``: ``demand_mean`` plus a sine of amplitude
              ``demand_std`` whose period is 2 when elapsed time is expressed
              as ``current_step * time_step / 3600``.
            * ``"random"``: a normal draw with mean ``demand_mean`` and
              standard deviation ``demand_std``.
            * ``"csv"``: the value the demand loader returns for the current
              step.

        Any other pattern name falls back to ``demand_mean``.

        Returns:
            The demand for the current step, clamped to be non-negative.

        Raises:
            RuntimeError: If the pattern is ``"csv"`` but no demand loader
                has been created.
        """
        if self.demand_pattern == "constant":
            demand = self.demand_mean
        elif self.demand_pattern == "sine":
            t = self.current_step * self.time_step / 3600.0
            demand = self.demand_mean + self.demand_std * np.sin(2 * np.pi * t / 2.0)
        elif self.demand_pattern == "random":
            demand = np.random.normal(self.demand_mean, self.demand_std)
        elif self.demand_pattern == "csv":
            if self.demand_loader is None:
                raise RuntimeError("Demand loader not initialized for CSV pattern")
            demand = self.demand_loader.get_demand(self.current_step)
        else:
            # Unknown pattern names are treated like "constant".
            demand = self.demand_mean

        # Sine and random demand can dip below zero; clamp with a float so
        # the declared return type holds.
        return max(0.0, demand)

    def _calculate_reward(self, info: Dict, action: int) -> float:
        """Calculate reward based on traffic metrics with non-linear normalization.

        Args:
            info: Step metrics; must contain ``"throughput"`` and
                ``"average_density"``.
            action: Index of the action taken this step, compared with the
                previous action for the action-change term.

        Returns:
            ``throughput_weight * throughput_norm + speed_weight * speed_norm
            + density_weight * (1 - exp(-avg_density / critical_density))``,
            plus ``action_change_weight`` when the action differs from the
            previous one.

        Raises:
            KeyError: If ``info`` lacks a required metric.
        """
        throughput = info["throughput"]
        avg_density = info["average_density"]

        # Average speed over occupied cells only: each cell's speed is the
        # lower of its speed limit and a linear speed-density relation.
        ctm = self.ctm
        cell_speeds = [
            min(
                ctm.speed_limits[i],
                ctm.free_flow_speed * (1 - ctm.densities[i] / ctm.jam_density),
            )
            for i in range(ctm.num_cells)
            if ctm.densities[i] > 0
        ]
        avg_speed = sum(cell_speeds) / len(cell_speeds) if cell_speeds else 0.0

        # Throughput is judged relative to the current inflow so the reward
        # adapts to demand. The inflow is floored at a tiny positive value so
        # zero demand cannot cause a division by zero (or a NaN reward).
        inflow_target = max(self.current_inflow, self._MIN_INFLOW)
        throughput_ratio = throughput / inflow_target
        # A ratio of 1.0 means output equals input; the tanh squashes the
        # ratio into (0, 1), centred on 0.9 with a width of 0.2.
        throughput_norm = (np.tanh((throughput_ratio - 0.9) / 0.2) + 1.0) / 2.0

        # Squaring makes low speeds count disproportionately little.
        speed_norm = (avg_speed / self.max_speed_limit) ** 2

        # (1 - exp(-ratio)) grows from 0 towards 1 as density rises, so this
        # term only penalises density when density_weight is negative.
        # NOTE(review): confirm the configured sign of density_weight.
        density_ratio = avg_density / self.ctm.critical_density
        density_norm = np.exp(-density_ratio)

        reward = (
            self.throughput_weight * throughput_norm
            + self.speed_weight * speed_norm
            + self.density_weight * (1.0 - density_norm)
        )

        # NOTE(review): action_change_weight is added as-is, so it presumably
        # needs to be negative to discourage switching. Confirm in config.
        if self.previous_action is not None and action != self.previous_action:
            reward += self.action_change_weight

        return reward

    def step(self, action: int) -> Tuple[np.ndarray, float, bool, Dict]:
        """Execute one step in the environment.

        Applies the speed limit selected by ``action`` to every cell,
        generates the inflow for this step, advances the CTM model and
        computes the reward.

        Args:
            action: Index into ``speed_actions``; must satisfy
                ``0 <= action < len(speed_actions)``.

        Returns:
            ``(next_state, reward, done, info)`` where ``done`` becomes true
            once ``episode_length`` steps have been taken and ``info`` is the
            model's metrics dict extended with ``"reward"`` and ``"step"``.

        Raises:
            IndexError: If ``action`` is outside the valid range. Negative
                values are rejected rather than wrapping to the other end of
                the action table.
        """
        num_actions = len(self.speed_actions)
        if not 0 <= action < num_actions:
            raise IndexError(
                f"action {action} is out of range for {num_actions} speed actions"
            )
        speed_limit = self.speed_actions[action]

        for i in range(self.ctm.num_cells):
            self.ctm.set_speed_limit(i, speed_limit)

        inflow = self._generate_demand()
        self.current_inflow = inflow  # Store current inflow for reward calculation
        # Value passed as the model's outflow argument; hard-coded here.
        outflow = 2000.0

        next_state, info = self.ctm.step(inflow, outflow)

        reward = self._calculate_reward(info, action)

        self.current_step += 1
        done = self.current_step >= self.episode_length

        info["reward"] = reward
        info["step"] = self.current_step

        # The stored dict is the same object that is returned, so the episode
        # history also carries the reward and step number.
        self.episode_metrics.append(info)
        self.previous_action = action

        return next_state, reward, done, info

    @property
    def state_dim(self) -> int:
        """Get state dimension.

        Computed as ``num_cells * 2``; presumably two features per cell.
        TODO confirm against ``CTMModel.get_state``.
        """
        return self.ctm.num_cells * 2

    @property
    def action_dim(self) -> int:
        """Get action dimension (the number of discrete speed-limit actions)."""
        return self.num_speed_actions
|