""" Training environment for DQN-based speed limit control. """ import numpy as np from typing import Tuple, Dict, Optional from ctm_model import CTMModel from demand_loader import DemandLoader class TrafficEnvironment: """Traffic environment for speed limit control.""" def __init__(self, config: dict): """Initialize environment.""" env_config = config["environment"] reward_config = config["reward"] self.ctm = CTMModel( num_cells=env_config["num_cells"], cell_length=env_config["cell_length"], free_flow_speed=env_config["free_flow_speed"], congestion_wave_speed=env_config["congestion_wave_speed"], max_density=env_config["max_density"], critical_density=env_config["critical_density"], jam_density=env_config["jam_density"], time_step=env_config["time_step"], ) self.demand_mean = env_config["demand_mean"] self.demand_std = env_config["demand_std"] self.demand_pattern = env_config["demand_pattern"] # Initialize demand loader for CSV input self.demand_loader = None if self.demand_pattern == "csv": csv_path = env_config.get("demand_csv_path") csv_column = env_config.get("demand_csv_column", "demand") if csv_path is None: raise ValueError("demand_csv_path must be specified when demand_pattern is 'csv'") self.demand_loader = DemandLoader( csv_path=csv_path, time_step=env_config["time_step"], demand_column=csv_column ) print(f"Using CSV demand from: {csv_path}") stats = self.demand_loader.get_statistics() if stats: print(f"Demand statistics: mean={stats['mean']:.1f}, std={stats['std']:.1f}, " f"min={stats['min']:.1f}, max={stats['max']:.1f}") self.min_speed_limit = env_config["min_speed_limit"] self.max_speed_limit = env_config["max_speed_limit"] self.num_speed_actions = env_config["num_speed_actions"] self.episode_length = env_config["episode_length"] self.time_step = env_config["time_step"] self.throughput_weight = reward_config["throughput_weight"] self.speed_weight = reward_config["speed_weight"] self.density_weight = reward_config["density_weight"] self.action_change_weight = reward_config["action_change_weight"] self.speed_actions = np.linspace( self.min_speed_limit, self.max_speed_limit, self.num_speed_actions ) self.current_step = 0 self.previous_action = None self.episode_metrics = [] def reset(self) -> np.ndarray: """Reset environment to initial state.""" self.ctm.reset() self.current_step = 0 self.previous_action = None self.episode_metrics = [] # Reset demand loader if using CSV if self.demand_loader is not None: self.demand_loader.reset() return self.ctm.get_state() def _generate_demand(self) -> float: """Generate traffic demand based on pattern.""" if self.demand_pattern == "constant": demand = self.demand_mean elif self.demand_pattern == "sine": t = self.current_step * self.time_step / 3600.0 demand = self.demand_mean + self.demand_std * np.sin(2 * np.pi * t / 2.0) elif self.demand_pattern == "random": demand = np.random.normal(self.demand_mean, self.demand_std) elif self.demand_pattern == "csv": if self.demand_loader is None: raise RuntimeError("Demand loader not initialized for CSV pattern") demand = self.demand_loader.get_demand(self.current_step) else: demand = self.demand_mean return max(0, demand) def _calculate_reward(self, info: Dict, action: int) -> float: """Calculate reward based on traffic metrics.""" throughput = info["throughput"] avg_density = info["average_density"] avg_speed = 0 for i in range(self.ctm.num_cells): if self.ctm.densities[i] > 0: avg_speed += min( self.ctm.speed_limits[i], self.ctm.free_flow_speed * (1 - self.ctm.densities[i] / self.ctm.jam_density) ) avg_speed /= self.ctm.num_cells reward = ( self.throughput_weight * (throughput / 2000.0) + self.speed_weight * (avg_speed / self.max_speed_limit) + self.density_weight * (avg_density / self.ctm.critical_density) ) if self.previous_action is not None and action != self.previous_action: reward += self.action_change_weight return reward def step(self, action: int) -> Tuple[np.ndarray, float, bool, Dict]: """Execute one step in the environment.""" speed_limit = self.speed_actions[action] for i in range(self.ctm.num_cells): self.ctm.set_speed_limit(i, speed_limit) inflow = self._generate_demand() outflow = 2000.0 next_state, info = self.ctm.step(inflow, outflow) reward = self._calculate_reward(info, action) self.current_step += 1 done = self.current_step >= self.episode_length self.episode_metrics.append(info) self.previous_action = action info["reward"] = reward info["step"] = self.current_step return next_state, reward, done, info @property def state_dim(self) -> int: """Get state dimension.""" return self.ctm.num_cells * 2 @property def action_dim(self) -> int: """Get action dimension.""" return self.num_speed_actions