diff --git a/config.yaml b/config.yaml
index 19dfe6e..e4ca81e 100644
--- a/config.yaml
+++ b/config.yaml
@@ -69,5 +69,5 @@ testing:
 reward:
   throughput_weight: 1.0 # Weight for throughput reward
   speed_weight: 0.5 # Weight for average speed reward
-  density_weight: -0.3 # Weight for density penalty
+  density_weight: 0.3 # Weight for density penalty (magnitude; subtracted from reward)
   action_change_weight: -0.1 # Weight for action change penalty
\ No newline at end of file
diff --git a/ctm_model.py b/ctm_model.py
index 0ddcc74..7c2b4ee 100644
--- a/ctm_model.py
+++ b/ctm_model.py
@@ -69,17 +69,11 @@ class CTMModel:
         return np.concatenate([self.densities, self.speed_limits])
 
     def _calculate_sending_flow(self, cell_idx: int) -> float:
-        """
-        Calculate sending flow from a cell (vehicles per time step).
-
-        Returns flow in vehicles that can leave the cell during one time step.
-        """
+        """Calculate sending flow from a cell (vehicles per time step)."""
         density = self.densities[cell_idx]
         speed_limit = self.speed_limits[cell_idx]
         effective_speed = min(speed_limit, self.free_flow_speed)
 
-        # Flow = density (veh/km) * speed (m/s) * 3.6 to get veh/h, then convert to veh/timestep
-        # Simplified: density * speed * time_step / 1000 gives vehicles per time step
         sending_flow = min(
             density * effective_speed * self.time_step / 1000.0,
             self.capacity * self.time_step / 1000.0
@@ -87,15 +81,9 @@ class CTMModel:
         return sending_flow
 
     def _calculate_receiving_flow(self, cell_idx: int) -> float:
-        """
-        Calculate receiving flow to a cell (vehicles per time step).
-
-        Returns flow in vehicles that can enter the cell during one time step.
-        """
+        """Calculate receiving flow to a cell (vehicles per time step)."""
         density = self.densities[cell_idx]
 
-        # Receiving capacity based on available space
-        # congestion_wave_speed (m/s) * (jam_density - density) (veh/km) * time_step / 1000
         receiving_flow = min(
             self.capacity * self.time_step / 1000.0,
             self.congestion_wave_speed * (self.jam_density - density) * self.time_step / 1000.0
diff --git a/environment.py b/environment.py
index 03d275d..863ffc3 100644
--- a/environment.py
+++ b/environment.py
@@ -66,6 +66,7 @@ class TrafficEnvironment:
         self.current_step = 0
         self.previous_action = None
         self.episode_metrics = []
+        self.current_inflow = 0.0 # Track current inflow for reward calculation
 
     def reset(self) -> np.ndarray:
         """Reset environment to initial state."""
@@ -73,6 +74,7 @@ class TrafficEnvironment:
         self.current_step = 0
         self.previous_action = None
         self.episode_metrics = []
+        self.current_inflow = 0.0
 
         # Reset demand loader if using CSV
         if self.demand_loader is not None:
@@ -99,23 +101,41 @@ class TrafficEnvironment:
         return max(0, demand)
 
     def _calculate_reward(self, info: Dict, action: int) -> float:
-        """Calculate reward based on traffic metrics."""
+        """Calculate reward based on traffic metrics with non-linear normalization."""
         throughput = info["throughput"]
         avg_density = info["average_density"]
 
         avg_speed = 0
+        count = 0
         for i in range(self.ctm.num_cells):
             if self.ctm.densities[i] > 0:
                 avg_speed += min(
                     self.ctm.speed_limits[i],
                     self.ctm.free_flow_speed * (1 - self.ctm.densities[i] / self.ctm.jam_density)
                 )
-        avg_speed /= self.ctm.num_cells
+                count += 1
+        avg_speed = avg_speed / count if count > 0 else 0
+
+        # Non-linear normalization using tanh, centered on current inflow
+        # This makes the reward adaptive to traffic conditions
+        # When inflow is high, we expect high throughput; when low, we expect lower throughput
+        # Demand is clamped with max(0, ...) and starts at 0.0, so the denominator can be exactly zero
+        inflow_target = max(self.current_inflow, 1e-6) # Tiny floor: avoids division by zero only
+        throughput_ratio = throughput / inflow_target
+        # throughput_ratio: 1.0 means perfect flow (output = input)
+        # > 1.0 is impossible in steady state, < 1.0 means congestion
+        throughput_norm = (np.tanh((throughput_ratio - 0.9) / 0.2) + 1.0) / 2.0
 
+        # Speed reward: penalize low speeds more heavily
+        speed_norm = (avg_speed / self.max_speed_limit) ** 2
+
+        # Density penalty: (1 - exp(-ratio)) grows with density; weight magnitude is subtracted
+        density_ratio = avg_density / self.ctm.critical_density
+        density_norm = np.exp(-density_ratio)
         reward = (
-            self.throughput_weight * (throughput / 2000.0)
-            + self.speed_weight * (avg_speed / self.max_speed_limit)
-            + self.density_weight * (avg_density / self.ctm.critical_density)
+            self.throughput_weight * throughput_norm
+            + self.speed_weight * speed_norm
+            - abs(self.density_weight) * (1.0 - density_norm)
         )
 
         if self.previous_action is not None and action != self.previous_action:
@@ -131,6 +151,7 @@ class TrafficEnvironment:
             self.ctm.set_speed_limit(i, speed_limit)
 
         inflow = self._generate_demand()
+        self.current_inflow = inflow # Store current inflow for reward calculation
         outflow = 2000.0
 
         next_state, info = self.ctm.step(inflow, outflow)