调整为动态指标,并将奖励归一化
This commit is contained in:
parent
9fa47127ab
commit
25f89717f1
|
|
@ -69,5 +69,5 @@ testing:
|
||||||
reward:
|
reward:
|
||||||
throughput_weight: 1.0 # Weight for throughput reward
|
throughput_weight: 1.0 # Weight for throughput reward
|
||||||
speed_weight: 0.5 # Weight for average speed reward
|
speed_weight: 0.5 # Weight for average speed reward
|
||||||
density_weight: -0.3 # Weight for density penalty
|
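density_weight: 0.3 # Weight for the density term (1 - exp(-avg_density / critical_density)); NOTE(review): a positive weight increases reward as density rises - confirm the sign is intended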
|
||||||
action_change_weight: -0.1 # Weight for action change penalty
|
action_change_weight: -0.1 # Weight for action change penalty
|
||||||
16
ctm_model.py
16
ctm_model.py
|
|
@ -69,17 +69,11 @@ class CTMModel:
|
||||||
return np.concatenate([self.densities, self.speed_limits])
|
return np.concatenate([self.densities, self.speed_limits])
|
||||||
|
|
||||||
def _calculate_sending_flow(self, cell_idx: int) -> float:
|
def _calculate_sending_flow(self, cell_idx: int) -> float:
|
||||||
"""
|
"""Calculate sending flow from a cell (vehicles per time step)."""
|
||||||
Calculate sending flow from a cell (vehicles per time step).
|
|
||||||
|
|
||||||
Returns flow in vehicles that can leave the cell during one time step.
|
|
||||||
"""
|
|
||||||
density = self.densities[cell_idx]
|
density = self.densities[cell_idx]
|
||||||
speed_limit = self.speed_limits[cell_idx]
|
speed_limit = self.speed_limits[cell_idx]
|
||||||
|
|
||||||
effective_speed = min(speed_limit, self.free_flow_speed)
|
effective_speed = min(speed_limit, self.free_flow_speed)
|
||||||
# Flow = density (veh/km) * speed (m/s) * 3.6 to get veh/h, then convert to veh/timestep
|
|
||||||
# Simplified: density * speed * time_step / 1000 gives vehicles per time step
|
|
||||||
sending_flow = min(
|
sending_flow = min(
|
||||||
density * effective_speed * self.time_step / 1000.0,
|
density * effective_speed * self.time_step / 1000.0,
|
||||||
self.capacity * self.time_step / 1000.0
|
self.capacity * self.time_step / 1000.0
|
||||||
|
|
@ -87,15 +81,9 @@ class CTMModel:
|
||||||
return sending_flow
|
return sending_flow
|
||||||
|
|
||||||
def _calculate_receiving_flow(self, cell_idx: int) -> float:
|
def _calculate_receiving_flow(self, cell_idx: int) -> float:
|
||||||
"""
|
"""Calculate receiving flow to a cell (vehicles per time step)."""
|
||||||
Calculate receiving flow to a cell (vehicles per time step).
|
|
||||||
|
|
||||||
Returns flow in vehicles that can enter the cell during one time step.
|
|
||||||
"""
|
|
||||||
density = self.densities[cell_idx]
|
density = self.densities[cell_idx]
|
||||||
|
|
||||||
# Receiving capacity based on available space
|
|
||||||
# congestion_wave_speed (m/s) * (jam_density - density) (veh/km) * time_step / 1000
|
|
||||||
receiving_flow = min(
|
receiving_flow = min(
|
||||||
self.capacity * self.time_step / 1000.0,
|
self.capacity * self.time_step / 1000.0,
|
||||||
self.congestion_wave_speed * (self.jam_density - density) * self.time_step / 1000.0
|
self.congestion_wave_speed * (self.jam_density - density) * self.time_step / 1000.0
|
||||||
|
|
|
||||||
|
|
@ -66,6 +66,7 @@ class TrafficEnvironment:
|
||||||
self.current_step = 0
|
self.current_step = 0
|
||||||
self.previous_action = None
|
self.previous_action = None
|
||||||
self.episode_metrics = []
|
self.episode_metrics = []
|
||||||
|
self.current_inflow = 0.0 # Track current inflow for reward calculation
|
||||||
|
|
||||||
def reset(self) -> np.ndarray:
|
def reset(self) -> np.ndarray:
|
||||||
"""Reset environment to initial state."""
|
"""Reset environment to initial state."""
|
||||||
|
|
@ -73,6 +74,7 @@ class TrafficEnvironment:
|
||||||
self.current_step = 0
|
self.current_step = 0
|
||||||
self.previous_action = None
|
self.previous_action = None
|
||||||
self.episode_metrics = []
|
self.episode_metrics = []
|
||||||
|
self.current_inflow = 0.0
|
||||||
|
|
||||||
# Reset demand loader if using CSV
|
# Reset demand loader if using CSV
|
||||||
if self.demand_loader is not None:
|
if self.demand_loader is not None:
|
||||||
|
|
@ -99,23 +101,41 @@ class TrafficEnvironment:
|
||||||
return max(0, demand)
|
return max(0, demand)
|
||||||
|
|
||||||
def _calculate_reward(self, info: Dict, action: int) -> float:
|
def _calculate_reward(self, info: Dict, action: int) -> float:
|
||||||
"""Calculate reward based on traffic metrics."""
|
"""Calculate reward based on traffic metrics with non-linear normalization."""
|
||||||
throughput = info["throughput"]
|
throughput = info["throughput"]
|
||||||
avg_density = info["average_density"]
|
avg_density = info["average_density"]
|
||||||
|
|
||||||
avg_speed = 0
|
avg_speed = 0
|
||||||
|
count = 0
|
||||||
for i in range(self.ctm.num_cells):
|
for i in range(self.ctm.num_cells):
|
||||||
if self.ctm.densities[i] > 0:
|
if self.ctm.densities[i] > 0:
|
||||||
avg_speed += min(
|
avg_speed += min(
|
||||||
self.ctm.speed_limits[i],
|
self.ctm.speed_limits[i],
|
||||||
self.ctm.free_flow_speed * (1 - self.ctm.densities[i] / self.ctm.jam_density)
|
self.ctm.free_flow_speed * (1 - self.ctm.densities[i] / self.ctm.jam_density)
|
||||||
)
|
)
|
||||||
avg_speed /= self.ctm.num_cells
|
count += 1
|
||||||
|
avg_speed = avg_speed / count if count > 0 else 0
|
||||||
|
|
||||||
|
# Non-linear normalization using tanh, centered on current inflow
|
||||||
|
# This makes the reward adaptive to traffic conditions
|
||||||
|
# When inflow is high, we expect high throughput; when low, we expect lower throughput
|
||||||
|
# inflow_target = max(self.current_inflow, 1000.0) # Avoid division by very small numbers
|
||||||
|
inflow_target = self.current_inflow # NOTE(review): no lower bound is applied here, so the division below is unguarded when current_inflow is 0
|
||||||
|
throughput_ratio = throughput / inflow_target
|
||||||
|
# throughput_ratio: 1.0 means perfect flow (output = input)
|
||||||
|
# > 1.0 is impossible in steady state, < 1.0 means congestion
|
||||||
|
throughput_norm = (np.tanh((throughput_ratio - 0.9) / 0.2) + 1.0) / 2.0
|
||||||
|
# Speed reward: penalize low speeds more heavily
|
||||||
|
speed_norm = (avg_speed / self.max_speed_limit) ** 2
|
||||||
|
|
||||||
|
# Density penalty: exponential penalty for high density
|
||||||
|
density_ratio = avg_density / self.ctm.critical_density
|
||||||
|
density_norm = np.exp(-density_ratio)
|
||||||
|
|
||||||
reward = (
|
reward = (
|
||||||
self.throughput_weight * (throughput / 2000.0)
|
self.throughput_weight * throughput_norm
|
||||||
+ self.speed_weight * (avg_speed / self.max_speed_limit)
|
+ self.speed_weight * speed_norm
|
||||||
+ self.density_weight * (avg_density / self.ctm.critical_density)
|
+ self.density_weight * (1.0 - density_norm)
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.previous_action is not None and action != self.previous_action:
|
if self.previous_action is not None and action != self.previous_action:
|
||||||
|
|
@ -131,6 +151,7 @@ class TrafficEnvironment:
|
||||||
self.ctm.set_speed_limit(i, speed_limit)
|
self.ctm.set_speed_limit(i, speed_limit)
|
||||||
|
|
||||||
inflow = self._generate_demand()
|
inflow = self._generate_demand()
|
||||||
|
self.current_inflow = inflow # Store current inflow for reward calculation
|
||||||
outflow = 2000.0
|
outflow = 2000.0
|
||||||
|
|
||||||
next_state, info = self.ctm.step(inflow, outflow)
|
next_state, info = self.ctm.step(inflow, outflow)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue