184 lines
6.8 KiB
Python
184 lines
6.8 KiB
Python
"""Shared reward configuration and calculation for freeway VSL environments."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Dict, Mapping, Sequence
|
|
|
|
import numpy as np
|
|
|
|
|
|
# Names of the individual reward terms. The helper functions
# init_reward_component_totals() and average_reward_components() iterate over
# this tuple, and RewardCalculator.calculate() writes one entry per name into
# the step ``info`` dict.
REWARD_COMPONENT_COLUMNS = ("r_outflow", "r_bottleneck", "r_ctrl")

# Display label for each reward component column.
REWARD_COMPONENT_LABELS = dict(
    r_outflow="R_outflow",
    r_bottleneck="R_bottleneck",
    r_ctrl="R_ctrl",
)
|
|
|
|
|
|
def clip01(value: float) -> float:
    """Clamp ``value`` to the closed interval [0, 1] and return a Python float."""
    lower, upper = 0.0, 1.0
    bounded = np.clip(value, lower, upper)
    return float(bounded)
|
|
|
|
|
|
def init_reward_component_totals() -> Dict[str, float]:
    """Return a fresh accumulator mapping every reward component column to 0.0."""
    return dict.fromkeys(REWARD_COMPONENT_COLUMNS, 0.0)
|
|
|
|
|
|
def average_reward_components(totals: Mapping[str, float], steps: int) -> Dict[str, float]:
    """Convert accumulated reward component totals into per-step averages.

    Args:
        totals: Accumulated value per reward component column. Columns that
            are absent are treated as 0.0.
        steps: Number of steps the totals were accumulated over. Values below
            1 are replaced by 1 so the division is always defined.

    Returns:
        A new dict with one averaged entry per name in
        ``REWARD_COMPONENT_COLUMNS``.
    """
    divisor = int(steps)
    if divisor < 1:
        divisor = 1

    averages: Dict[str, float] = {}
    for name in REWARD_COMPONENT_COLUMNS:
        averages[name] = float(totals.get(name, 0.0)) / divisor
    return averages
|
|
|
|
|
|
@dataclass(frozen=True)
class RewardConfig:
    """Immutable bundle of reward hyper-parameters.

    Instances are normally built with :meth:`from_dict`, which overlays the
    values of a raw configuration mapping on top of the defaults declared
    below.
    """

    # Multiplier applied to the weighted sum of reward components.
    reward_scale: float = 10.0
    # Weight of the outflow term (r_outflow).
    outflow_weight: float = 0.75
    # Weight of the bottleneck occupancy penalty (r_bottleneck).
    bottleneck_weight: float = 0.20
    # Weight of the control-change penalty (r_ctrl) at the start of training
    # and the value it decays towards; evaluation mode always uses the end
    # value.
    control_weight_start: float = 0.05
    control_weight_end: float = 0.01
    # Exponent applied to (1 - training progress) when decaying the control
    # weight; RewardCalculator floors it at 0.
    control_weight_decay_power: float = 1.5
    # Reference value that the downstream mainline outflow is divided by.
    mainline_discharge_ref_vehph: float = 4924.0
    # Bottleneck occupancy above which the penalty starts to grow.
    bottleneck_critical_occupancy: float = 15.0
    # Amount of excess occupancy at which the penalty saturates at 1.
    bottleneck_excess_occupancy_band: float = 10.0
    # Not read by RewardCalculator in this file; from_dict floors it at 1.
    bottleneck_window_size: int = 3
    # Mixing weights for the temporal and spatial parts of the control penalty.
    control_temporal_weight: float = 0.7
    control_spatial_weight: float = 0.3
    # Normaliser for speed changes in the control penalty.
    delta_vsl_max: float = 0.0
    # NOTE(review): the three fields below are not read anywhere in this file;
    # presumably they are consumed by the environments - confirm.
    d_threshold: float = 3.0
    v_limit: float = 33.33
    leader_gap_threshold_m: float = 100.0

    @classmethod
    def from_dict(
        cls,
        raw_cfg: Mapping[str, object],
        *,
        speed_actions_ms: Sequence[float],
    ) -> "RewardConfig":
        """Build a config from a raw mapping, falling back to field defaults.

        Args:
            raw_cfg: Mapping of field name to raw value. Missing keys fall
                back to the defaults declared on the dataclass fields.
            speed_actions_ms: Available speed actions. When ``delta_vsl_max``
                is absent from ``raw_cfg`` it defaults to ``max - min`` of
                these actions, or 0.0 when the sequence is empty.

        Returns:
            A new ``RewardConfig`` with every value coerced to ``float``,
            except ``bottleneck_window_size`` which is coerced to ``int`` and
            floored at 1.

        Raises:
            TypeError, ValueError: If a supplied value cannot be converted to
                the numeric type of its field.
        """
        # The declared field defaults are the single source of truth for
        # fallbacks, so an empty mapping always yields the same values as
        # ``cls()`` (apart from the derived ``delta_vsl_max``).
        defaults = cls()

        def read_float(name: str) -> float:
            return float(raw_cfg.get(name, getattr(defaults, name)))

        # ``len`` rather than truthiness: the actions may be a NumPy array,
        # whose truth value is ambiguous.
        if len(speed_actions_ms) > 0:
            fallback_delta_vsl_max = float(
                np.max(speed_actions_ms) - np.min(speed_actions_ms)
            )
        else:
            fallback_delta_vsl_max = 0.0

        return cls(
            reward_scale=read_float("reward_scale"),
            outflow_weight=read_float("outflow_weight"),
            bottleneck_weight=read_float("bottleneck_weight"),
            control_weight_start=read_float("control_weight_start"),
            control_weight_end=read_float("control_weight_end"),
            control_weight_decay_power=read_float("control_weight_decay_power"),
            mainline_discharge_ref_vehph=read_float("mainline_discharge_ref_vehph"),
            bottleneck_critical_occupancy=read_float("bottleneck_critical_occupancy"),
            bottleneck_excess_occupancy_band=read_float(
                "bottleneck_excess_occupancy_band"
            ),
            bottleneck_window_size=max(
                1,
                int(
                    raw_cfg.get(
                        "bottleneck_window_size", defaults.bottleneck_window_size
                    )
                ),
            ),
            control_temporal_weight=read_float("control_temporal_weight"),
            control_spatial_weight=read_float("control_spatial_weight"),
            delta_vsl_max=float(raw_cfg.get("delta_vsl_max", fallback_delta_vsl_max)),
            d_threshold=read_float("d_threshold"),
            v_limit=read_float("v_limit"),
            leader_gap_threshold_m=read_float("leader_gap_threshold_m"),
        )
|
|
|
|
|
|
class RewardCalculator:
    """Compute the scalar step reward for mainline bottleneck VSL control.

    The reward is ``config.reward_scale`` times a weighted sum of three terms:

    * ``r_outflow``: downstream mainline outflow divided by
      ``config.mainline_discharge_ref_vehph``, clipped to [0, 1].
    * ``r_bottleneck``: negated, clipped excess of the bottleneck occupancy
      over ``config.bottleneck_critical_occupancy``.
    * ``r_ctrl``: negated, clipped mix of the temporal and spatial variation
      of the controlled edge speeds.
    """

    def __init__(
        self,
        *,
        config: RewardConfig,
        controlled_edge_start_index: int,
        total_training_episodes: int,
        evaluation_mode: bool = False,
    ):
        """Store the configuration and normalise the scalar settings.

        Args:
            config: Reward hyper-parameters.
            controlled_edge_start_index: Index of the first entry of the edge
                speed arrays that counts towards the control penalty.
            total_training_episodes: Length of the training schedule used for
                the control weight decay; values below 1 are raised to 1.
            evaluation_mode: When true, the control weight is fixed at
                ``config.control_weight_end``.
        """
        episodes = int(total_training_episodes)

        self.config = config
        self.controlled_edge_start_index = int(controlled_edge_start_index)
        self.total_training_episodes = episodes if episodes > 1 else 1
        self.evaluation_mode = bool(evaluation_mode)

    def get_control_weight(self, episode_index: int) -> float:
        """Return the weight of the control penalty for ``episode_index``.

        In evaluation mode this is always ``config.control_weight_end``.
        Otherwise the weight moves from ``control_weight_start`` towards
        ``control_weight_end`` as ``(1 - progress) ** decay_power``, where
        progress is ``(episode_index - 1) / (total_training_episodes - 1)``
        clipped to [0, 1].
        """
        cfg = self.config
        final_weight = cfg.control_weight_end
        if self.evaluation_mode:
            return float(final_weight)

        # A schedule of a single episode is treated as already finished.
        progress = (
            1.0
            if self.total_training_episodes <= 1
            else clip01(
                (float(episode_index) - 1.0) / (self.total_training_episodes - 1.0)
            )
        )

        # Negative decay powers are floored at 0.
        exponent = max(cfg.control_weight_decay_power, 0.0)
        remaining = (1.0 - progress) ** exponent
        return float(final_weight + (cfg.control_weight_start - final_weight) * remaining)

    def calculate(
        self,
        *,
        info: Dict,
        current_edge_speeds: np.ndarray,
        prev_edge_speeds: np.ndarray,
        episode_index: int,
    ) -> float:
        """Compute the scaled reward for one step and record its components.

        Args:
            info: Step info mapping. ``downstream_mainline_outflow`` and
                ``bottleneck_occupancy`` are read from it (default 0.0), and
                the reward components and diagnostics are written back into it.
            current_edge_speeds: Edge speeds after this step's action.
            prev_edge_speeds: Edge speeds before this step's action.
            episode_index: Episode number used for the control weight decay.

        Returns:
            The weighted component sum multiplied by ``config.reward_scale``.
        """
        cfg = self.config

        # Outflow term.
        outflow = float(info.get("downstream_mainline_outflow", 0.0))
        outflow_reference = max(cfg.mainline_discharge_ref_vehph, 1e-6)
        r_outflow = clip01(outflow / outflow_reference)

        # Bottleneck term: only occupancy above the critical value is penalised.
        occupancy = float(info.get("bottleneck_occupancy", 0.0))
        occupancy_excess = max(occupancy - cfg.bottleneck_critical_occupancy, 0.0)
        excess_norm = clip01(
            occupancy_excess / max(cfg.bottleneck_excess_occupancy_band, 1e-6)
        )
        r_bottleneck = -excess_norm

        # Control term: only entries from the start index onwards count.
        first_active = self.controlled_edge_start_index
        speeds_now = np.asarray(current_edge_speeds[first_active:], dtype=float)
        speeds_before = np.asarray(prev_edge_speeds[first_active:], dtype=float)
        # Guard against division by zero when delta_vsl_max is 0.
        speed_scale = max(cfg.delta_vsl_max, 1e-6)

        temporal_raw = 0.0
        spatial_raw = 0.0
        active_count = speeds_now.size
        if active_count > 0:
            temporal_raw = float(
                np.mean(np.abs(speeds_now - speeds_before)) / speed_scale
            )
        if active_count >= 2:
            # Differences between neighbouring entries need at least two.
            spatial_raw = float(np.mean(np.abs(np.diff(speeds_now))) / speed_scale)

        temporal_norm = clip01(temporal_raw)
        spatial_norm = clip01(spatial_raw)
        change_norm = clip01(
            cfg.control_temporal_weight * temporal_norm
            + cfg.control_spatial_weight * spatial_norm
        )
        r_ctrl = -change_norm

        control_weight = self.get_control_weight(episode_index)

        # Record the components and diagnostics in the caller's info mapping.
        recorded = (
            ("r_outflow", r_outflow),
            ("r_bottleneck", r_bottleneck),
            ("r_ctrl", r_ctrl),
            ("bottleneck_excess_occupancy_norm", excess_norm),
            ("temporal_control_change_norm", temporal_norm),
            ("spatial_control_change_norm", spatial_norm),
            ("control_weight", control_weight),
        )
        for key, value in recorded:
            info[key] = float(value)

        weighted_sum = (
            cfg.outflow_weight * r_outflow
            + cfg.bottleneck_weight * r_bottleneck
            + control_weight * r_ctrl
        )
        return float(weighted_sum * cfg.reward_scale)
|