106 lines
3.2 KiB
Python
106 lines
3.2 KiB
Python
"""Shared reward configuration and calculation for freeway VSL environments."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Dict, Mapping, Sequence
|
|
|
|
import numpy as np
|
|
|
|
|
|
# Keys of the individual reward terms; RewardCalculator.calculate stores a
# value under each of these keys in the step ``info`` dict.
REWARD_COMPONENT_COLUMNS = ("r_efficiency", "r_safety")

# Label string associated with each reward component key.
REWARD_COMPONENT_LABELS = dict(
    r_efficiency="R_efficiency",
    r_safety="R_safety",
)
|
|
|
|
|
|
def clip01(value: float) -> float:
    """Clamp *value* to the closed interval [0, 1] and return a built-in float."""
    lower_bound, upper_bound = 0.0, 1.0
    bounded = np.clip(value, lower_bound, upper_bound)
    return float(bounded)
|
|
|
|
|
|
def init_reward_component_totals() -> Dict[str, float]:
    """Return a fresh accumulator dict with every reward component set to 0.0."""
    # 0.0 is immutable, so sharing it across keys via fromkeys is safe.
    return dict.fromkeys(REWARD_COMPONENT_COLUMNS, 0.0)
|
|
|
|
|
|
def average_reward_components(totals: Mapping[str, float], steps: int) -> Dict[str, float]:
    """Divide each accumulated reward component by the number of steps.

    Args:
        totals: Accumulated value per component key; keys that are absent
            count as 0.0.
        steps: Number of steps the totals were accumulated over. Values
            below 1 are treated as 1 so the division is always defined.

    Returns:
        A new dict with one averaged entry per key in
        ``REWARD_COMPONENT_COLUMNS``.
    """
    step_count = int(steps)
    if step_count < 1:
        step_count = 1

    averaged: Dict[str, float] = {}
    for name in REWARD_COMPONENT_COLUMNS:
        averaged[name] = float(totals.get(name, 0.0)) / step_count
    return averaged
|
|
|
|
|
|
@dataclass(frozen=True)
class RewardConfig:
    """Immutable bundle of reward hyper-parameters.

    The field defaults declared here are the single source of truth;
    ``from_dict`` falls back to them for any key that is missing or ``None``.
    """

    # Multiplier applied to the weighted sum of reward terms.
    reward_scale: float = 10.0
    # Weight of the efficiency term (normalised mean speed).
    efficiency_weight: float = 0.75
    # Weight of the safety term (negative stop-rate penalty).
    safety_weight: float = 0.20
    # Multiplier applied to the stop rate before it is clipped to [0, 1].
    safety_stop_weight: float = 1.0
    # Not read by the reward code in this module; from_dict forces it to >= 1.
    bottleneck_window_size: int = 3
    # Speed used to normalise mean speed (presumably m/s -- confirm with caller).
    v_limit: float = 33.33
    # Not read by the reward code in this module (name suggests metres -- confirm).
    leader_gap_threshold_m: float = 100.0

    @classmethod
    def from_dict(
        cls,
        raw_cfg: Mapping[str, object],
        *,
        speed_actions_ms: Sequence[float],
    ) -> "RewardConfig":
        """Build a config from a plain mapping, coercing each value's type.

        Args:
            raw_cfg: Mapping that may contain any of the field names. A key
                that is absent, or present with value ``None``, falls back
                to the field's declared default.
            speed_actions_ms: Accepted for interface compatibility; it does
                not influence the resulting config.

        Returns:
            A new ``RewardConfig``. ``bottleneck_window_size`` is clamped to
            a minimum of 1.

        Raises:
            TypeError, ValueError: If a provided value cannot be converted
                by ``float()`` / ``int()``.
        """
        _ = speed_actions_ms

        def _pick(key: str) -> object:
            # Fall back to the class-level field default so the literals are
            # not repeated here and cannot drift from the declarations above.
            value = raw_cfg.get(key)
            return getattr(cls, key) if value is None else value

        return cls(
            reward_scale=float(_pick("reward_scale")),
            efficiency_weight=float(_pick("efficiency_weight")),
            safety_weight=float(_pick("safety_weight")),
            safety_stop_weight=float(_pick("safety_stop_weight")),
            bottleneck_window_size=max(1, int(_pick("bottleneck_window_size"))),
            v_limit=float(_pick("v_limit")),
            leader_gap_threshold_m=float(_pick("leader_gap_threshold_m")),
        )
|
|
|
|
|
|
class RewardCalculator:
    """Minimal reward for freeway VSL control.

    The reward is a weighted sum of an efficiency term (mean speed
    normalised by ``config.v_limit``) and a safety term (negative, clipped
    stop-rate penalty), multiplied by ``config.reward_scale``.
    """

    def __init__(
        self,
        *,
        config: RewardConfig,
        controlled_edge_start_index: int,
        evaluation_mode: bool = False,
    ):
        """Store the configuration and the coerced bookkeeping arguments."""
        self.config = config
        self.controlled_edge_start_index = int(controlled_edge_start_index)
        self.evaluation_mode = bool(evaluation_mode)

    def calculate(
        self,
        *,
        info: Dict,
        current_edge_speeds: np.ndarray,
        prev_edge_speeds: np.ndarray,
        episode_index: int,
    ) -> float:
        """Compute the scaled step reward and record its components in *info*.

        Args:
            info: Step dict read for ``mean_speed``, ``num_vehicles`` and
                ``stop_rate`` (each defaulting to zero when absent). It is
                mutated: ``r_efficiency``, ``r_safety`` and
                ``safety_penalty_norm`` are written into it.
            current_edge_speeds: Part of the interface; not used here.
            prev_edge_speeds: Part of the interface; not used here.
            episode_index: Part of the interface; not used here.

        Returns:
            ``(efficiency_weight * r_efficiency + safety_weight * r_safety)
            * reward_scale`` as a built-in float.
        """
        del current_edge_speeds, prev_edge_speeds, episode_index

        cfg = self.config

        # Negative mean speeds are treated as zero.
        speed = float(info.get("mean_speed", 0.0))
        if speed < 0.0:
            speed = 0.0

        vehicle_count = int(info.get("num_vehicles", 0))
        if vehicle_count > 0:
            # Floor the divisor so a zero or negative v_limit cannot divide by zero.
            speed_reference = max(cfg.v_limit, 1e-6)
            r_efficiency = clip01(speed / speed_reference)
        else:
            # No vehicles present: no efficiency credit.
            r_efficiency = 0.0

        stop_fraction = clip01(float(info.get("stop_rate", 0.0)))
        penalty = clip01(cfg.safety_stop_weight * stop_fraction)
        r_safety = -penalty

        info["r_efficiency"] = float(r_efficiency)
        info["r_safety"] = float(r_safety)
        info["safety_penalty_norm"] = float(penalty)

        weighted_sum = cfg.efficiency_weight * r_efficiency + cfg.safety_weight * r_safety
        return float(weighted_sum * cfg.reward_scale)
|