169 lines
6.8 KiB
Python
169 lines
6.8 KiB
Python
"""Minimal reward blueprint for the current freeway VSL study."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Iterable, Tuple
|
|
|
|
|
|
@dataclass(frozen=True)
class RewardTerm:
    """Immutable description of a single component of the reward.

    The fields are plain text; ``to_markdown`` renders them as one Markdown
    subsection (a level-3 heading followed by a bullet list).
    """

    name: str  # heading text of the rendered subsection
    symbol: str  # rendered inside backticks
    objective: str
    rationale: str
    formula_tex: str  # rendered verbatim inside backticks
    required_signals: Tuple[str, ...]  # each entry is rendered inside backticks

    def to_markdown(self) -> str:
        """Render this term as a Markdown subsection.

        Returns:
            The heading and bullet lines joined with newlines, without a
            trailing newline.
        """
        quoted_signals = [f"`{signal}`" for signal in self.required_signals]
        signal_list = ", ".join(quoted_signals)
        section_lines = (
            f"### {self.name}",
            f"- Symbol: `{self.symbol}`",
            f"- Objective: {self.objective}",
            f"- Rationale: {self.rationale}",
            f"- Formula: `{self.formula_tex}`",
            f"- Required signals: {signal_list}",
        )
        return "\n".join(section_lines)
|
|
|
|
|
|
@dataclass(frozen=True)
class RewardBlueprint:
    """Immutable description of a complete reward design.

    Bundles the narrative fields, the individual reward terms, and the
    formula that combines them, and can render the whole design as a
    Markdown document.
    """

    name: str  # document title
    scenario_summary: str
    primary_objective: str
    design_principles: Tuple[str, ...]
    terms: Tuple[RewardTerm, ...]
    global_formula_tex: str  # rendered verbatim inside backticks
    excluded_metrics: Tuple[str, ...] = ()  # section is omitted when empty
    implementation_notes: Tuple[str, ...] = ()  # section is omitted when empty

    def term_names(self) -> Tuple[str, ...]:
        """Return the ``name`` of every term, in declaration order."""
        names = [reward_term.name for reward_term in self.terms]
        return tuple(names)

    def to_markdown(self) -> str:
        """Render the blueprint as a Markdown document.

        Returns:
            The document text, stripped of trailing whitespace and ending
            in exactly one newline.
        """

        def bullets(entries: Tuple[str, ...]) -> list:
            # One "- item" line per entry.
            return [f"- {entry}" for entry in entries]

        document = [f"# {self.name}", ""]
        document += ["## Scenario", self.scenario_summary, ""]
        document += ["## Primary Objective", self.primary_objective, ""]
        document += ["## Global Formula", f"`{self.global_formula_tex}`", ""]
        document += ["## Design Principles", *bullets(self.design_principles), ""]

        document.append("## Reward Terms")
        for reward_term in self.terms:
            # Each term renders its own subsection; a blank line separates them.
            document += [reward_term.to_markdown(), ""]

        # The two optional sections appear only when they have content.
        if self.excluded_metrics:
            document += [
                "## Metrics To Avoid As Primary Reward Drivers",
                *bullets(self.excluded_metrics),
                "",
            ]
        if self.implementation_notes:
            document += [
                "## Implementation Notes",
                *bullets(self.implementation_notes),
            ]

        # Drop any trailing blank lines, then terminate with a single newline.
        return "\n".join(document).rstrip() + "\n"
|
|
|
|
|
|
def build_tca_mappo_reward_blueprint() -> RewardBlueprint:
    """Assemble the multiplicative-MAUT reward blueprint for corridor VSL.

    The blueprint has two terms, ``efficiency`` and ``safety``, plus a global
    formula that multiplies the two utilities, each raised to its own
    exponent.

    Returns:
        A freshly constructed ``RewardBlueprint``; every call builds a new
        instance from the same literal content.
    """
    # --- efficiency term -------------------------------------------------
    efficiency_rationale = (
        "The raw efficiency indicator should not enter the reward linearly. A monotone concave utility "
        "function emphasizes recovery from low-efficiency states while avoiding over-rewarding already "
        "high-speed regimes."
    )
    efficiency_formula = (
        r"R_{\mathrm{efficiency}}(t)=1-\exp\!\left(-\alpha E(t)\right),\ "
        r"E(t)=\begin{cases}\mathrm{clip}(\bar{v}(t)/v_{\max},0,1),&N(t)>0\\0,&N(t)=0\end{cases}"
    )
    efficiency_term = RewardTerm(
        name="efficiency",
        symbol="R_efficiency",
        objective="Map normalized corridor running efficiency into a bounded utility term.",
        rationale=efficiency_rationale,
        formula_tex=efficiency_formula,
        required_signals=(
            "controlled-corridor mean speed",
            "controlled-corridor active vehicle count",
            "speed limit",
        ),
    )

    # --- safety term -----------------------------------------------------
    safety_rationale = (
        "For freeway VSL, rear-end conflict risk is better captured by time-to-collision than by "
        "stop counting. An exponential safety utility makes the reward sensitive to the spread of "
        "short-TTC following states while preserving boundedness and interpretability."
    )
    # NOTE(review): S(t) divides by N(t) but, unlike E(t) above, the formula
    # does not state what happens when N(t)=0 -- confirm the intended
    # convention against the reward implementation.
    safety_formula = (
        r"R_{\mathrm{safety}}(t)=\exp\!\left(-\beta S(t)\right),\ "
        r"S(t)=\frac{1}{N(t)}\sum_{i=1}^{N(t)}\max\!\left(0,1-\frac{\mathrm{TTC}_i(t)}{\tau_{\mathrm{ttc}}}\right)"
    )
    safety_term = RewardTerm(
        name="safety",
        symbol="R_safety",
        objective="Map TTC-based following risk into a bounded safety utility term.",
        rationale=safety_rationale,
        formula_tex=safety_formula,
        required_signals=(
            "controlled-corridor vehicle speeds",
            "leader speeds",
            "leader gaps",
            "TTC threshold",
        ),
    )

    # --- blueprint-level narrative --------------------------------------
    scenario = (
        "The study controls a segmented freeway VSL corridor under fixed control intervals. "
        "For architecture comparison, the reward should emphasize a small number of stable, "
        "interpretable traffic goals while avoiding overly simplistic linear compensation between "
        "efficiency and safety."
    )
    objective = (
        "Improve corridor running efficiency and traffic safety through a nonlinear utility-based reward."
    )
    principles = (
        "Prefer stable per-step traffic signals over highly time-dependent outflow spikes.",
        "Map raw traffic indicators into utility space before aggregation.",
        "Avoid full linear substitutability between efficiency and safety.",
        "Use a compact nonlinear aggregation that remains easy to interpret and implement.",
        "Use a freeway-native rear-end risk surrogate instead of coarse stop counting.",
        "Avoid auxiliary regularizers that change the objective across training stages.",
    )
    # Product of the two utilities, each raised to its own exponent.
    global_formula = (
        r"R(t)=R_{\mathrm{efficiency}}(t)^{\lambda_{\mathrm{eff}}}"
        r"R_{\mathrm{safety}}(t)^{\lambda_{\mathrm{safe}}}"
    )
    avoided_metrics = (
        "Instantaneous outflow as the primary step reward because it is strongly modulated by simulation time.",
        "Too many correlated penalties such as shockwave, occupancy, braking, and variance all entering together.",
        "Purely linear reward aggregation when safety should not be fully compensated by efficiency gains.",
    )
    # NOTE(review): the last note reads "Training and evaluate"; it is emitted
    # verbatim at runtime, so the wording is left as-is here.
    notes = (
        "The efficiency utility is built from normalized controlled-corridor mean speed.",
        "The safety utility is built from a TTC-thresholded average following-risk indicator.",
        "The exponents are normalized internally before multiplicative aggregation.",
        "Throughput and bottleneck occupancy can still be logged for diagnosis even if they are no longer part of the reward.",
        "Training and evaluate now share the same fixed reward structure.",
    )

    return RewardBlueprint(
        name="Multiplicative MAUT Reward Blueprint For TCA-MAPPO",
        scenario_summary=scenario,
        primary_objective=objective,
        design_principles=principles,
        terms=(efficiency_term, safety_term),
        global_formula_tex=global_formula,
        excluded_metrics=avoided_metrics,
        implementation_notes=notes,
    )
|
|
|
|
|
|
def build_reward_blueprint_markdown() -> str:
    """Build the TCA-MAPPO reward blueprint and return it rendered as Markdown."""
    blueprint = build_tca_mappo_reward_blueprint()
    return blueprint.to_markdown()
|
|
|
|
|
|
def iter_required_signals() -> Iterable[str]:
    """Collect every signal required by any term of the reward blueprint.

    Returns:
        A tuple of the distinct signal names across all terms, in sorted
        string order.
    """
    blueprint = build_tca_mappo_reward_blueprint()
    # A set comprehension removes signals that several terms share.
    distinct_signals = {
        signal
        for reward_term in blueprint.terms
        for signal in reward_term.required_signals
    }
    return tuple(sorted(distinct_signals))
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: write the rendered blueprint to stdout.
    # The rendered Markdown already ends with a newline, so suppress the one
    # print() would append; otherwise redirected output ends in a blank line.
    print(build_reward_blueprint_markdown(), end="")
|