1111 lines
45 KiB
Python
1111 lines
45 KiB
Python
"""Evaluate trained models and export structured CSV/plots."""
|
|
import argparse
|
|
import concurrent.futures
|
|
import copy
|
|
import glob
|
|
import inspect
|
|
import os
|
|
import sys
|
|
from typing import Dict, List, Optional, Tuple
|
|
|
|
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
|
if PROJECT_ROOT not in sys.path:
|
|
sys.path.insert(0, PROJECT_ROOT)
|
|
|
|
import matplotlib
|
|
import numpy as np
|
|
import pandas as pd
|
|
import yaml
|
|
|
|
matplotlib.use("Agg")
|
|
import matplotlib.pyplot as plt
|
|
|
|
from agents.appo_agent import APPOAgent
|
|
from agents.dcmappo_agent import DCMAPPOAgent
|
|
from agents.ddqn_agent import DDQNAgent
|
|
from agents.ddpg_agent import DDPGAgent
|
|
from agents.dqn_agent import DQNAgent
|
|
from agents.gpro_agent import GPROAgent
|
|
from agents.madqn_agent import MADQNAgent
|
|
from agents.mappo_agent import MAPPOAgent
|
|
from agents.ppo_agent import PPOAgent
|
|
from agents.qmix_agent import QMIXAgent
|
|
from agents.dcqmix_agent import DCQMIXAgent
|
|
from agents.sac_agent import SACAgent
|
|
from agents.sctd3_agent import SCTD3Agent
|
|
from agents.tcamappo_agent import TCAMAPPOAgent
|
|
from agents.td3_agent import TD3Agent
|
|
from envs.edge_vsl_env import SUMOEdgeVSLEnvironment
|
|
from envs.reward_system import REWARD_COMPONENT_COLUMNS, REWARD_COMPONENT_LABELS
|
|
from utils.config import get_agent_config
|
|
from utils.heatmap_plotting import (
|
|
build_action_panel,
|
|
build_occupancy_panel,
|
|
build_speed_panel,
|
|
save_heatmap_panels,
|
|
)
|
|
from utils.run_dirs import find_shared_config_path, resolve_checkpoint_root
|
|
|
|
|
|
# Identifiers of the trainable models, in the order used for evaluation and sorting.
MODEL_ORDER = [
    "ppo",
    "gpro",
    "appo",
    "mappo",
    "tcamappo",
    "dcmappo",
    "dqn",
    "madqn",
    "ddqn",
    "qmix",
    "dcqmix",
    "ddpg",
    "sac",
    "td3",
    "sctd3",
]

# Identifier of the baseline run that is evaluated without a trained agent.
BASELINE_NAME = "no_control"

# Full evaluation order: the baseline first, then every entry of MODEL_ORDER.
EVAL_ORDER = [BASELINE_NAME, *MODEL_ORDER]

# Display label for every identifier in EVAL_ORDER (used in CSV rows and plot legends).
MODEL_LABELS = {
    BASELINE_NAME: "NO_CONTROL",
    "ppo": "PPO",
    "gpro": "GPRO-PPO",
    "appo": "APPO",
    "mappo": "MAPPO",
    "tcamappo": "TCA-MAPPO",
    "dcmappo": "DC-MAPPO",
    "dqn": "DQN",
    "madqn": "MA-DQN",
    "ddqn": "DDQN",
    "qmix": "QMIX",
    "dcqmix": "DC-QMIX",
    "ddpg": "DDPG",
    "sac": "SAC",
    "td3": "TD3",
    "sctd3": "SC-TD3",
}
|
|
|
|
|
|
def parse_args(argv: Optional[List[str]] = None):
    """Parse the command-line options of the evaluation script.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which matches the previous behaviour; passing
            a list allows programmatic use.

    Returns:
        The ``argparse.Namespace`` holding ``checkpoint_root``, ``output_dir``,
        ``config``, ``models``, ``seed``, ``workers``, ``with_gui``,
        ``begin_time``, ``end_time``, ``step_length`` and ``route_file``.
    """
    parser = argparse.ArgumentParser(description="Evaluate trained SUMO VSL models.")

    # Input / output locations.
    parser.add_argument(
        "--checkpoint-root",
        type=str,
        default=None,
        help="Checkpoint root or run root. Default: latest under runs/<timestamp>.",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default=None,
        help="Evaluation output directory. Default: results/evaluations/<run_name>.",
    )
    parser.add_argument(
        "--config",
        type=str,
        default="config_sumo_vsl.yaml",
        help="Fallback config path when the shared run config is unavailable.",
    )

    # Which models to evaluate and how.
    parser.add_argument(
        "--models",
        nargs="*",
        default=None,
        help="Subset of models to evaluate, e.g. --models ppo gpro tcamappo dcmappo dqn madqn ddqn qmix dcqmix sac td3 sctd3",
    )
    parser.add_argument("--seed", type=int, default=42, help="Evaluation seed.")
    parser.add_argument(
        "--workers",
        type=int,
        default=None,
        help="Number of parallel evaluation workers. Default: min(model_count, max(1, cpu_count//2)).",
    )
    parser.add_argument(
        "--with-gui",
        action="store_true",
        help="Run evaluation with SUMO GUI enabled.",
    )

    # Evaluation-only overrides of the SUMO settings stored in the config.
    parser.add_argument(
        "--begin-time",
        type=int,
        default=None,
        help="Override SUMO evaluation begin time in seconds. Default: use training config.",
    )
    parser.add_argument(
        "--end-time",
        type=int,
        default=None,
        help="Override SUMO evaluation end time in seconds. Default: use training config.",
    )
    parser.add_argument(
        "--step-length",
        type=float,
        default=None,
        help="Override SUMO simulation step length for evaluation only. Default: use training config.",
    )
    parser.add_argument(
        "--route-file",
        type=str,
        default=None,
        help="Override SUMO route/flow file for evaluation only. Supports absolute paths or project-relative paths.",
    )
    return parser.parse_args(argv)
|
|
|
|
|
|
def normalize_model_name(name: str) -> str:
    """Return *name* stripped and lower-cased, provided it is listed in ``MODEL_ORDER``.

    Raises:
        ValueError: If the normalized name is not a supported model identifier.
    """
    canonical = name.strip().lower()
    if canonical in MODEL_ORDER:
        return canonical
    raise ValueError(f"Unsupported model name: {name}")
|
|
|
|
|
|
def discover_model_dirs(checkpoint_root: str, requested_models: Optional[List[str]] = None) -> Dict[str, str]:
    """Map model identifiers to their checkpoint directories.

    Three directory layouts are recognised, tried in this order:

    1. ``checkpoint_root`` contains one sub-directory per model name.
    2. ``checkpoint_root`` is itself named after a model and sits under a
       ``checkpoints`` directory (as parent or grandparent) or has a
       ``multi-model`` grandparent.
    3. ``checkpoint_root`` contains a ``config.yaml``; the model is inferred
       from the parent name, the directory name, or a single requested model.

    Only trainable models (``MODEL_ORDER``) are matched; the baseline name is
    not a checkpoint and is therefore never returned.

    Args:
        checkpoint_root: Directory to inspect.
        requested_models: Optional subset of model names to keep.

    Returns:
        ``{model_name: directory}``; may be empty when a filter excludes
        everything that was found.

    Raises:
        ValueError: If a requested name is unsupported, or a single checkpoint
            directory is detected but its model type cannot be inferred.
        FileNotFoundError: If no layout matches.
    """
    root = os.path.abspath(checkpoint_root)
    requested = [normalize_model_name(m) for m in requested_models] if requested_models else None

    def is_wanted(name: str) -> bool:
        return requested is None or name in requested

    # Layout 1: one sub-directory per model.
    found = {}
    for name in MODEL_ORDER:
        candidate = os.path.join(root, name)
        if os.path.isdir(candidate):
            found[name] = candidate
    if found:
        return {name: path for name, path in found.items() if is_wanted(name)}

    base_name = os.path.basename(root).lower()
    parent_name = os.path.basename(os.path.dirname(root)).lower()
    grandparent_name = os.path.basename(os.path.dirname(os.path.dirname(root))).lower()

    # Layout 2: the root itself is a model directory inside a known container.
    inside_known_container = (
        parent_name == "checkpoints" or grandparent_name in {"checkpoints", "multi-model"}
    )
    if base_name in MODEL_ORDER and inside_known_container:
        return {base_name: root} if is_wanted(base_name) else {}

    # Layout 3: a bare checkpoint directory identified by its config.yaml.
    if os.path.isfile(os.path.join(root, "config.yaml")):
        if parent_name in MODEL_ORDER:
            model_name = parent_name
        elif base_name in MODEL_ORDER:
            model_name = base_name
        elif requested and len(requested) == 1:
            model_name = requested[0]
        else:
            raise ValueError(
                "Single checkpoint dir detected, but model type cannot be inferred. "
                "Please pass --models <model_name>."
            )
        return {model_name: root}

    raise FileNotFoundError(f"No model checkpoint directories found in: {root}")
|
|
|
|
|
|
def infer_eval_run_name(checkpoint_root: str) -> str:
    """Derive the evaluation run name from the last path components of *checkpoint_root*.

    The name is built from the directory itself (leaf), its parent and its
    grandparent, depending on where ``checkpoints`` / ``multi-model`` or a
    model name appears among them.
    """
    parent_dir, leaf = os.path.split(os.path.abspath(checkpoint_root))
    grandparent_dir, parent = os.path.split(parent_dir)
    grandparent = os.path.basename(grandparent_dir)

    if leaf == "checkpoints":
        # <run>/checkpoints -> <run>
        run_name = parent
    elif parent == "checkpoints":
        # <run>/checkpoints/<leaf> -> <leaf>_<run>
        run_name = f"{leaf}_{grandparent}"
    elif grandparent == "multi-model":
        # multi-model/<parent>/<leaf> -> <leaf>_<parent>
        run_name = f"{leaf}_{parent}"
    elif parent == "multi-model":
        run_name = leaf
    elif parent in MODEL_ORDER:
        # <model>/<leaf> -> <model>_<leaf>
        run_name = f"{parent}_{leaf}"
    else:
        run_name = leaf
    return run_name
|
|
|
|
|
|
def resolve_eval_output_dir(output_dir: str, checkpoint_root: str) -> str:
    """Return *output_dir* when given, else ``results/evaluations/<run_name>``.

    ``<run_name>`` is obtained from ``infer_eval_run_name(checkpoint_root)``.
    """
    if not output_dir:
        return os.path.join("results", "evaluations", infer_eval_run_name(checkpoint_root))
    return output_dir
|
|
|
|
|
|
def load_config_for_checkpoint(checkpoint_dir: Optional[str], fallback_config_path: str) -> dict:
    """Load the fallback config and overlay the checkpoint's shared config, if any.

    Args:
        checkpoint_dir: Checkpoint directory handed to ``find_shared_config_path``.
        fallback_config_path: YAML file that is always loaded as the base.

    Returns:
        The fallback config, deep-merged with the shared config when
        ``find_shared_config_path`` returns an existing file (shared values win).
    """
    with open(fallback_config_path, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty document; normalize to a dict so
        # merging and later ``setdefault`` calls do not fail.
        base_config = yaml.safe_load(f) or {}

    checkpoint_config = find_shared_config_path(checkpoint_dir, fallback_config_path)
    if checkpoint_config and os.path.isfile(checkpoint_config):
        with open(checkpoint_config, "r", encoding="utf-8") as f:
            checkpoint_loaded = yaml.safe_load(f) or {}
        return deep_merge_dicts(base_config, checkpoint_loaded)

    return base_config
|
|
|
|
|
|
def deep_merge_dicts(base: dict, override: dict) -> dict:
    """Return a new dict: a deep copy of *base* with *override* merged on top.

    Where both sides hold a dict under the same key the merge recurses;
    otherwise the (deep-copied) override value replaces the base value.
    Neither input is modified.
    """
    result = copy.deepcopy(base)
    for name in override:
        incoming = override[name]
        current = result.get(name)
        if isinstance(incoming, dict) and isinstance(current, dict):
            result[name] = deep_merge_dicts(current, incoming)
            continue
        result[name] = copy.deepcopy(incoming)
    return result
|
|
|
|
|
|
def resolve_project_path(path_str: Optional[str]) -> Optional[str]:
    """Resolve *path_str* against ``PROJECT_ROOT``.

    Empty or ``None`` input yields ``None``; absolute paths are returned
    unchanged; relative paths are joined to ``PROJECT_ROOT`` and made absolute.
    """
    if not path_str:
        return None
    already_absolute = os.path.isabs(path_str)
    return path_str if already_absolute else os.path.abspath(os.path.join(PROJECT_ROOT, path_str))
|
|
|
|
|
|
def _checkpoint_episode_number(path: str) -> int:
    """Return the first integer after ``model_ep`` in the file name, or -1 if there is none."""
    import re

    match = re.search(r"model_ep\D*(\d+)", os.path.basename(path))
    return int(match.group(1)) if match else -1


def resolve_model_load_path(model_name: str, checkpoint_dir: str) -> str:
    """Pick the checkpoint to load for *model_name* from *checkpoint_dir*.

    ``model_best`` is preferred; otherwise the ``model_ep*`` file with the
    highest episode number is used. Episode files are ranked numerically, so
    ``model_ep100`` beats ``model_ep20`` (a plain string sort would not).

    Args:
        model_name: Model identifier; decides between ``.pt`` and ``.zip`` files.
        checkpoint_dir: Directory containing the checkpoint files.

    Returns:
        For the ``.pt`` models the full file path; for all other models the
        path with the ``.zip`` extension removed.

    Raises:
        FileNotFoundError: If neither a best nor an episode checkpoint exists.
    """
    uses_pt_files = model_name in {
        "ppo", "gpro", "appo", "mappo", "tcamappo", "dcmappo",
        "dqn", "madqn", "ddqn", "qmix", "dcqmix",
    }
    extension = ".pt" if uses_pt_files else ".zip"

    chosen = os.path.join(checkpoint_dir, f"model_best{extension}")
    if not os.path.isfile(chosen):
        # Escape the directory so glob metacharacters in its name are taken literally.
        pattern = os.path.join(glob.escape(checkpoint_dir), f"model_ep*{extension}")
        episode_files = glob.glob(pattern)
        if not episode_files:
            raise FileNotFoundError(f"No checkpoint file found for {model_name} under: {checkpoint_dir}")
        # Tie-break on the path itself to keep the choice deterministic.
        chosen = max(episode_files, key=lambda p: (_checkpoint_episode_number(p), p))

    # The .zip models are returned without the extension, as before.
    return chosen if uses_pt_files else chosen[: -len(extension)]
|
|
|
|
|
|
def build_value_based_agent(agent_cls, agent_cfg: dict, env: SUMOEdgeVSLEnvironment):
    """Instantiate *agent_cls* with the subset of known kwargs its signature accepts.

    A superset of constructor arguments is assembled from *agent_cfg* (with
    defaults) and from attributes of *env*; only keys that appear as parameter
    names in ``inspect.signature(agent_cls)`` are passed on.
    """
    # Hyper-parameters read from the agent config, with their fallback values.
    config_defaults = {
        "hidden_dim": 256,
        "learning_rate": 3e-4,
        "gamma": 0.99,
        "epsilon_start": 1.0,
        "epsilon_end": 0.01,
        "epsilon_decay": 600,
        "buffer_size": 20000,
        "batch_size": 128,
        "target_update": 5,
        "device": "cuda",
        "num_corridor_blocks": 2,
        "corridor_kernel_size": 5,
        "corridor_dropout": 0.05,
    }
    candidates = {key: agent_cfg.get(key, fallback) for key, fallback in config_defaults.items()}
    # The mixing network width falls back to the configured hidden_dim.
    candidates["mixing_hidden_dim"] = agent_cfg.get("mixing_hidden_dim", agent_cfg.get("hidden_dim", 256))

    # Dimensions taken from the environment.
    candidates.update(
        state_dim=env.state_dim,
        num_edges=env.num_controlled_edges,
        num_actions_per_edge=env.action_dim,
        edge_feature_dim=env.features_per_edge,
        time_feature_dim=3,
        total_edge_count=env.num_edges,
        controlled_start_index=env.controlled_edge_start_index,
    )

    parameter_names = inspect.signature(agent_cls).parameters
    supported = {name: candidates[name] for name in candidates if name in parameter_names}
    return agent_cls(**supported)
|
|
|
|
|
|
def build_agent(model_name: str, config: dict, env: SUMOEdgeVSLEnvironment):
    """Construct the (untrained) agent object for *model_name*.

    Hyper-parameters come from ``get_agent_config(config, model_name)`` with
    per-model defaults; dimensions come from *env*. The keyword groups shared
    between agent families are produced by the local helpers below, which are
    evaluated only for the branch that uses them.

    Raises:
        ValueError: If *model_name* is not one of the supported identifiers.
    """
    agent_cfg = get_agent_config(config, model_name)

    def opt(key, default=None):
        return agent_cfg.get(key, default)

    def joint_action_dims() -> dict:
        return {
            "state_dim": env.state_dim,
            "action_dims": [env.action_dim] * env.num_controlled_edges,
        }

    def per_edge_agent_dims() -> dict:
        return {
            "state_dim": env.state_dim,
            "num_agents": env.num_controlled_edges,
            "num_actions": env.action_dim,
        }

    def edge_layout() -> dict:
        return {
            "edge_feature_dim": env.features_per_edge,
            "total_edge_count": env.num_edges,
            "controlled_start_index": env.controlled_edge_start_index,
        }

    def clipped_policy_opts() -> dict:
        # Options passed to every PPO-style agent, GPRO included.
        return {
            "learning_rate": opt("learning_rate", 3e-4),
            "clip_epsilon": opt("clip_epsilon", 0.2),
            "entropy_coef": opt("entropy_coef", 0.01),
            "max_grad_norm": opt("max_grad_norm", 0.5),
            "ppo_epochs": opt("ppo_epochs", 4),
            "minibatch_size": opt("batch_size", 15),
            "device": opt("device", "cuda"),
            "lr_schedule": opt("lr_schedule", "cosine"),
            "total_episodes": config.get("training", {}).get("num_episodes", 4000),
        }

    def value_estimation_opts() -> dict:
        # Passed to every PPO-style agent except GPRO.
        return {
            "gamma": opt("gamma", 0.99),
            "gae_lambda": opt("gae_lambda", 0.95),
            "value_coef": opt("value_coef", 0.5),
        }

    def mappo_family_opts() -> dict:
        return {
            **per_edge_agent_dims(),
            **edge_layout(),
            "hidden_dim": opt("hidden_dim", 256),
            "critic_hidden_dim": opt("critic_hidden_dim", 256),
            **clipped_policy_opts(),
            **value_estimation_opts(),
        }

    def replay_based_opts() -> dict:
        # Options common to DDPG, SAC, TD3 and SC-TD3.
        return {
            **joint_action_dims(),
            "learning_rate": opt("learning_rate", 3e-4),
            "buffer_size": opt("buffer_size", 20000),
            "learning_starts": opt("learning_starts", 200),
            "batch_size": opt("batch_size", 128),
            "tau": opt("tau", 0.005),
            "gamma": opt("gamma", 0.99),
            "device": opt("device", "cuda"),
            "actor_hidden_dims": opt("actor_hidden_dims"),
            "critic_hidden_dims": opt("critic_hidden_dims"),
            "activation_fn": opt("activation_fn", "relu"),
        }

    # --- PPO-style agents -------------------------------------------------
    if model_name == "ppo":
        return PPOAgent(
            **joint_action_dims(),
            hidden_layers=opt("hidden_layers", [256, 256]),
            **clipped_policy_opts(),
            **value_estimation_opts(),
        )
    if model_name == "gpro":
        return GPROAgent(
            **joint_action_dims(),
            hidden_layers=opt("hidden_layers", [256, 256]),
            **clipped_policy_opts(),
            group_size=opt("group_size", 4),
            advantage_epsilon=opt("advantage_epsilon", 1e-8),
        )
    if model_name == "appo":
        return APPOAgent(
            **joint_action_dims(),
            **edge_layout(),
            hidden_dim=opt("hidden_dim", 128),
            num_heads=opt("num_heads", 4),
            num_layers=opt("num_layers", 2),
            **clipped_policy_opts(),
            **value_estimation_opts(),
        )
    if model_name == "mappo":
        return MAPPOAgent(**mappo_family_opts())
    if model_name == "dcmappo":
        return DCMAPPOAgent(
            **mappo_family_opts(),
            num_corridor_blocks=opt("num_corridor_blocks", 2),
            corridor_kernel_size=opt("corridor_kernel_size", 3),
            corridor_dropout=opt("corridor_dropout", 0.05),
        )
    if model_name == "tcamappo":
        return TCAMAPPOAgent(
            **mappo_family_opts(),
            history_window=opt("history_window", 6),
            critic_num_heads=opt("critic_num_heads", 4),
            critic_num_layers=opt("critic_num_layers", 2),
            critic_dropout=opt("critic_dropout", 0.05),
        )

    # --- Value-based agents share one signature-filtered builder ----------
    value_based_classes = {
        "dqn": DQNAgent,
        "madqn": MADQNAgent,
        "ddqn": DDQNAgent,
        "qmix": QMIXAgent,
        "dcqmix": DCQMIXAgent,
    }
    if model_name in value_based_classes:
        return build_value_based_agent(value_based_classes[model_name], agent_cfg, env)

    # --- Replay-buffer actor-critic agents --------------------------------
    if model_name == "ddpg":
        return DDPGAgent(
            **replay_based_opts(),
            exploration_sigma=opt("exploration_sigma", 0.15),
        )
    if model_name == "sac":
        return SACAgent(
            **replay_based_opts(),
            ent_coef=opt("ent_coef", "auto"),
            target_entropy=opt("target_entropy", "auto"),
            target_update_interval=opt("target_update_interval", 1),
            log_std_init=opt("log_std_init", -3.0),
        )
    if model_name == "td3":
        return TD3Agent(
            **replay_based_opts(),
            policy_delay=opt("policy_delay", 2),
            exploration_sigma=opt("exploration_sigma", 0.15),
        )
    if model_name == "sctd3":
        return SCTD3Agent(
            **replay_based_opts(),
            policy_delay=opt("policy_delay", 2),
            exploration_sigma=opt("exploration_sigma", 0.15),
            **edge_layout(),
            extractor_feature_dim=opt("extractor_feature_dim", 128),
            extractor_edge_hidden_dim=opt("extractor_edge_hidden_dim", 16),
            extractor_global_hidden_dim=opt("extractor_global_hidden_dim", 32),
            extractor_spatial_blocks=opt("extractor_spatial_blocks", 1),
            extractor_kernel_size=opt("extractor_kernel_size", 3),
        )

    raise ValueError(f"Unsupported model name: {model_name}")
|
|
|
|
|
|
def select_deterministic_action(agent, state: np.ndarray) -> np.ndarray:
    """Return the first element of ``agent.select_action(state, deterministic=True)``.

    The agent is expected to return exactly three values; the last two are
    discarded.
    """
    chosen_action, _second, _third = agent.select_action(state, deterministic=True)
    return chosen_action
|
|
|
|
|
|
def select_no_control_action(env: SUMOEdgeVSLEnvironment) -> np.ndarray:
    """Build the baseline action: index ``env.action_dim - 1`` for every controlled edge.

    Returns an empty int64 array when the environment has no controlled edges.
    NOTE(review): the last action index is presumably the "no restriction"
    setting — confirm against the environment's action table.
    """
    edge_count = env.num_controlled_edges
    if edge_count > 0:
        return np.full(edge_count, env.action_dim - 1, dtype=np.int64)
    return np.zeros(0, dtype=np.int64)
|
|
|
|
|
|
def resolve_logged_action_info(
    model_name: str,
    env: SUMOEdgeVSLEnvironment,
    action: Optional[np.ndarray],
    action_applied_mask: List[bool],
    edge_idx: int,
    action_speed_kmh: float,
) -> Tuple[int, float]:
    """Return ``(action_index, action_speed_kmh)`` to log for one edge.

    The action index is ``-1`` when no action was applied to the edge, when
    *action* is ``None``, or when the edge lies outside the controlled range
    (``edge_idx - env.controlled_edge_start_index`` not in ``[0, len(action))``).
    The speed is always passed through unchanged. *model_name* is accepted but
    not used.
    """
    no_action_logged = (-1, action_speed_kmh)
    if not action_applied_mask[edge_idx]:
        return no_action_logged

    # Position of this edge inside the action vector.
    offset = edge_idx - env.controlled_edge_start_index
    if action is None or not 0 <= offset < len(action):
        return no_action_logged
    return int(action[offset]), action_speed_kmh
|
|
|
|
|
|
def update_mainline_travel_time_tracking(
    info: dict,
    mainline_depart_times: Dict[str, float],
    active_mainline_vehicle_ids: set,
    completed_mainline_travel_times: List[float],
) -> Tuple[int, float, float]:
    """Fold one step's departure/arrival events into the travel-time bookkeeping.

    Departures flagged ``is_mainline`` are registered in *mainline_depart_times*
    and *active_mainline_vehicle_ids*. Arrivals of registered vehicles are
    removed from both and, when the elapsed time is not negative, appended to
    *completed_mainline_travel_times*. All three containers are mutated in place.

    Returns:
        ``(completed_this_step, mean_this_step, mean_so_far)``; each mean is
        ``nan`` when its underlying list is empty.
    """

    def mean_or_nan(values):
        return float(np.mean(values)) if values else np.nan

    mainline_departures = (
        event for event in info.get("departed_vehicle_events", []) if event.get("is_mainline", False)
    )
    for departure in mainline_departures:
        vehicle = str(departure["vehicle_id"])
        mainline_depart_times[vehicle] = float(departure["sim_time"])
        active_mainline_vehicle_ids.add(vehicle)

    finished_this_step = []
    for arrival in info.get("arrived_vehicle_events", []):
        vehicle = str(arrival["vehicle_id"])
        if vehicle not in active_mainline_vehicle_ids:
            continue
        active_mainline_vehicle_ids.discard(vehicle)
        departed_at = mainline_depart_times.pop(vehicle, None)
        if departed_at is None:
            continue
        elapsed = float(arrival["sim_time"]) - departed_at
        if elapsed < 0:
            continue
        finished_this_step.append(elapsed)
        completed_mainline_travel_times.append(elapsed)

    return (
        len(finished_this_step),
        mean_or_nan(finished_this_step),
        mean_or_nan(completed_mainline_travel_times),
    )
|
|
|
|
|
|
def _extract_reward_components(info: dict) -> Dict[str, float]:
    """Collect every ``REWARD_COMPONENT_COLUMNS`` entry from *info*, using ``nan`` for missing keys."""
    components = {}
    for column in REWARD_COMPONENT_COLUMNS:
        components[column] = info.get(column, np.nan)
    return components
|
|
|
|
|
|
def evaluate_single_model(
    model_name: str,
    checkpoint_dir: Optional[str],
    fallback_config_path: str,
    output_dir: str,
    seed: int,
    begin_time: Optional[int],
    end_time: Optional[int],
    with_gui: bool,
    step_length: Optional[float],
    route_file: Optional[str],
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, dict]:
    """Run one evaluation episode for *model_name* and collect its metrics.

    Args:
        model_name: A model identifier, or ``BASELINE_NAME`` for the run
            without a trained agent.
        checkpoint_dir: Directory holding the model checkpoint; may be empty
            or ``None`` for the baseline.
        fallback_config_path: Config used as the base for the run config.
        output_dir: Evaluation output root; the environment writes under
            ``<output_dir>/<model_name>``.
        seed: Seed passed to ``env.reset``.
        begin_time: Optional override of ``sumo.begin_time``.
        end_time: Optional override of ``sumo.end_time``.
        with_gui: Value stored in ``sumo.gui``.
        step_length: Optional override of ``sumo.step_length``.
        route_file: Optional override of ``sumo.route_file``.

    Returns:
        ``(step_df, edge_df, detector_df, meta)``: per-step metrics, per-edge
        metrics, per-detector-cell metrics and a dict describing the run.
    """
    is_baseline = model_name == BASELINE_NAME

    # Build the runtime config: checkpoint config + evaluation-only overrides.
    config = load_config_for_checkpoint(checkpoint_dir or "", fallback_config_path)
    runtime_config = copy.deepcopy(config)
    sumo_cfg = runtime_config.setdefault("sumo", {})
    if begin_time is not None:
        sumo_cfg["begin_time"] = begin_time
    if end_time is not None:
        sumo_cfg["end_time"] = end_time
    effective_begin_time = sumo_cfg.get("begin_time")
    effective_end_time = sumo_cfg.get("end_time")
    sumo_cfg["gui"] = with_gui
    if step_length is not None:
        sumo_cfg["step_length"] = step_length
    if route_file is not None:
        sumo_cfg["route_file"] = route_file

    runtime_cfg = runtime_config.setdefault("runtime", {})
    runtime_cfg["output_dir"] = os.path.join(output_dir, model_name)
    runtime_cfg["metrics_subdir"] = "eval_sumo_metrics"
    runtime_cfg["collect_detector_cells"] = True
    runtime_cfg["use_vehicle_subscriptions"] = True
    runtime_cfg["collect_trip_events"] = True
    runtime_cfg["evaluation_mode"] = True

    step_rows = []
    edge_rows = []
    detector_rows = []
    load_path = None
    mainline_depart_times: Dict[str, float] = {}
    active_mainline_vehicle_ids = set()
    completed_mainline_travel_times: List[float] = []

    env = SUMOEdgeVSLEnvironment(runtime_config)
    try:
        agent = None
        if not is_baseline:
            agent = build_agent(model_name, runtime_config, env)
            load_path = resolve_model_load_path(model_name, checkpoint_dir)
            agent.load(load_path)
            if hasattr(agent, "reset_episode"):
                agent.reset_episode()
        tracks_temporal_context = not is_baseline and hasattr(agent, "update_temporal_context")

        model_label = MODEL_LABELS[model_name]
        state = env.reset(seed=seed)
        step_idx = 0

        while True:
            if is_baseline:
                action = select_no_control_action(env)
            else:
                action = select_deterministic_action(agent, state)
            next_state, reward, done, info = env.step(action, apply_control=True)
            if tracks_temporal_context:
                agent.update_temporal_context(state, action, reward, info)
            step_idx += 1

            (
                mainline_completed_count,
                mainline_interval_travel_time_mean_s,
                mainline_travel_time_cumulative_mean_s,
            ) = update_mainline_travel_time_tracking(
                info,
                mainline_depart_times,
                active_mainline_vehicle_ids,
                completed_mainline_travel_times,
            )

            # One row of network-wide metrics per step.
            step_row = {
                "model": model_name,
                "model_label": model_label,
                "step": step_idx,
                "sim_time": info.get("sim_time", np.nan),
                "reward": reward,
                "throughput": info.get("throughput", np.nan),
                "arrived_count": info.get("arrived_count", np.nan),
                "departed_count": info.get("departed_count", np.nan),
                "mean_speed_kmh": info.get("mean_speed_kmh", np.nan),
                "speed_variance_norm": info.get("speed_variance_norm", np.nan),
                "mean_occupancy": info.get("mean_occupancy", np.nan),
                "density": info.get("density", np.nan),
                "num_vehicles": info.get("num_vehicles", np.nan),
                "num_stops": info.get("num_stops", np.nan),
                "mainline_completed_count": mainline_completed_count,
                "mainline_interval_travel_time_mean_s": mainline_interval_travel_time_mean_s,
                "mainline_travel_time_cumulative_mean_s": mainline_travel_time_cumulative_mean_s,
            }
            step_row.update(_extract_reward_components(info))
            step_rows.append(step_row)

            measured_speeds_ms = info.get("edge_speeds_ms", [])
            occupancies = info.get("edge_occupancies", [])
            action_speeds_kmh = info.get("edge_speeds_kmh", [])
            action_applied_mask = info.get("action_applied_mask", [True] * env.num_edges)

            # One row per edge in env.control_edges.
            for edge_idx, edge_id in enumerate(env.control_edges):
                action_index, logged_speed_kmh = resolve_logged_action_info(
                    model_name=model_name,
                    env=env,
                    action=action,
                    action_applied_mask=action_applied_mask,
                    edge_idx=edge_idx,
                    action_speed_kmh=float(action_speeds_kmh[edge_idx]),
                )
                edge_rows.append(
                    {
                        "model": model_name,
                        "model_label": model_label,
                        "step": step_idx,
                        "edge_index": edge_idx,
                        "edge_id": edge_id,
                        "action_index": action_index,
                        "action_speed_kmh": logged_speed_kmh,
                        "action_applied": bool(action_applied_mask[edge_idx]),
                        "measured_speed_kmh": float(measured_speeds_ms[edge_idx] * 3.6),
                        "occupancy": float(occupancies[edge_idx]),
                    }
                )

            # One row per detector cell reported by the environment.
            for detector_cell in info.get("detector_cells", []):
                edge_idx = detector_cell["edge_index"]
                action_index, logged_speed_kmh = resolve_logged_action_info(
                    model_name=model_name,
                    env=env,
                    action=action,
                    action_applied_mask=action_applied_mask,
                    edge_idx=edge_idx,
                    action_speed_kmh=float(action_speeds_kmh[edge_idx]),
                )
                detector_rows.append(
                    {
                        "model": model_name,
                        "model_label": model_label,
                        "step": step_idx,
                        "edge_index": edge_idx,
                        "edge_id": detector_cell["edge_id"],
                        "pos_index": detector_cell["pos_index"],
                        "position_m": detector_cell["position_m"],
                        "distance_m": detector_cell["distance_m"],
                        "cell_id": f"{detector_cell['edge_id']}@{detector_cell['pos_index']}",
                        "action_index": action_index,
                        "action_speed_kmh": logged_speed_kmh,
                        "action_applied": bool(action_applied_mask[edge_idx]),
                        "measured_speed_kmh": float(detector_cell["speed_ms"] * 3.6),
                        "occupancy": float(detector_cell["occupancy"]),
                        "vehicle_count": int(detector_cell["vehicle_count"]),
                    }
                )

            state = next_state
            if done:
                break
    finally:
        # Always release the environment, even when loading or stepping fails.
        env.close()

    step_df = pd.DataFrame(step_rows)
    edge_df = pd.DataFrame(edge_rows)
    detector_df = pd.DataFrame(detector_rows)

    if load_path:
        # .zip checkpoints are resolved without their extension; restore it for the manifest.
        checkpoint_file = load_path if load_path.endswith(".pt") else f"{load_path}.zip"
        checkpoint_path = os.path.abspath(checkpoint_file)
    else:
        checkpoint_path = ""

    meta = {
        "model": model_name,
        "checkpoint_dir": os.path.abspath(checkpoint_dir) if checkpoint_dir else "",
        "checkpoint_path": checkpoint_path,
        "num_steps": len(step_df),
        "num_edges": env.num_edges,
        "begin_time": effective_begin_time,
        "end_time": effective_end_time,
        "with_gui": with_gui,
        "step_length": runtime_config["sumo"].get("step_length"),
        "route_file": runtime_config["sumo"].get("route_file", ""),
        "mainline_completed_total": len(completed_mainline_travel_times),
        "mainline_travel_time_mean_s": (
            float(np.mean(completed_mainline_travel_times))
            if completed_mainline_travel_times
            else np.nan
        ),
    }
    return step_df, edge_df, detector_df, meta
|
|
|
|
|
|
def evaluate_worker(task: Tuple[str, Optional[str], str, str, int, Optional[int], Optional[int], bool, Optional[float], Optional[str]]):
    """Single-argument adapter: unpack *task* into positional args of ``evaluate_single_model``."""
    positional_args = tuple(task)
    return evaluate_single_model(*positional_args)
|
|
|
|
|
|
def build_summary(step_df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate per-step metrics into one summary row per model.

    Rows are grouped by ``(model, model_label)``, aggregated with pandas named
    aggregation (plus a ``<column>_mean`` for every reward component), and
    returned sorted by each model's position in ``EVAL_ORDER``.
    """
    named_aggregations = {
        "steps": ("step", "count"),
        "reward_sum": ("reward", "sum"),
        "reward_mean": ("reward", "mean"),
        "throughput_mean": ("throughput", "mean"),
        "throughput_std": ("throughput", "std"),
        "mean_speed_kmh_mean": ("mean_speed_kmh", "mean"),
        "mean_speed_kmh_std": ("mean_speed_kmh", "std"),
        "speed_variance_norm_mean": ("speed_variance_norm", "mean"),
        "density_mean": ("density", "mean"),
        "stops_total": ("num_stops", "sum"),
        "stops_mean": ("num_stops", "mean"),
        "mainline_completed_total": ("mainline_completed_count", "sum"),
        "mainline_travel_time_mean_s": ("mainline_travel_time_cumulative_mean_s", "last"),
    }
    named_aggregations.update(
        {f"{column}_mean": (column, "mean") for column in REWARD_COMPONENT_COLUMNS}
    )

    per_model = step_df.groupby(["model", "model_label"], sort=False)
    summary_df = per_model.agg(**named_aggregations).reset_index()

    # A std that comes out as NaN is reported as 0.0.
    for std_column in ("throughput_std", "mean_speed_kmh_std"):
        summary_df[std_column] = summary_df[std_column].fillna(0.0)

    eval_rank = {model: position for position, model in enumerate(EVAL_ORDER)}
    return summary_df.sort_values("model", key=lambda models: models.map(eval_rank))
|
|
|
|
|
|
def save_csv_outputs(
    step_df: pd.DataFrame,
    edge_df: pd.DataFrame,
    detector_df: pd.DataFrame,
    summary_df: pd.DataFrame,
    meta_rows: List[dict],
    output_dir: str,
):
    """Write the combined and per-model CSV exports under ``output_dir``.

    Combined files (``step_metrics.csv``, ``edge_metrics.csv``,
    ``detector_metrics.csv``, ``summary.csv``, ``evaluation_manifest.csv``)
    go directly into ``output_dir``. For every model found in
    ``step_df["model"]`` the step, edge and detector rows of that model are
    also written to ``output_dir/per_model/<model>/``.

    Args:
        step_df: Step-level metrics with a ``model`` column.
        edge_df: Edge-level metrics with a ``model`` column.
        detector_df: Detector-level metrics with a ``model`` column.
        summary_df: Per-model summary table.
        meta_rows: One dict per evaluated model; becomes the manifest rows.
        output_dir: Destination directory, created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    metric_frames = (
        ("step_metrics.csv", step_df),
        ("edge_metrics.csv", edge_df),
        ("detector_metrics.csv", detector_df),
    )
    for file_name, frame in metric_frames + (("summary.csv", summary_df),):
        frame.to_csv(os.path.join(output_dir, file_name), index=False)
    manifest_path = os.path.join(output_dir, "evaluation_manifest.csv")
    pd.DataFrame(meta_rows).to_csv(manifest_path, index=False)

    per_model_root = os.path.join(output_dir, "per_model")
    os.makedirs(per_model_root, exist_ok=True)
    for model_name in step_df["model"].unique():
        target_dir = os.path.join(per_model_root, model_name)
        os.makedirs(target_dir, exist_ok=True)
        for file_name, frame in metric_frames:
            model_rows = frame[frame["model"] == model_name]
            model_rows.to_csv(os.path.join(target_dir, file_name), index=False)
|
|
|
|
|
|
def plot_step_comparison(step_df: pd.DataFrame, output_dir: str):
    """Plot per-step time series of the headline metrics, one line per model.

    Each metric gets its own panel in a 4x2 grid; models are drawn in
    ``EVAL_ORDER`` and models with no rows in ``step_df`` are skipped. The
    figure is saved as ``comparison_timeseries.png`` inside ``output_dir``.

    Args:
        step_df: Step-level metrics with ``model``, ``step`` and the metric
            columns plotted below.
        output_dir: Existing directory that receives the PNG.
    """
    panel_specs = (
        ("reward", "Reward"),
        ("throughput", "Throughput (veh/h)"),
        ("mean_speed_kmh", "Mean Speed (km/h)"),
        ("speed_variance_norm", "Normalized Speed Variance"),
        ("num_stops", "Stops"),
        ("density", "Density (veh/km)"),
        ("mainline_travel_time_cumulative_mean_s", "Avg Mainline Travel Time (s)"),
    )
    fig, axes = plt.subplots(4, 2, figsize=(16, 15), sharex=True)
    axes = axes.flatten()

    # Slice each model's rows once; every panel draws from the same subsets.
    labelled_frames = []
    for model_name in EVAL_ORDER:
        model_rows = step_df[step_df["model"] == model_name]
        if not model_rows.empty:
            labelled_frames.append((MODEL_LABELS[model_name], model_rows))

    for panel_index, (column, title) in enumerate(panel_specs):
        panel = axes[panel_index]
        for label, model_rows in labelled_frames:
            panel.plot(model_rows["step"], model_rows[column], label=label, linewidth=1.8)
        panel.set_title(title)
        panel.set_xlabel("Step")
        panel.grid(True, alpha=0.3)

    # Hide the grid cells that have no metric assigned.
    for spare_panel in axes[len(panel_specs):]:
        spare_panel.axis("off")

    axes[0].legend()
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "comparison_timeseries.png"), dpi=160)
    plt.close()
|
|
|
|
|
|
def plot_reward_components(step_df: pd.DataFrame, output_dir: str):
    """Plot every reward component over time, one panel per component.

    Panels are laid out two per row. The grid never has fewer than four rows
    (the historical layout) and grows when ``REWARD_COMPONENT_COLUMNS`` holds
    more than eight entries, so no component is silently left out of the
    figure. Models are drawn in ``EVAL_ORDER``; models with no rows in
    ``step_df`` are skipped. The figure is saved as ``reward_components.png``
    inside ``output_dir``.

    Args:
        step_df: Step-level metrics with ``model``, ``step`` and one column per
            entry of ``REWARD_COMPONENT_COLUMNS``.
        output_dir: Existing directory that receives the PNG.
    """
    components = [
        (column, REWARD_COMPONENT_LABELS[column])
        for column in REWARD_COMPONENT_COLUMNS
    ]

    n_cols = 2
    # Ceiling division; a fixed 4x2 grid would truncate the zip below as soon
    # as a ninth reward component is added.
    n_rows = max(4, (len(components) + n_cols - 1) // n_cols)
    # 3.5 inches per row reproduces the original (15, 14) size at four rows.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 3.5 * n_rows), sharex=True)
    axes = axes.flatten()

    for ax, (column, title) in zip(axes, components):
        for model_name in EVAL_ORDER:
            model_df = step_df[step_df["model"] == model_name]
            if model_df.empty:
                continue
            ax.plot(model_df["step"], model_df[column], label=MODEL_LABELS[model_name], linewidth=1.8)
        ax.set_title(title)
        ax.set_xlabel("Step")
        ax.grid(True, alpha=0.3)

    # Hide the grid cells that have no component assigned.
    for ax in axes[len(components):]:
        ax.axis("off")

    # Only draw a legend when something was plotted; an empty legend just
    # triggers a matplotlib warning.
    handles, _ = axes[0].get_legend_handles_labels()
    if handles:
        axes[0].legend()

    fig.tight_layout()
    fig.savefig(os.path.join(output_dir, "reward_components.png"), dpi=160)
    plt.close(fig)
|
|
|
|
|
|
def plot_summary_bars(summary_df: pd.DataFrame, output_dir: str):
    """Draw one bar chart per summary metric, with one bar per model.

    The charts share a 3x2 grid; the unused cell is hidden. Bars follow the
    row order of ``summary_df`` and are labelled with ``model_label``. The
    figure is saved as ``summary_bars.png`` inside ``output_dir``.

    Args:
        summary_df: Per-model summary containing ``model_label`` and the
            metric columns plotted below.
        output_dir: Existing directory that receives the PNG.
    """
    bar_specs = (
        ("throughput_mean", "Avg Throughput (veh/h)"),
        ("mean_speed_kmh_mean", "Avg Mean Speed (km/h)"),
        ("speed_variance_norm_mean", "Avg Normalized Speed Variance"),
        ("stops_total", "Total Stops"),
        ("mainline_travel_time_mean_s", "Avg Mainline Travel Time (s)"),
    )
    fig, axes = plt.subplots(3, 2, figsize=(14, 12))
    axes = axes.flatten()
    tick_labels = summary_df["model_label"].tolist()
    positions = np.arange(len(tick_labels))

    for panel_index, panel in enumerate(axes):
        if panel_index >= len(bar_specs):
            # No metric left for this cell of the grid.
            panel.axis("off")
            continue
        column, title = bar_specs[panel_index]
        panel.bar(positions, summary_df[column].values)
        panel.set_title(title)
        panel.set_xticks(positions)
        panel.set_xticklabels(tick_labels)
        panel.grid(True, axis="y", alpha=0.3)

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "summary_bars.png"), dpi=160)
    plt.close()
|
|
|
|
|
|
def plot_model_heatmaps(edge_df: pd.DataFrame, detector_df: pd.DataFrame, output_dir: str):
    """Save a three-panel heatmap image (speed, applied VSL, occupancy) per model.

    For each model in ``EVAL_ORDER`` that has both detector and edge rows, the
    long-format data is pivoted into row-by-step grids and written to
    ``output_dir/heatmaps/<model>_heatmaps.png``. Detector rows are ordered by
    ``distance_m`` (then ``edge_index``, ``pos_index``); edge rows by
    ``edge_index``.

    Args:
        edge_df: Edge-level metrics with ``model``, ``edge_index``, ``edge_id``,
            ``step`` and ``action_speed_kmh`` (optionally ``action_applied``).
        detector_df: Detector-level metrics with ``model``, ``edge_index``,
            ``edge_id``, ``pos_index``, ``cell_id``, ``distance_m``, ``step``,
            ``measured_speed_kmh`` and ``occupancy``.
        output_dir: Directory under which the ``heatmaps`` folder is created.
    """
    heatmap_dir = os.path.join(output_dir, "heatmaps")
    os.makedirs(heatmap_dir, exist_ok=True)

    cell_axis_label = "Detector Cell (bottom=upstream, top=downstream)"
    cell_key_columns = ["edge_index", "edge_id", "pos_index", "cell_id", "distance_m"]

    def to_grid(frame, row_key, value_column, row_order):
        """Pivot long rows into a ``row_key`` x ``step`` array in ``row_order``."""
        wide = frame.pivot(index=row_key, columns="step", values=value_column)
        return wide.reindex(row_order).values

    for model_name in EVAL_ORDER:
        model_detectors = detector_df[detector_df["model"] == model_name]
        model_edges = edge_df[edge_df["model"] == model_name]
        if model_detectors.empty or model_edges.empty:
            continue

        unique_cells = model_detectors[cell_key_columns].drop_duplicates()
        unique_cells = unique_cells.sort_values(["distance_m", "edge_index", "pos_index"])
        ordered_cell_ids = unique_cells["cell_id"].tolist()
        speed_grid = to_grid(model_detectors, "cell_id", "measured_speed_kmh", ordered_cell_ids)
        occ_grid = to_grid(model_detectors, "cell_id", "occupancy", ordered_cell_ids)

        unique_edges = model_edges[["edge_index", "edge_id"]].drop_duplicates()
        ordered_edge_ids = unique_edges.sort_values("edge_index")["edge_id"].tolist()

        # Work on a copy so blanking non-applied actions leaves edge_df intact.
        action_rows = model_edges.copy()
        if "action_applied" in action_rows.columns:
            not_applied = ~action_rows["action_applied"].astype(bool)
            action_rows.loc[not_applied, "action_speed_kmh"] = np.nan
        action_grid = to_grid(action_rows, "edge_id", "action_speed_kmh", ordered_edge_ids)

        display_name = MODEL_LABELS[model_name]
        panels = [
            build_speed_panel(
                speed_grid,
                ordered_cell_ids,
                f"{display_name} Measured Speed (km/h)",
                cell_axis_label,
            ),
            build_action_panel(
                action_grid,
                ordered_edge_ids,
                f"{display_name} Applied VSL (km/h)",
            ),
            build_occupancy_panel(
                occ_grid,
                ordered_cell_ids,
                f"{display_name} Occupancy (%)",
                cell_axis_label,
            ),
        ]
        image_path = os.path.join(heatmap_dir, f"{model_name}_heatmaps.png")
        save_heatmap_panels(image_path, panels, xlabel="Decision Step")
|
|
|
|
|
|
def _format_metric(value: float, fmt: str) -> str:
|
|
return "N/A" if pd.isna(value) else format(value, fmt)
|
|
|
|
|
|
def print_summary(summary_df: pd.DataFrame, output_dir: str):
    """Print a human-readable report of the per-model summary to stdout.

    Args:
        summary_df: Per-model summary; each row must provide the columns
            referenced in the report lines below.
        output_dir: Output location, echoed as an absolute path at the end.
    """
    rule = "=" * 72
    print("\n" + rule)
    print("Evaluation Summary")
    print(rule)

    for _, row in summary_df.iterrows():
        # The travel time may be missing, in which case "N/A" is shown.
        travel_time = _format_metric(row["mainline_travel_time_mean_s"], ".2f")
        report_lines = (
            f"\n{row['model_label']}:",
            f" Avg Throughput: {row['throughput_mean']:.1f} veh/h",
            f" Avg Mean Speed: {row['mean_speed_kmh_mean']:.1f} km/h",
            f" Avg Normalized Speed Variance: {row['speed_variance_norm_mean']:.4f}",
            f" Total Stops: {row['stops_total']:.0f}",
            f" Mainline Completed Vehicles: {row['mainline_completed_total']:.0f}",
            " Avg Mainline Travel Time: " + f"{travel_time} s",
            f" Avg Density: {row['density_mean']:.2f} veh/km",
            f" Episode Reward Sum: {row['reward_sum']:.2f}",
        )
        for report_line in report_lines:
            print(report_line)

    print(rule)
    print(f"Saved to: {os.path.abspath(output_dir)}")
|
|
|
|
|
|
def main():
    """Evaluate the selected models plus the no-control baseline and export results.

    Steps:
      1. Resolve CLI arguments, the optional custom route file, the checkpoint
         root and the output directory.
      2. Build one evaluation task per discovered model (in ``MODEL_ORDER``)
         and prepend a ``BASELINE_NAME`` task that reuses the first model's
         checkpoint directory.
      3. Run the tasks serially or in a process pool.
      4. Concatenate the results in task order and write CSVs, plots and the
         console summary.

    Raises:
        FileNotFoundError: If the custom route file does not exist, no model
            matches the selection, or no evaluation task could be created.
    """
    args = parse_args()
    route_file = resolve_project_path(args.route_file)
    if route_file is not None and not os.path.isfile(route_file):
        raise FileNotFoundError(f"Custom route file not found: {route_file}")
    checkpoint_root = resolve_checkpoint_root(args.checkpoint_root)
    model_dirs = discover_model_dirs(checkpoint_root, args.models)
    if not model_dirs:
        raise FileNotFoundError("No models matched the requested selection.")

    output_dir = resolve_eval_output_dir(args.output_dir, checkpoint_root)
    os.makedirs(output_dir, exist_ok=True)

    def make_task(model_name, checkpoint_dir):
        """Build the positional-argument tuple consumed by ``evaluate_worker``."""
        return (
            model_name,
            checkpoint_dir,
            args.config,
            output_dir,
            args.seed,
            args.begin_time,
            args.end_time,
            args.with_gui,
            args.step_length,
            route_file,
        )

    tasks = []
    first_checkpoint_dir = None
    for model_name in MODEL_ORDER:
        checkpoint_dir = model_dirs.get(model_name)
        if checkpoint_dir is None:
            continue
        if first_checkpoint_dir is None:
            first_checkpoint_dir = checkpoint_dir
        tasks.append(make_task(model_name, checkpoint_dir))

    if not tasks:
        raise FileNotFoundError("No evaluation tasks were created.")

    # The baseline is given the first model's checkpoint directory and runs first.
    tasks.insert(0, make_task(BASELINE_NAME, first_checkpoint_dir))

    default_workers = min(len(tasks), max(1, (os.cpu_count() or 2) // 2))
    max_workers = args.workers or default_workers
    if args.with_gui and max_workers > 1:
        print("GUI evaluation requested, forcing workers=1 to avoid multiple SUMO GUI windows.")
        max_workers = 1

    # Results are keyed by model name so the final tables can be assembled in
    # task order no matter which worker finishes first.
    results_by_model = {}

    if max_workers <= 1 or len(tasks) == 1:
        for task in tasks:
            model_name, checkpoint_dir, *_ = task
            print(f"Evaluating {MODEL_LABELS[model_name]} from {checkpoint_dir}")
            results_by_model[model_name] = evaluate_worker(task)
    else:
        print(f"Running evaluation in parallel with {max_workers} workers")
        future_to_model = {}
        with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
            for task in tasks:
                model_name, checkpoint_dir, *_ = task
                print(f"Queueing {MODEL_LABELS[model_name]} from {checkpoint_dir}")
                future = executor.submit(evaluate_worker, task)
                future_to_model[future] = model_name

            for future in concurrent.futures.as_completed(future_to_model):
                model_name = future_to_model[future]
                # result() re-raises any exception from the worker process.
                results_by_model[model_name] = future.result()
                print(f"Finished {MODEL_LABELS[model_name]}")

    # Completion order is non-deterministic in the parallel branch; ordering by
    # task keeps the CSV exports and manifest identical between serial and
    # parallel runs.
    ordered_results = [results_by_model[task[0]] for task in tasks]
    step_df = pd.concat([result[0] for result in ordered_results], ignore_index=True)
    edge_df = pd.concat([result[1] for result in ordered_results], ignore_index=True)
    detector_df = pd.concat([result[2] for result in ordered_results], ignore_index=True)
    meta_rows = [result[3] for result in ordered_results]
    summary_df = build_summary(step_df)

    save_csv_outputs(step_df, edge_df, detector_df, summary_df, meta_rows, output_dir)
    plot_step_comparison(step_df, output_dir)
    plot_reward_components(step_df, output_dir)
    plot_summary_bars(summary_df, output_dir)
    plot_model_heatmaps(edge_df, detector_df, output_dir)
    print_summary(summary_df, output_dir)
|
|
|
|
|
|
# Run the evaluation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|