diff --git a/agents/appo_agent.py b/agents/appo_agent.py index f0c3930..4d28c9d 100644 --- a/agents/appo_agent.py +++ b/agents/appo_agent.py @@ -44,7 +44,6 @@ class MultiDiscreteActorCritic(nn.Module): state_dim: int, action_dims: List[int], edge_feature_dim: int = 3, - time_feature_dim: int = 3, total_edge_count: int | None = None, controlled_start_index: int = 0, hidden_dim: int = 128, @@ -58,21 +57,15 @@ class MultiDiscreteActorCritic(nn.Module): self.num_zones = len(action_dims) self.edge_feature_dim = edge_feature_dim self.speed_feature_dim = 1 - self.time_feature_dim = time_feature_dim self.total_edge_count = int(total_edge_count if total_edge_count is not None else self.num_zones) self.controlled_start_index = int(controlled_start_index) self.controlled_end_index = self.controlled_start_index + self.num_zones if self.controlled_end_index > self.total_edge_count: raise ValueError("controlled action slice exceeds total edge count") self.last_reward_dim = 1 - self.global_feature_dim = self.time_feature_dim + self.last_reward_dim + self.global_feature_dim = self.last_reward_dim self.agent_id_dim = 1 - self.local_obs_dim = ( - self.edge_feature_dim - + self.speed_feature_dim - + self.global_feature_dim - + self.agent_id_dim - ) + self.local_obs_dim = self.edge_feature_dim + self.speed_feature_dim + self.global_feature_dim + self.agent_id_dim self.local_encoder = nn.Sequential( nn.Linear(self.local_obs_dim, hidden_dim), @@ -155,7 +148,6 @@ class APPOAgent: state_dim: int, action_dims: List[int], edge_feature_dim: int = 3, - time_feature_dim: int = 3, total_edge_count: int | None = None, controlled_start_index: int = 0, hidden_dim: int = 128, @@ -189,7 +181,6 @@ class APPOAgent: state_dim=state_dim, action_dims=action_dims, edge_feature_dim=edge_feature_dim, - time_feature_dim=time_feature_dim, total_edge_count=total_edge_count, controlled_start_index=controlled_start_index, hidden_dim=hidden_dim, diff --git a/agents/dcmappo_agent.py b/agents/dcmappo_agent.py index fcabb80..bd814f8 100644 --- a/agents/dcmappo_agent.py +++ b/agents/dcmappo_agent.py @@ -179,7 +179,6 @@ class StructuredCorridorCritic(nn.Module): self, num_agents: int, edge_feature_dim: int, - time_feature_dim: int, hidden_dim: int = 256, num_blocks: int = 2, kernel_size: int = 5, @@ -188,10 +187,9 @@ class StructuredCorridorCritic(nn.Module): super().__init__() self.num_agents = num_agents self.edge_feature_dim = edge_feature_dim - self.time_feature_dim = time_feature_dim self.speed_feature_dim = 1 self.last_reward_dim = 1 - self.global_feature_dim = self.time_feature_dim + self.last_reward_dim + self.global_feature_dim = self.last_reward_dim self.edge_token_dim = self.edge_feature_dim + self.speed_feature_dim self.edge_proj = nn.Sequential( @@ -251,7 +249,6 @@ class DCMAPPOAgent: num_agents: int, num_actions: int, edge_feature_dim: int = 3, - time_feature_dim: int = 3, total_edge_count: int | None = None, controlled_start_index: int = 0, hidden_dim: int = 256, @@ -277,7 +274,6 @@ class DCMAPPOAgent: self.num_agents = num_agents self.num_actions = num_actions self.edge_feature_dim = edge_feature_dim - self.time_feature_dim = time_feature_dim self.total_edge_count = int(total_edge_count if total_edge_count is not None else num_agents) self.controlled_start_index = int(controlled_start_index) self.controlled_end_index = self.controlled_start_index + self.num_agents @@ -294,7 +290,7 @@ class DCMAPPOAgent: self.speed_feature_dim = 1 self.last_reward_dim = 1 - self.global_feature_dim = self.time_feature_dim + self.last_reward_dim + self.global_feature_dim = self.last_reward_dim self.agent_id_dim = 1 self.edge_token_dim = self.edge_feature_dim + self.speed_feature_dim self.condition_dim = self.global_feature_dim + self.agent_id_dim @@ -312,7 +308,6 @@ class DCMAPPOAgent: self.critic = StructuredCorridorCritic( num_agents=num_agents, edge_feature_dim=edge_feature_dim, - time_feature_dim=time_feature_dim, hidden_dim=critic_hidden_dim, num_blocks=max(1, num_corridor_blocks), kernel_size=corridor_kernel_size, diff --git a/agents/dcqmix_agent.py b/agents/dcqmix_agent.py index 635ed26..abdffc9 100644 --- a/agents/dcqmix_agent.py +++ b/agents/dcqmix_agent.py @@ -120,7 +120,6 @@ class DirectionalQMixer(nn.Module): self, num_agents: int, edge_feature_dim: int, - time_feature_dim: int, total_edge_count: int, controlled_start_index: int, mixing_hidden_dim: int = 256, @@ -132,13 +131,12 @@ class DirectionalQMixer(nn.Module): super().__init__() self.num_agents = num_agents self.edge_feature_dim = edge_feature_dim - self.time_feature_dim = time_feature_dim self.speed_feature_dim = 1 self.last_reward_dim = 1 self.total_edge_count = total_edge_count self.controlled_start_index = controlled_start_index self.controlled_end_index = self.controlled_start_index + self.num_agents - self.global_feature_dim = self.time_feature_dim + self.last_reward_dim + self.global_feature_dim = self.last_reward_dim self.edge_token_dim = self.edge_feature_dim + self.speed_feature_dim self.state_encoder = DirectionalMixerStateEncoder( num_agents=self.num_agents, @@ -228,7 +226,6 @@ class DCQMIXAgent(QMIXAgent): target_update: int = 10, device: str = "cuda", edge_feature_dim: int = 3, - time_feature_dim: int = 3, total_edge_count: int | None = None, controlled_start_index: int = 0, num_corridor_blocks: int = 2, @@ -254,7 +251,6 @@ class DCQMIXAgent(QMIXAgent): target_update=target_update, device=device, edge_feature_dim=edge_feature_dim, - time_feature_dim=time_feature_dim, total_edge_count=total_edge_count, controlled_start_index=controlled_start_index, ) @@ -262,7 +258,7 @@ class DCQMIXAgent(QMIXAgent): def _build_utility_network(self, hidden_dim: int) -> nn.Module: return DirectionalUtilityNetwork( edge_token_dim=self.edge_feature_dim + self.speed_feature_dim, - condition_dim=self.time_feature_dim + self.last_reward_dim + self.agent_id_dim, + condition_dim=self.last_reward_dim + self.agent_id_dim, num_agents=self.num_agents, num_actions=self.num_actions_per_agent, hidden_dim=hidden_dim, @@ -275,7 +271,6 @@ class DCQMIXAgent(QMIXAgent): return DirectionalQMixer( num_agents=self.num_agents, edge_feature_dim=self.edge_feature_dim, - time_feature_dim=self.time_feature_dim, total_edge_count=self.total_edge_count, controlled_start_index=self.controlled_start_index, mixing_hidden_dim=mixing_hidden_dim, @@ -302,7 +297,7 @@ class DCQMIXAgent(QMIXAgent): speed_block_start = edge_block speed_block_end = speed_block_start + self.total_edge_count global_block_start = speed_block_end - global_block_end = global_block_start + self.time_feature_dim + self.last_reward_dim + global_block_end = global_block_start + self.last_reward_dim edge_features = state_tensor[:, :edge_block].view(batch_size, self.total_edge_count, self.edge_feature_dim) edge_features = edge_features[:, self.controlled_start_index:self.controlled_end_index, :] diff --git a/agents/mappo_agent.py b/agents/mappo_agent.py index fbdbfdd..41ba8bb 100644 --- a/agents/mappo_agent.py +++ b/agents/mappo_agent.py @@ -71,7 +71,6 @@ class MAPPOAgent: num_agents: int, num_actions: int, edge_feature_dim: int = 3, - time_feature_dim: int = 3, total_edge_count: int | None = None, controlled_start_index: int = 0, hidden_dim: int = 256, @@ -94,7 +93,6 @@ class MAPPOAgent: self.num_agents = num_agents self.num_actions = num_actions self.edge_feature_dim = edge_feature_dim - self.time_feature_dim = time_feature_dim self.total_edge_count = int(total_edge_count if total_edge_count is not None else num_agents) self.controlled_start_index = int(controlled_start_index) self.controlled_end_index = self.controlled_start_index + self.num_agents @@ -112,13 +110,7 @@ class MAPPOAgent: self.speed_feature_dim = 1 self.last_reward_dim = 1 self.agent_id_dim = 1 - self.local_obs_dim = ( - edge_feature_dim - + self.speed_feature_dim - + time_feature_dim - + self.last_reward_dim - + self.agent_id_dim - ) + self.local_obs_dim = edge_feature_dim + self.speed_feature_dim + self.last_reward_dim + self.agent_id_dim self.actor = SharedActor(self.local_obs_dim, num_actions, hidden_dim).to(self.device) self.critic = CentralizedCritic(state_dim, critic_hidden_dim).to(self.device) @@ -156,7 +148,7 @@ class MAPPOAgent: speed_block_start = edge_block speed_block_end = speed_block_start + self.total_edge_count global_block_start = speed_block_end - global_block_end = global_block_start + self.time_feature_dim + self.last_reward_dim + global_block_end = global_block_start + self.last_reward_dim edge_features = state_tensor[:, :edge_block].view(batch_size, self.total_edge_count, self.edge_feature_dim) edge_features = edge_features[:, self.controlled_start_index:self.controlled_end_index, :] diff --git a/agents/qmix_agent.py b/agents/qmix_agent.py index 0cbc0af..9f2189e 100644 --- a/agents/qmix_agent.py +++ b/agents/qmix_agent.py @@ -119,7 +119,6 @@ class QMIXAgent: target_update: int = 10, device: str = "cuda", edge_feature_dim: int = 3, - time_feature_dim: int = 3, total_edge_count: int | None = None, controlled_start_index: int = 0, ): @@ -134,7 +133,6 @@ class QMIXAgent: self.batch_size = int(batch_size) self.target_update = max(int(target_update), 1) self.edge_feature_dim = int(edge_feature_dim) - self.time_feature_dim = int(time_feature_dim) self.speed_feature_dim = 1 self.last_reward_dim = 1 self.agent_id_dim = 1 @@ -144,13 +142,7 @@ class QMIXAgent: if self.controlled_end_index > self.total_edge_count: raise ValueError("controlled action slice exceeds total edge count") - self.local_obs_dim = ( - self.edge_feature_dim - + self.speed_feature_dim - + self.time_feature_dim - + self.last_reward_dim - + self.agent_id_dim - ) + self.local_obs_dim = self.edge_feature_dim + self.speed_feature_dim + self.last_reward_dim + self.agent_id_dim self.utility_net = self._build_utility_network(hidden_dim).to(self.device) self.target_utility_net = self._build_utility_network(hidden_dim).to(self.device) @@ -201,7 +193,7 @@ class QMIXAgent: speed_block_start = edge_block speed_block_end = speed_block_start + self.total_edge_count global_block_start = speed_block_end - global_block_end = global_block_start + self.time_feature_dim + self.last_reward_dim + global_block_end = global_block_start + self.last_reward_dim edge_features = state_tensor[:, :edge_block].view(batch_size, self.total_edge_count, self.edge_feature_dim) edge_features = edge_features[:, self.controlled_start_index:self.controlled_end_index, :] diff --git a/agents/sctd3_agent.py b/agents/sctd3_agent.py index 57b8966..f95ff6a 100644 --- a/agents/sctd3_agent.py +++ b/agents/sctd3_agent.py @@ -80,7 +80,7 @@ class EdgeStructuredExtractor(BaseFeaturesExtractor): State layout is assumed to be: - per-edge traffic features: [speed_norm, occ_norm, flow_norm] * num_edges - per-edge current limit feature: [limit_norm] * num_edges - - global features: [time_progress, sin_t, cos_t, last_reward] + - global features: [last_reward] """ def __init__( @@ -88,7 +88,7 @@ class EdgeStructuredExtractor(BaseFeaturesExtractor): observation_space: spaces.Box, num_edges: int, edge_feature_dim: int = 3, - global_feature_dim: int = 4, + global_feature_dim: int = 1, total_edge_count: int | None = None, controlled_start_index: int = 0, edge_hidden_dim: int = 16, @@ -279,7 +279,7 @@ class SCTD3Agent: "features_extractor_kwargs": { "num_edges": self.num_zones, "edge_feature_dim": edge_feature_dim, - "global_feature_dim": 4, + "global_feature_dim": 1, "total_edge_count": total_edge_count if total_edge_count is not None else self.num_zones, "controlled_start_index": controlled_start_index, "edge_hidden_dim": extractor_edge_hidden_dim, diff --git a/agents/tcamappo_agent.py b/agents/tacmappo_agent.py similarity index 97% rename from agents/tcamappo_agent.py rename to agents/tacmappo_agent.py index 3092c7a..1b75c6f 100644 --- a/agents/tcamappo_agent.py +++ b/agents/tacmappo_agent.py @@ -1,5 +1,5 @@ """ -Temporal Credit Assignment MAPPO for SUMO VSL. +TAC-MAPPO for SUMO VSL. - Actor: same decentralized shared actor style as MAPPO - Critic: current-state query attends over recent decision/outcome history @@ -122,7 +122,7 @@ class TemporalCreditCritic(nn.Module): return self.head(fused) -class TCAMAPPOAgent: +class TACMAPPOAgent: """MAPPO with a temporal credit-assignment critic.""" def __init__( @@ -131,7 +131,6 @@ class TCAMAPPOAgent: num_agents: int, num_actions: int, edge_feature_dim: int = 3, - time_feature_dim: int = 3, total_edge_count: int | None = None, controlled_start_index: int = 0, hidden_dim: int = 256, @@ -158,7 +157,6 @@ class TCAMAPPOAgent: self.num_agents = num_agents self.num_actions = num_actions self.edge_feature_dim = edge_feature_dim - self.time_feature_dim = time_feature_dim self.total_edge_count = int(total_edge_count if total_edge_count is not None else num_agents) self.controlled_start_index = int(controlled_start_index) self.controlled_end_index = self.controlled_start_index + self.num_agents @@ -177,13 +175,7 @@ class TCAMAPPOAgent: self.speed_feature_dim = 1 self.last_reward_dim = 1 self.agent_id_dim = 1 - self.local_obs_dim = ( - edge_feature_dim - + self.speed_feature_dim - + time_feature_dim - + self.last_reward_dim - + self.agent_id_dim - ) + self.local_obs_dim = edge_feature_dim + self.speed_feature_dim + self.last_reward_dim + self.agent_id_dim self.reward_feature_dim = 1 + len(REWARD_COMPONENT_COLUMNS) self.history_token_dim = state_dim + num_agents + self.reward_feature_dim @@ -239,7 +231,7 @@ class TCAMAPPOAgent: speed_block_start = edge_block speed_block_end = speed_block_start + self.total_edge_count global_block_start = speed_block_end - global_block_end = global_block_start + self.time_feature_dim + self.last_reward_dim + global_block_end = global_block_start + self.last_reward_dim edge_features = state_tensor[:, :edge_block].view(batch_size, self.total_edge_count, self.edge_feature_dim) edge_features = edge_features[:, self.controlled_start_index:self.controlled_end_index, :] diff --git a/config_sumo_vsl.yaml b/config_sumo_vsl.yaml index b85eebf..6f1c9a1 100644 --- a/config_sumo_vsl.yaml +++ b/config_sumo_vsl.yaml @@ -243,7 +243,7 @@ agents: batch_size: 15 lr_schedule: "cosine" - tcamappo: + tacmappo: hidden_dim: 256 critic_hidden_dim: 256 history_window: 6 diff --git a/envs/edge_vsl_env.py b/envs/edge_vsl_env.py index e744921..a0c4d48 100644 --- a/envs/edge_vsl_env.py +++ b/envs/edge_vsl_env.py @@ -175,7 +175,7 @@ class SUMOEdgeVSLEnvironment: self.action_dims = [self.num_speed_actions] * self.num_controlled_edges self.features_per_edge = 3 - self._state_dim = (self.features_per_edge + 1) * self.num_edges + 3 + 1 + self._state_dim = (self.features_per_edge + 1) * self.num_edges + 1 self.current_step = 0 self._sumo_running = False @@ -810,7 +810,6 @@ class SUMOEdgeVSLEnvironment: done = self.current_step >= self.episode_length info["reward"] = reward - info["step"] = self.current_step info["edge_speeds_kmh"] = (edge_speeds * 3.6).tolist() info["action_applied_mask"] = [ bool(apply_control and idx not in self.passive_segment_indices) @@ -1024,10 +1023,6 @@ class SUMOEdgeVSLEnvironment: for idx in range(self.num_edges): state_parts.append(self.current_edge_speeds[idx] / self.free_flow_speed) - time_progress = self.current_step / max(self.episode_length, 1) - state_parts.append(time_progress) - state_parts.append(np.sin(2 * np.pi * time_progress)) - state_parts.append(np.cos(2 * np.pi * time_progress)) state_parts.append(self._last_reward) return np.array(state_parts, dtype=np.float32) diff --git a/envs/reward_design_blueprint.py b/envs/reward_design_blueprint.py index 295ffaa..409b225 100644 --- a/envs/reward_design_blueprint.py +++ b/envs/reward_design_blueprint.py @@ -73,7 +73,7 @@ class RewardBlueprint: return "\n".join(lines).rstrip() + "\n" -def build_tca_mappo_reward_blueprint() -> RewardBlueprint: +def build_tac_mappo_reward_blueprint() -> RewardBlueprint: """Build the April-style four-term reward blueprint for corridor VSL.""" terms = ( @@ -127,7 +127,7 @@ def build_tca_mappo_reward_blueprint() -> RewardBlueprint: ) return RewardBlueprint( - name="Four-Term Reward Blueprint For TCA-MAPPO", + name="Four-Term Reward Blueprint For TAC-MAPPO", scenario_summary=( "The study controls a segmented freeway VSL corridor under fixed control intervals. " "The reward should stay simple, bounded, and decomposable so that each term has a clear " @@ -162,12 +162,12 @@ def build_tca_mappo_reward_blueprint() -> RewardBlueprint: def build_reward_blueprint_markdown() -> str: - return build_tca_mappo_reward_blueprint().to_markdown() + return build_tac_mappo_reward_blueprint().to_markdown() def iter_required_signals() -> Iterable[str]: signals = set() - for term in build_tca_mappo_reward_blueprint().terms: + for term in build_tac_mappo_reward_blueprint().terms: signals.update(term.required_signals) return tuple(sorted(signals)) diff --git a/scripts/evaluate_models.py b/scripts/evaluate_models.py index d025962..e5a891d 100644 --- a/scripts/evaluate_models.py +++ b/scripts/evaluate_models.py @@ -35,7 +35,7 @@ from agents.dcqmix_agent import DCQMIXAgent from agents.rule_vsl_agent import RULE_VSL_AGENT_CLASSES from agents.sac_agent import SACAgent from agents.sctd3_agent import SCTD3Agent -from agents.tcamappo_agent import TCAMAPPOAgent +from agents.tacmappo_agent import TACMAPPOAgent from agents.td3_agent import TD3Agent from envs.edge_vsl_env import SUMOEdgeVSLEnvironment from envs.reward_system import REWARD_COMPONENT_COLUMNS, REWARD_COMPONENT_LABELS @@ -50,7 +50,7 @@ from utils.run_dirs import find_shared_config_path, resolve_checkpoint_root RULE_BASELINE_NAMES = ("occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl") -MODEL_ORDER = [*RULE_BASELINE_NAMES, "ppo", "gpro", "appo", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"] +MODEL_ORDER = [*RULE_BASELINE_NAMES, "ppo", "gpro", "appo", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"] BASELINE_NAME = "no_control" EVAL_ORDER = [BASELINE_NAME] + MODEL_ORDER MODEL_LABELS = { @@ -62,7 +62,7 @@ MODEL_LABELS = { "gpro": "GPRO-PPO", "appo": "APPO", "mappo": "MAPPO", - "tcamappo": "TCA-MAPPO", + "tacmappo": "TAC-MAPPO", "dcmappo": "DC-MAPPO", "dqn": "DQN", "madqn": "MA-DQN", @@ -101,7 +101,7 @@ def parse_args(): "--models", nargs="*", default=None, - help="Subset of models to evaluate, e.g. --models occ_rule_vsl bottleneck_rule_vsl harmonization_rule_vsl ppo gpro tcamappo dcmappo dqn madqn ddqn qmix dcqmix ddpg d3pg sac td3 sctd3", + help="Subset of models to evaluate, e.g. --models occ_rule_vsl bottleneck_rule_vsl harmonization_rule_vsl ppo gpro tacmappo dcmappo dqn madqn ddqn qmix dcqmix ddpg d3pg sac td3 sctd3", ) parser.add_argument("--seed", type=int, default=42, help="Evaluation seed.") parser.add_argument( @@ -265,7 +265,7 @@ def resolve_project_path(path_str: Optional[str]) -> Optional[str]: def resolve_model_load_path(model_name: str, checkpoint_dir: str) -> str: if model_name in RULE_BASELINE_NAMES: return "" - if model_name in {"ppo", "gpro", "appo", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix"}: + if model_name in {"ppo", "gpro", "appo", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix"}: best_path = os.path.join(checkpoint_dir, "model_best.pt") if os.path.isfile(best_path): return best_path @@ -304,7 +304,6 @@ def build_value_based_agent(agent_cls, agent_cfg: dict, env: SUMOEdgeVSLEnvironm "target_update": agent_cfg.get("target_update", 5), "device": agent_cfg.get("device", "cuda"), "edge_feature_dim": env.features_per_edge, - "time_feature_dim": 3, "total_edge_count": env.num_edges, "controlled_start_index": env.controlled_edge_start_index, "num_corridor_blocks": agent_cfg.get("num_corridor_blocks", 2), @@ -431,8 +430,8 @@ def build_agent(model_name: str, config: dict, env: SUMOEdgeVSLEnvironment): lr_schedule=agent_cfg.get("lr_schedule", "cosine"), total_episodes=config.get("training", {}).get("num_episodes", 4000), ) - if model_name == "tcamappo": - return TCAMAPPOAgent( + if model_name == "tacmappo": + return TACMAPPOAgent( state_dim=env.state_dim, num_agents=env.num_controlled_edges, num_actions=env.action_dim, diff --git a/scripts/plot_live_training.py b/scripts/plot_live_training.py index 21e27f0..286aad6 100644 --- a/scripts/plot_live_training.py +++ b/scripts/plot_live_training.py @@ -20,7 +20,7 @@ from envs.reward_system import REWARD_COMPONENT_COLUMNS, REWARD_COMPONENT_LABELS from utils.run_dirs import find_latest_run_root, find_run_root_by_timestamp -MODEL_ORDER = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "appo", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"] +MODEL_ORDER = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "appo", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"] MODEL_LABELS = { "occ_rule_vsl": "Occ-Rule-VSL", "bottleneck_rule_vsl": "Bottleneck-Rule-VSL", @@ -29,7 +29,7 @@ MODEL_LABELS = { "gpro": "GPRO-PPO", "appo": "APPO", "mappo": "MAPPO", - "tcamappo": "TCA-MAPPO", + "tacmappo": "TAC-MAPPO", "dcmappo": "DC-MAPPO", "dqn": "DQN", "madqn": "MA-DQN", @@ -50,7 +50,7 @@ MODEL_COLORS = { "gpro": "#6a3d9a", "appo": "#ff7f0e", "mappo": "#2ca02c", - "tcamappo": "#7f7f7f", + "tacmappo": "#7f7f7f", "dcmappo": "#8c564b", "dqn": "#d62728", "madqn": "#ff9896", @@ -69,7 +69,7 @@ FLOW_LABEL = REWARD_COMPONENT_LABELS.get(FLOW_COLUMN, "Flow") def parse_args(): parser = argparse.ArgumentParser(description="Plot live training progress from run logs.") - parser.add_argument("--model", default=None, help="Model name, e.g. ppo/gpro/appo/mappo/tcamappo/dcmappo/dqn/madqn/ddqn/qmix/dcqmix/ddpg/d3pg/sac/td3/sctd3") + parser.add_argument("--model", default=None, help="Model name, e.g. ppo/gpro/appo/mappo/tacmappo/dcmappo/dqn/madqn/ddqn/qmix/dcqmix/ddpg/d3pg/sac/td3/sctd3") parser.add_argument( "--all-models", action="store_true", diff --git a/training/registry.py b/training/registry.py index f731f78..66ac6d0 100644 --- a/training/registry.py +++ b/training/registry.py @@ -20,13 +20,13 @@ from training.train_rule_vsl import ( ) from training.train_sac import train_sumo_sac from training.train_sctd3 import train_sumo_sctd3 -from training.train_tcamappo import train_sumo_tcamappo +from training.train_tacmappo import train_sumo_tacmappo from training.train_td3 import train_sumo_td3 # DEFAULT_MODELS: List[str] = ["ppo"] -DEFAULT_MODELS: List[str] = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3"] -ALL_MODELS: List[str] = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "appo", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"] +DEFAULT_MODELS: List[str] = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3"] +ALL_MODELS: List[str] = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "appo", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"] TRAINERS: Dict[str, Callable] = { @@ -37,7 +37,7 @@ TRAINERS: Dict[str, Callable] = { "gpro": train_sumo_gpro, "appo": train_sumo_appo, "mappo": train_sumo_mappo, - "tcamappo": train_sumo_tcamappo, + "tacmappo": train_sumo_tacmappo, "dcmappo": train_sumo_dcmappo, "dqn": train_sumo_dqn, "madqn": train_sumo_madqn, diff --git a/training/train_tcamappo.py b/training/train_tacmappo.py similarity index 96% rename from training/train_tcamappo.py rename to training/train_tacmappo.py index ff3ae15..84d3f85 100644 --- a/training/train_tcamappo.py +++ b/training/train_tacmappo.py @@ -1,5 +1,5 @@ """ -Temporal Credit Assignment MAPPO training script for SUMO + TraCI VSL. +TAC-MAPPO training script for SUMO + TraCI VSL. """ import copy import os @@ -11,7 +11,7 @@ from tqdm import tqdm matplotlib.use("Agg") -from agents.tcamappo_agent import TCAMAPPOAgent +from agents.tacmappo_agent import TACMAPPOAgent from envs.edge_vsl_env import SUMOEdgeVSLEnvironment from envs.reward_system import REWARD_COMPONENT_COLUMNS, average_reward_components, init_reward_component_totals from utils.config import get_agent_config, get_training_config @@ -22,17 +22,17 @@ from utils.run_dirs import resolve_run_dirs, write_shared_run_config from utils.seeding import derive_seed, resolve_base_seed, set_global_seed -def train_sumo_tcamappo(log_dir=None, checkpoint_dir=None, run_timestamp=None): +def train_sumo_tacmappo(log_dir=None, checkpoint_dir=None, run_timestamp=None): with open("config_sumo_vsl.yaml", "r", encoding="utf-8") as f: config = yaml.safe_load(f) - agent_config = get_agent_config(config, "tcamappo") + agent_config = get_agent_config(config, "tacmappo") train_config = get_training_config(config) base_seed = resolve_base_seed(train_config) set_global_seed(base_seed) _, checkpoint_dir, log_dir = resolve_run_dirs( - "tcamappo", + "tacmappo", log_dir=log_dir, checkpoint_dir=checkpoint_dir, run_timestamp=run_timestamp, @@ -49,11 +49,11 @@ def train_sumo_tcamappo(log_dir=None, checkpoint_dir=None, run_timestamp=None): run_timestamp=run_timestamp, ) - logger = TrainingLogger(log_dir, "tcamappo") + logger = TrainingLogger(log_dir, "tacmappo") env = SUMOEdgeVSLEnvironment(runtime_config) print("=" * 70) - print("TCA-MAPPO training - SUMO+TraCI VSL environment") + print("TAC-MAPPO training - SUMO+TraCI VSL environment") print("=" * 70) print(f" State dim: {env.state_dim}") print(f" Agents: {env.num_edges}") @@ -68,7 +68,7 @@ def train_sumo_tcamappo(log_dir=None, checkpoint_dir=None, run_timestamp=None): print(f" Global seed: {base_seed if base_seed is not None else 'None (random)'}") print() - agent = TCAMAPPOAgent( + agent = TACMAPPOAgent( state_dim=env.state_dim, num_agents=env.num_controlled_edges, num_actions=env.action_dim, diff --git a/training/train_value_based.py b/training/train_value_based.py index 87cc938..c8852c0 100644 --- a/training/train_value_based.py +++ b/training/train_value_based.py @@ -43,7 +43,6 @@ def _build_value_based_agent(agent_builder: Callable[..., object], env, agent_co "target_update": agent_config.get("target_update", 10), "device": agent_config.get("device", "cuda"), "edge_feature_dim": env.features_per_edge, - "time_feature_dim": 3, "total_edge_count": env.num_edges, "controlled_start_index": env.controlled_edge_start_index, "num_corridor_blocks": agent_config.get("num_corridor_blocks", 2),