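在训练中删去训练时长相关信息和时序信息:从环境状态及各智能体中移除时间特征(time_progress、sin_t、cos_t,即 time_feature_dim,状态维度相应减少 3),不再输出 info["step"];同时将 TCA-MAPPO 统一重命名为 TAC-MAPPO(模块、类名、配置键与训练入口)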

2026-05-11 17:31:06 +08:00 · 2026-05-11 17:31:06 +08:00 · cb7c7964ae
parent ad65d7caf6
commit cb7c7964ae
15 changed files with 47 additions and 97 deletions
--- a/agents/appo_agent.py
+++ b/agents/appo_agent.py
@ -44,7 +44,6 @@ class MultiDiscreteActorCritic(nn.Module):
        state_dim: int,
        action_dims: List[int],
        edge_feature_dim: int = 3,
-        time_feature_dim: int = 3,
        total_edge_count: int | None = None,
        controlled_start_index: int = 0,
        hidden_dim: int = 128,
@ -58,21 +57,15 @@ class MultiDiscreteActorCritic(nn.Module):
        self.num_zones = len(action_dims)
        self.edge_feature_dim = edge_feature_dim
        self.speed_feature_dim = 1
-        self.time_feature_dim = time_feature_dim
        self.total_edge_count = int(total_edge_count if total_edge_count is not None else self.num_zones)
        self.controlled_start_index = int(controlled_start_index)
        self.controlled_end_index = self.controlled_start_index + self.num_zones
        if self.controlled_end_index > self.total_edge_count:
            raise ValueError("controlled action slice exceeds total edge count")
        self.last_reward_dim = 1
-        self.global_feature_dim = self.time_feature_dim + self.last_reward_dim
+        self.global_feature_dim = self.last_reward_dim
        self.agent_id_dim = 1
-        self.local_obs_dim = (
-            self.edge_feature_dim
-            + self.speed_feature_dim
-            + self.global_feature_dim
-            + self.agent_id_dim
-        )
+        self.local_obs_dim = self.edge_feature_dim + self.speed_feature_dim + self.global_feature_dim + self.agent_id_dim

        self.local_encoder = nn.Sequential(
            nn.Linear(self.local_obs_dim, hidden_dim),
@ -155,7 +148,6 @@ class APPOAgent:
        state_dim: int,
        action_dims: List[int],
        edge_feature_dim: int = 3,
-        time_feature_dim: int = 3,
        total_edge_count: int | None = None,
        controlled_start_index: int = 0,
        hidden_dim: int = 128,
@ -189,7 +181,6 @@ class APPOAgent:
            state_dim=state_dim,
            action_dims=action_dims,
            edge_feature_dim=edge_feature_dim,
-            time_feature_dim=time_feature_dim,
            total_edge_count=total_edge_count,
            controlled_start_index=controlled_start_index,
            hidden_dim=hidden_dim,
--- a/agents/dcmappo_agent.py
+++ b/agents/dcmappo_agent.py
@ -179,7 +179,6 @@ class StructuredCorridorCritic(nn.Module):
        self,
        num_agents: int,
        edge_feature_dim: int,
-        time_feature_dim: int,
        hidden_dim: int = 256,
        num_blocks: int = 2,
        kernel_size: int = 5,
@ -188,10 +187,9 @@ class StructuredCorridorCritic(nn.Module):
        super().__init__()
        self.num_agents = num_agents
        self.edge_feature_dim = edge_feature_dim
-        self.time_feature_dim = time_feature_dim
        self.speed_feature_dim = 1
        self.last_reward_dim = 1
-        self.global_feature_dim = self.time_feature_dim + self.last_reward_dim
+        self.global_feature_dim = self.last_reward_dim
        self.edge_token_dim = self.edge_feature_dim + self.speed_feature_dim

        self.edge_proj = nn.Sequential(
@ -251,7 +249,6 @@ class DCMAPPOAgent:
        num_agents: int,
        num_actions: int,
        edge_feature_dim: int = 3,
-        time_feature_dim: int = 3,
        total_edge_count: int | None = None,
        controlled_start_index: int = 0,
        hidden_dim: int = 256,
@ -277,7 +274,6 @@ class DCMAPPOAgent:
        self.num_agents = num_agents
        self.num_actions = num_actions
        self.edge_feature_dim = edge_feature_dim
-        self.time_feature_dim = time_feature_dim
        self.total_edge_count = int(total_edge_count if total_edge_count is not None else num_agents)
        self.controlled_start_index = int(controlled_start_index)
        self.controlled_end_index = self.controlled_start_index + self.num_agents
@ -294,7 +290,7 @@ class DCMAPPOAgent:

        self.speed_feature_dim = 1
        self.last_reward_dim = 1
-        self.global_feature_dim = self.time_feature_dim + self.last_reward_dim
+        self.global_feature_dim = self.last_reward_dim
        self.agent_id_dim = 1
        self.edge_token_dim = self.edge_feature_dim + self.speed_feature_dim
        self.condition_dim = self.global_feature_dim + self.agent_id_dim
@ -312,7 +308,6 @@ class DCMAPPOAgent:
        self.critic = StructuredCorridorCritic(
            num_agents=num_agents,
            edge_feature_dim=edge_feature_dim,
-            time_feature_dim=time_feature_dim,
            hidden_dim=critic_hidden_dim,
            num_blocks=max(1, num_corridor_blocks),
            kernel_size=corridor_kernel_size,
--- a/agents/dcqmix_agent.py
+++ b/agents/dcqmix_agent.py
@ -120,7 +120,6 @@ class DirectionalQMixer(nn.Module):
        self,
        num_agents: int,
        edge_feature_dim: int,
-        time_feature_dim: int,
        total_edge_count: int,
        controlled_start_index: int,
        mixing_hidden_dim: int = 256,
@ -132,13 +131,12 @@ class DirectionalQMixer(nn.Module):
        super().__init__()
        self.num_agents = num_agents
        self.edge_feature_dim = edge_feature_dim
-        self.time_feature_dim = time_feature_dim
        self.speed_feature_dim = 1
        self.last_reward_dim = 1
        self.total_edge_count = total_edge_count
        self.controlled_start_index = controlled_start_index
        self.controlled_end_index = self.controlled_start_index + self.num_agents
-        self.global_feature_dim = self.time_feature_dim + self.last_reward_dim
+        self.global_feature_dim = self.last_reward_dim
        self.edge_token_dim = self.edge_feature_dim + self.speed_feature_dim
        self.state_encoder = DirectionalMixerStateEncoder(
            num_agents=self.num_agents,
@ -228,7 +226,6 @@ class DCQMIXAgent(QMIXAgent):
        target_update: int = 10,
        device: str = "cuda",
        edge_feature_dim: int = 3,
-        time_feature_dim: int = 3,
        total_edge_count: int | None = None,
        controlled_start_index: int = 0,
        num_corridor_blocks: int = 2,
@ -254,7 +251,6 @@ class DCQMIXAgent(QMIXAgent):
            target_update=target_update,
            device=device,
            edge_feature_dim=edge_feature_dim,
-            time_feature_dim=time_feature_dim,
            total_edge_count=total_edge_count,
            controlled_start_index=controlled_start_index,
        )
@ -262,7 +258,7 @@ class DCQMIXAgent(QMIXAgent):
    def _build_utility_network(self, hidden_dim: int) -> nn.Module:
        return DirectionalUtilityNetwork(
            edge_token_dim=self.edge_feature_dim + self.speed_feature_dim,
-            condition_dim=self.time_feature_dim + self.last_reward_dim + self.agent_id_dim,
+            condition_dim=self.last_reward_dim + self.agent_id_dim,
            num_agents=self.num_agents,
            num_actions=self.num_actions_per_agent,
            hidden_dim=hidden_dim,
@ -275,7 +271,6 @@ class DCQMIXAgent(QMIXAgent):
        return DirectionalQMixer(
            num_agents=self.num_agents,
            edge_feature_dim=self.edge_feature_dim,
-            time_feature_dim=self.time_feature_dim,
            total_edge_count=self.total_edge_count,
            controlled_start_index=self.controlled_start_index,
            mixing_hidden_dim=mixing_hidden_dim,
@ -302,7 +297,7 @@ class DCQMIXAgent(QMIXAgent):
        speed_block_start = edge_block
        speed_block_end = speed_block_start + self.total_edge_count
        global_block_start = speed_block_end
-        global_block_end = global_block_start + self.time_feature_dim + self.last_reward_dim
+        global_block_end = global_block_start + self.last_reward_dim

        edge_features = state_tensor[:, :edge_block].view(batch_size, self.total_edge_count, self.edge_feature_dim)
        edge_features = edge_features[:, self.controlled_start_index:self.controlled_end_index, :]
--- a/agents/mappo_agent.py
+++ b/agents/mappo_agent.py
@ -71,7 +71,6 @@ class MAPPOAgent:
        num_agents: int,
        num_actions: int,
        edge_feature_dim: int = 3,
-        time_feature_dim: int = 3,
        total_edge_count: int | None = None,
        controlled_start_index: int = 0,
        hidden_dim: int = 256,
@ -94,7 +93,6 @@ class MAPPOAgent:
        self.num_agents = num_agents
        self.num_actions = num_actions
        self.edge_feature_dim = edge_feature_dim
-        self.time_feature_dim = time_feature_dim
        self.total_edge_count = int(total_edge_count if total_edge_count is not None else num_agents)
        self.controlled_start_index = int(controlled_start_index)
        self.controlled_end_index = self.controlled_start_index + self.num_agents
@ -112,13 +110,7 @@ class MAPPOAgent:
        self.speed_feature_dim = 1
        self.last_reward_dim = 1
        self.agent_id_dim = 1
-        self.local_obs_dim = (
-            edge_feature_dim
-            + self.speed_feature_dim
-            + time_feature_dim
-            + self.last_reward_dim
-            + self.agent_id_dim
-        )
+        self.local_obs_dim = edge_feature_dim + self.speed_feature_dim + self.last_reward_dim + self.agent_id_dim

        self.actor = SharedActor(self.local_obs_dim, num_actions, hidden_dim).to(self.device)
        self.critic = CentralizedCritic(state_dim, critic_hidden_dim).to(self.device)
@ -156,7 +148,7 @@ class MAPPOAgent:
        speed_block_start = edge_block
        speed_block_end = speed_block_start + self.total_edge_count
        global_block_start = speed_block_end
-        global_block_end = global_block_start + self.time_feature_dim + self.last_reward_dim
+        global_block_end = global_block_start + self.last_reward_dim

        edge_features = state_tensor[:, :edge_block].view(batch_size, self.total_edge_count, self.edge_feature_dim)
        edge_features = edge_features[:, self.controlled_start_index:self.controlled_end_index, :]
--- a/agents/qmix_agent.py
+++ b/agents/qmix_agent.py
@ -119,7 +119,6 @@ class QMIXAgent:
        target_update: int = 10,
        device: str = "cuda",
        edge_feature_dim: int = 3,
-        time_feature_dim: int = 3,
        total_edge_count: int | None = None,
        controlled_start_index: int = 0,
    ):
@ -134,7 +133,6 @@ class QMIXAgent:
        self.batch_size = int(batch_size)
        self.target_update = max(int(target_update), 1)
        self.edge_feature_dim = int(edge_feature_dim)
-        self.time_feature_dim = int(time_feature_dim)
        self.speed_feature_dim = 1
        self.last_reward_dim = 1
        self.agent_id_dim = 1
@ -144,13 +142,7 @@ class QMIXAgent:
        if self.controlled_end_index > self.total_edge_count:
            raise ValueError("controlled action slice exceeds total edge count")

-        self.local_obs_dim = (
-            self.edge_feature_dim
-            + self.speed_feature_dim
-            + self.time_feature_dim
-            + self.last_reward_dim
-            + self.agent_id_dim
-        )
+        self.local_obs_dim = self.edge_feature_dim + self.speed_feature_dim + self.last_reward_dim + self.agent_id_dim

        self.utility_net = self._build_utility_network(hidden_dim).to(self.device)
        self.target_utility_net = self._build_utility_network(hidden_dim).to(self.device)
@ -201,7 +193,7 @@ class QMIXAgent:
        speed_block_start = edge_block
        speed_block_end = speed_block_start + self.total_edge_count
        global_block_start = speed_block_end
-        global_block_end = global_block_start + self.time_feature_dim + self.last_reward_dim
+        global_block_end = global_block_start + self.last_reward_dim

        edge_features = state_tensor[:, :edge_block].view(batch_size, self.total_edge_count, self.edge_feature_dim)
        edge_features = edge_features[:, self.controlled_start_index:self.controlled_end_index, :]
--- a/agents/sctd3_agent.py
+++ b/agents/sctd3_agent.py
@ -80,7 +80,7 @@ class EdgeStructuredExtractor(BaseFeaturesExtractor):
    State layout is assumed to be:
    - per-edge traffic features: [speed_norm, occ_norm, flow_norm] * num_edges
    - per-edge current limit feature: [limit_norm] * num_edges
-    - global features: [time_progress, sin_t, cos_t, last_reward]
+    - global features: [last_reward]
    """

    def __init__(
@ -88,7 +88,7 @@ class EdgeStructuredExtractor(BaseFeaturesExtractor):
        observation_space: spaces.Box,
        num_edges: int,
        edge_feature_dim: int = 3,
-        global_feature_dim: int = 4,
+        global_feature_dim: int = 1,
        total_edge_count: int | None = None,
        controlled_start_index: int = 0,
        edge_hidden_dim: int = 16,
@ -279,7 +279,7 @@ class SCTD3Agent:
            "features_extractor_kwargs": {
                "num_edges": self.num_zones,
                "edge_feature_dim": edge_feature_dim,
-                "global_feature_dim": 4,
+                "global_feature_dim": 1,
                "total_edge_count": total_edge_count if total_edge_count is not None else self.num_zones,
                "controlled_start_index": controlled_start_index,
                "edge_hidden_dim": extractor_edge_hidden_dim,
--- a/agents/tacmappo_agent.py
+++ b/agents/tacmappo_agent.py
@ -1,5 +1,5 @@
 """
-Temporal Credit Assignment MAPPO for SUMO VSL.
+TAC-MAPPO for SUMO VSL.

 - Actor: same decentralized shared actor style as MAPPO
 - Critic: current-state query attends over recent decision/outcome history
@ -122,7 +122,7 @@ class TemporalCreditCritic(nn.Module):
        return self.head(fused)


-class TCAMAPPOAgent:
+class TACMAPPOAgent:
    """MAPPO with a temporal credit-assignment critic."""

    def __init__(
@ -131,7 +131,6 @@ class TCAMAPPOAgent:
        num_agents: int,
        num_actions: int,
        edge_feature_dim: int = 3,
-        time_feature_dim: int = 3,
        total_edge_count: int | None = None,
        controlled_start_index: int = 0,
        hidden_dim: int = 256,
@ -158,7 +157,6 @@ class TCAMAPPOAgent:
        self.num_agents = num_agents
        self.num_actions = num_actions
        self.edge_feature_dim = edge_feature_dim
-        self.time_feature_dim = time_feature_dim
        self.total_edge_count = int(total_edge_count if total_edge_count is not None else num_agents)
        self.controlled_start_index = int(controlled_start_index)
        self.controlled_end_index = self.controlled_start_index + self.num_agents
@ -177,13 +175,7 @@ class TCAMAPPOAgent:
        self.speed_feature_dim = 1
        self.last_reward_dim = 1
        self.agent_id_dim = 1
-        self.local_obs_dim = (
-            edge_feature_dim
-            + self.speed_feature_dim
-            + time_feature_dim
-            + self.last_reward_dim
-            + self.agent_id_dim
-        )
+        self.local_obs_dim = edge_feature_dim + self.speed_feature_dim + self.last_reward_dim + self.agent_id_dim
        self.reward_feature_dim = 1 + len(REWARD_COMPONENT_COLUMNS)
        self.history_token_dim = state_dim + num_agents + self.reward_feature_dim

@ -239,7 +231,7 @@ class TCAMAPPOAgent:
        speed_block_start = edge_block
        speed_block_end = speed_block_start + self.total_edge_count
        global_block_start = speed_block_end
-        global_block_end = global_block_start + self.time_feature_dim + self.last_reward_dim
+        global_block_end = global_block_start + self.last_reward_dim

        edge_features = state_tensor[:, :edge_block].view(batch_size, self.total_edge_count, self.edge_feature_dim)
        edge_features = edge_features[:, self.controlled_start_index:self.controlled_end_index, :]
--- a/config_sumo_vsl.yaml
+++ b/config_sumo_vsl.yaml
@ -243,7 +243,7 @@ agents:
    batch_size: 15
    lr_schedule: "cosine"

-  tcamappo:
+  tacmappo:
    hidden_dim: 256
    critic_hidden_dim: 256
    history_window: 6
--- a/envs/edge_vsl_env.py
+++ b/envs/edge_vsl_env.py
@ -175,7 +175,7 @@ class SUMOEdgeVSLEnvironment:

        self.action_dims = [self.num_speed_actions] * self.num_controlled_edges
        self.features_per_edge = 3
-        self._state_dim = (self.features_per_edge + 1) * self.num_edges + 3 + 1
+        self._state_dim = (self.features_per_edge + 1) * self.num_edges + 1

        self.current_step = 0
        self._sumo_running = False
@ -810,7 +810,6 @@ class SUMOEdgeVSLEnvironment:
        done = self.current_step >= self.episode_length

        info["reward"] = reward
-        info["step"] = self.current_step
        info["edge_speeds_kmh"] = (edge_speeds * 3.6).tolist()
        info["action_applied_mask"] = [
            bool(apply_control and idx not in self.passive_segment_indices)
@ -1024,10 +1023,6 @@ class SUMOEdgeVSLEnvironment:
        for idx in range(self.num_edges):
            state_parts.append(self.current_edge_speeds[idx] / self.free_flow_speed)

-        time_progress = self.current_step / max(self.episode_length, 1)
-        state_parts.append(time_progress)
-        state_parts.append(np.sin(2 * np.pi * time_progress))
-        state_parts.append(np.cos(2 * np.pi * time_progress))
        state_parts.append(self._last_reward)

        return np.array(state_parts, dtype=np.float32)
--- a/envs/reward_design_blueprint.py
+++ b/envs/reward_design_blueprint.py
@ -73,7 +73,7 @@ class RewardBlueprint:
        return "\n".join(lines).rstrip() + "\n"


-def build_tca_mappo_reward_blueprint() -> RewardBlueprint:
+def build_tac_mappo_reward_blueprint() -> RewardBlueprint:
    """Build the April-style four-term reward blueprint for corridor VSL."""

    terms = (
@ -127,7 +127,7 @@ def build_tca_mappo_reward_blueprint() -> RewardBlueprint:
    )

    return RewardBlueprint(
-        name="Four-Term Reward Blueprint For TCA-MAPPO",
+        name="Four-Term Reward Blueprint For TAC-MAPPO",
        scenario_summary=(
            "The study controls a segmented freeway VSL corridor under fixed control intervals. "
            "The reward should stay simple, bounded, and decomposable so that each term has a clear "
@ -162,12 +162,12 @@ def build_tca_mappo_reward_blueprint() -> RewardBlueprint:


 def build_reward_blueprint_markdown() -> str:
-    return build_tca_mappo_reward_blueprint().to_markdown()
+    return build_tac_mappo_reward_blueprint().to_markdown()


 def iter_required_signals() -> Iterable[str]:
    signals = set()
-    for term in build_tca_mappo_reward_blueprint().terms:
+    for term in build_tac_mappo_reward_blueprint().terms:
        signals.update(term.required_signals)
    return tuple(sorted(signals))

--- a/scripts/evaluate_models.py
+++ b/scripts/evaluate_models.py
@ -35,7 +35,7 @@ from agents.dcqmix_agent import DCQMIXAgent
 from agents.rule_vsl_agent import RULE_VSL_AGENT_CLASSES
 from agents.sac_agent import SACAgent
 from agents.sctd3_agent import SCTD3Agent
-from agents.tcamappo_agent import TCAMAPPOAgent
+from agents.tacmappo_agent import TACMAPPOAgent
 from agents.td3_agent import TD3Agent
 from envs.edge_vsl_env import SUMOEdgeVSLEnvironment
 from envs.reward_system import REWARD_COMPONENT_COLUMNS, REWARD_COMPONENT_LABELS
@ -50,7 +50,7 @@ from utils.run_dirs import find_shared_config_path, resolve_checkpoint_root


 RULE_BASELINE_NAMES = ("occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl")
-MODEL_ORDER = [*RULE_BASELINE_NAMES, "ppo", "gpro", "appo", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]
+MODEL_ORDER = [*RULE_BASELINE_NAMES, "ppo", "gpro", "appo", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]
 BASELINE_NAME = "no_control"
 EVAL_ORDER = [BASELINE_NAME] + MODEL_ORDER
 MODEL_LABELS = {
@ -62,7 +62,7 @@ MODEL_LABELS = {
    "gpro": "GPRO-PPO",
    "appo": "APPO",
    "mappo": "MAPPO",
-    "tcamappo": "TCA-MAPPO",
+    "tacmappo": "TAC-MAPPO",
    "dcmappo": "DC-MAPPO",
    "dqn": "DQN",
    "madqn": "MA-DQN",
@ -101,7 +101,7 @@ def parse_args():
        "--models",
        nargs="*",
        default=None,
-        help="Subset of models to evaluate, e.g. --models occ_rule_vsl bottleneck_rule_vsl harmonization_rule_vsl ppo gpro tcamappo dcmappo dqn madqn ddqn qmix dcqmix ddpg d3pg sac td3 sctd3",
+        help="Subset of models to evaluate, e.g. --models occ_rule_vsl bottleneck_rule_vsl harmonization_rule_vsl ppo gpro tacmappo dcmappo dqn madqn ddqn qmix dcqmix ddpg d3pg sac td3 sctd3",
    )
    parser.add_argument("--seed", type=int, default=42, help="Evaluation seed.")
    parser.add_argument(
@ -265,7 +265,7 @@ def resolve_project_path(path_str: Optional[str]) -> Optional[str]:
 def resolve_model_load_path(model_name: str, checkpoint_dir: str) -> str:
    if model_name in RULE_BASELINE_NAMES:
        return ""
-    if model_name in {"ppo", "gpro", "appo", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix"}:
+    if model_name in {"ppo", "gpro", "appo", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix"}:
        best_path = os.path.join(checkpoint_dir, "model_best.pt")
        if os.path.isfile(best_path):
            return best_path
@ -304,7 +304,6 @@ def build_value_based_agent(agent_cls, agent_cfg: dict, env: SUMOEdgeVSLEnvironm
        "target_update": agent_cfg.get("target_update", 5),
        "device": agent_cfg.get("device", "cuda"),
        "edge_feature_dim": env.features_per_edge,
-        "time_feature_dim": 3,
        "total_edge_count": env.num_edges,
        "controlled_start_index": env.controlled_edge_start_index,
        "num_corridor_blocks": agent_cfg.get("num_corridor_blocks", 2),
@ -431,8 +430,8 @@ def build_agent(model_name: str, config: dict, env: SUMOEdgeVSLEnvironment):
            lr_schedule=agent_cfg.get("lr_schedule", "cosine"),
            total_episodes=config.get("training", {}).get("num_episodes", 4000),
        )
-    if model_name == "tcamappo":
-        return TCAMAPPOAgent(
+    if model_name == "tacmappo":
+        return TACMAPPOAgent(
            state_dim=env.state_dim,
            num_agents=env.num_controlled_edges,
            num_actions=env.action_dim,
--- a/scripts/plot_live_training.py
+++ b/scripts/plot_live_training.py
@ -20,7 +20,7 @@ from envs.reward_system import REWARD_COMPONENT_COLUMNS, REWARD_COMPONENT_LABELS
 from utils.run_dirs import find_latest_run_root, find_run_root_by_timestamp


-MODEL_ORDER = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "appo", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]
+MODEL_ORDER = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "appo", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]
 MODEL_LABELS = {
    "occ_rule_vsl": "Occ-Rule-VSL",
    "bottleneck_rule_vsl": "Bottleneck-Rule-VSL",
@ -29,7 +29,7 @@ MODEL_LABELS = {
    "gpro": "GPRO-PPO",
    "appo": "APPO",
    "mappo": "MAPPO",
-    "tcamappo": "TCA-MAPPO",
+    "tacmappo": "TAC-MAPPO",
    "dcmappo": "DC-MAPPO",
    "dqn": "DQN",
    "madqn": "MA-DQN",
@ -50,7 +50,7 @@ MODEL_COLORS = {
    "gpro": "#6a3d9a",
    "appo": "#ff7f0e",
    "mappo": "#2ca02c",
-    "tcamappo": "#7f7f7f",
+    "tacmappo": "#7f7f7f",
    "dcmappo": "#8c564b",
    "dqn": "#d62728",
    "madqn": "#ff9896",
@ -69,7 +69,7 @@ FLOW_LABEL = REWARD_COMPONENT_LABELS.get(FLOW_COLUMN, "Flow")

 def parse_args():
    parser = argparse.ArgumentParser(description="Plot live training progress from run logs.")
-    parser.add_argument("--model", default=None, help="Model name, e.g. ppo/gpro/appo/mappo/tcamappo/dcmappo/dqn/madqn/ddqn/qmix/dcqmix/ddpg/d3pg/sac/td3/sctd3")
+    parser.add_argument("--model", default=None, help="Model name, e.g. ppo/gpro/appo/mappo/tacmappo/dcmappo/dqn/madqn/ddqn/qmix/dcqmix/ddpg/d3pg/sac/td3/sctd3")
    parser.add_argument(
        "--all-models",
        action="store_true",
--- a/training/registry.py
+++ b/training/registry.py
@ -20,13 +20,13 @@ from training.train_rule_vsl import (
 )
 from training.train_sac import train_sumo_sac
 from training.train_sctd3 import train_sumo_sctd3
-from training.train_tcamappo import train_sumo_tcamappo
+from training.train_tacmappo import train_sumo_tacmappo
 from training.train_td3 import train_sumo_td3


 # DEFAULT_MODELS: List[str] = ["ppo"]
-DEFAULT_MODELS: List[str] = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3"]
-ALL_MODELS: List[str] = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "appo", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]
+DEFAULT_MODELS: List[str] = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3"]
+ALL_MODELS: List[str] = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "appo", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]


 TRAINERS: Dict[str, Callable] = {
@ -37,7 +37,7 @@ TRAINERS: Dict[str, Callable] = {
    "gpro": train_sumo_gpro,
    "appo": train_sumo_appo,
    "mappo": train_sumo_mappo,
-    "tcamappo": train_sumo_tcamappo,
+    "tacmappo": train_sumo_tacmappo,
    "dcmappo": train_sumo_dcmappo,
    "dqn": train_sumo_dqn,
    "madqn": train_sumo_madqn,
--- a/training/train_tacmappo.py
+++ b/training/train_tacmappo.py
@ -1,5 +1,5 @@
 """
-Temporal Credit Assignment MAPPO training script for SUMO + TraCI VSL.
+TAC-MAPPO training script for SUMO + TraCI VSL.
 """
 import copy
 import os
@ -11,7 +11,7 @@ from tqdm import tqdm

 matplotlib.use("Agg")

-from agents.tcamappo_agent import TCAMAPPOAgent
+from agents.tacmappo_agent import TACMAPPOAgent
 from envs.edge_vsl_env import SUMOEdgeVSLEnvironment
 from envs.reward_system import REWARD_COMPONENT_COLUMNS, average_reward_components, init_reward_component_totals
 from utils.config import get_agent_config, get_training_config
@ -22,17 +22,17 @@ from utils.run_dirs import resolve_run_dirs, write_shared_run_config
 from utils.seeding import derive_seed, resolve_base_seed, set_global_seed


-def train_sumo_tcamappo(log_dir=None, checkpoint_dir=None, run_timestamp=None):
+def train_sumo_tacmappo(log_dir=None, checkpoint_dir=None, run_timestamp=None):
    with open("config_sumo_vsl.yaml", "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)

-    agent_config = get_agent_config(config, "tcamappo")
+    agent_config = get_agent_config(config, "tacmappo")
    train_config = get_training_config(config)
    base_seed = resolve_base_seed(train_config)
    set_global_seed(base_seed)

    _, checkpoint_dir, log_dir = resolve_run_dirs(
-        "tcamappo",
+        "tacmappo",
        log_dir=log_dir,
        checkpoint_dir=checkpoint_dir,
        run_timestamp=run_timestamp,
@ -49,11 +49,11 @@ def train_sumo_tcamappo(log_dir=None, checkpoint_dir=None, run_timestamp=None):
        run_timestamp=run_timestamp,
    )

-    logger = TrainingLogger(log_dir, "tcamappo")
+    logger = TrainingLogger(log_dir, "tacmappo")
    env = SUMOEdgeVSLEnvironment(runtime_config)

    print("=" * 70)
-    print("TCA-MAPPO training - SUMO+TraCI VSL environment")
+    print("TAC-MAPPO training - SUMO+TraCI VSL environment")
    print("=" * 70)
    print(f"  State dim: {env.state_dim}")
    print(f"  Agents: {env.num_edges}")
@ -68,7 +68,7 @@ def train_sumo_tcamappo(log_dir=None, checkpoint_dir=None, run_timestamp=None):
    print(f"  Global seed: {base_seed if base_seed is not None else 'None (random)'}")
    print()

-    agent = TCAMAPPOAgent(
+    agent = TACMAPPOAgent(
        state_dim=env.state_dim,
        num_agents=env.num_controlled_edges,
        num_actions=env.action_dim,
--- a/training/train_value_based.py
+++ b/training/train_value_based.py
@ -43,7 +43,6 @@ def _build_value_based_agent(agent_builder: Callable[..., object], env, agent_co
        "target_update": agent_config.get("target_update", 10),
        "device": agent_config.get("device", "cuda"),
        "edge_feature_dim": env.features_per_edge,
-        "time_feature_dim": 3,
        "total_edge_count": env.num_edges,
        "controlled_start_index": env.controlled_edge_start_index,
        "num_corridor_blocks": agent_config.get("num_corridor_blocks", 2),