在训练中删去训练时长相关信息和时序信息（从状态中移除 time_progress / sin / cos 时间特征及 time_feature_dim 参数），并将 TCA-MAPPO 统一重命名为 TAC-MAPPO

This commit is contained in:
Zihan Ye 2026-05-11 17:31:06 +08:00
parent ad65d7caf6
commit cb7c7964ae
15 changed files with 47 additions and 97 deletions

View File

@ -44,7 +44,6 @@ class MultiDiscreteActorCritic(nn.Module):
state_dim: int,
action_dims: List[int],
edge_feature_dim: int = 3,
time_feature_dim: int = 3,
total_edge_count: int | None = None,
controlled_start_index: int = 0,
hidden_dim: int = 128,
@ -58,21 +57,15 @@ class MultiDiscreteActorCritic(nn.Module):
self.num_zones = len(action_dims)
self.edge_feature_dim = edge_feature_dim
self.speed_feature_dim = 1
self.time_feature_dim = time_feature_dim
self.total_edge_count = int(total_edge_count if total_edge_count is not None else self.num_zones)
self.controlled_start_index = int(controlled_start_index)
self.controlled_end_index = self.controlled_start_index + self.num_zones
if self.controlled_end_index > self.total_edge_count:
raise ValueError("controlled action slice exceeds total edge count")
self.last_reward_dim = 1
self.global_feature_dim = self.time_feature_dim + self.last_reward_dim
self.global_feature_dim = self.last_reward_dim
self.agent_id_dim = 1
self.local_obs_dim = (
self.edge_feature_dim
+ self.speed_feature_dim
+ self.global_feature_dim
+ self.agent_id_dim
)
self.local_obs_dim = self.edge_feature_dim + self.speed_feature_dim + self.global_feature_dim + self.agent_id_dim
self.local_encoder = nn.Sequential(
nn.Linear(self.local_obs_dim, hidden_dim),
@ -155,7 +148,6 @@ class APPOAgent:
state_dim: int,
action_dims: List[int],
edge_feature_dim: int = 3,
time_feature_dim: int = 3,
total_edge_count: int | None = None,
controlled_start_index: int = 0,
hidden_dim: int = 128,
@ -189,7 +181,6 @@ class APPOAgent:
state_dim=state_dim,
action_dims=action_dims,
edge_feature_dim=edge_feature_dim,
time_feature_dim=time_feature_dim,
total_edge_count=total_edge_count,
controlled_start_index=controlled_start_index,
hidden_dim=hidden_dim,

View File

@ -179,7 +179,6 @@ class StructuredCorridorCritic(nn.Module):
self,
num_agents: int,
edge_feature_dim: int,
time_feature_dim: int,
hidden_dim: int = 256,
num_blocks: int = 2,
kernel_size: int = 5,
@ -188,10 +187,9 @@ class StructuredCorridorCritic(nn.Module):
super().__init__()
self.num_agents = num_agents
self.edge_feature_dim = edge_feature_dim
self.time_feature_dim = time_feature_dim
self.speed_feature_dim = 1
self.last_reward_dim = 1
self.global_feature_dim = self.time_feature_dim + self.last_reward_dim
self.global_feature_dim = self.last_reward_dim
self.edge_token_dim = self.edge_feature_dim + self.speed_feature_dim
self.edge_proj = nn.Sequential(
@ -251,7 +249,6 @@ class DCMAPPOAgent:
num_agents: int,
num_actions: int,
edge_feature_dim: int = 3,
time_feature_dim: int = 3,
total_edge_count: int | None = None,
controlled_start_index: int = 0,
hidden_dim: int = 256,
@ -277,7 +274,6 @@ class DCMAPPOAgent:
self.num_agents = num_agents
self.num_actions = num_actions
self.edge_feature_dim = edge_feature_dim
self.time_feature_dim = time_feature_dim
self.total_edge_count = int(total_edge_count if total_edge_count is not None else num_agents)
self.controlled_start_index = int(controlled_start_index)
self.controlled_end_index = self.controlled_start_index + self.num_agents
@ -294,7 +290,7 @@ class DCMAPPOAgent:
self.speed_feature_dim = 1
self.last_reward_dim = 1
self.global_feature_dim = self.time_feature_dim + self.last_reward_dim
self.global_feature_dim = self.last_reward_dim
self.agent_id_dim = 1
self.edge_token_dim = self.edge_feature_dim + self.speed_feature_dim
self.condition_dim = self.global_feature_dim + self.agent_id_dim
@ -312,7 +308,6 @@ class DCMAPPOAgent:
self.critic = StructuredCorridorCritic(
num_agents=num_agents,
edge_feature_dim=edge_feature_dim,
time_feature_dim=time_feature_dim,
hidden_dim=critic_hidden_dim,
num_blocks=max(1, num_corridor_blocks),
kernel_size=corridor_kernel_size,

View File

@ -120,7 +120,6 @@ class DirectionalQMixer(nn.Module):
self,
num_agents: int,
edge_feature_dim: int,
time_feature_dim: int,
total_edge_count: int,
controlled_start_index: int,
mixing_hidden_dim: int = 256,
@ -132,13 +131,12 @@ class DirectionalQMixer(nn.Module):
super().__init__()
self.num_agents = num_agents
self.edge_feature_dim = edge_feature_dim
self.time_feature_dim = time_feature_dim
self.speed_feature_dim = 1
self.last_reward_dim = 1
self.total_edge_count = total_edge_count
self.controlled_start_index = controlled_start_index
self.controlled_end_index = self.controlled_start_index + self.num_agents
self.global_feature_dim = self.time_feature_dim + self.last_reward_dim
self.global_feature_dim = self.last_reward_dim
self.edge_token_dim = self.edge_feature_dim + self.speed_feature_dim
self.state_encoder = DirectionalMixerStateEncoder(
num_agents=self.num_agents,
@ -228,7 +226,6 @@ class DCQMIXAgent(QMIXAgent):
target_update: int = 10,
device: str = "cuda",
edge_feature_dim: int = 3,
time_feature_dim: int = 3,
total_edge_count: int | None = None,
controlled_start_index: int = 0,
num_corridor_blocks: int = 2,
@ -254,7 +251,6 @@ class DCQMIXAgent(QMIXAgent):
target_update=target_update,
device=device,
edge_feature_dim=edge_feature_dim,
time_feature_dim=time_feature_dim,
total_edge_count=total_edge_count,
controlled_start_index=controlled_start_index,
)
@ -262,7 +258,7 @@ class DCQMIXAgent(QMIXAgent):
def _build_utility_network(self, hidden_dim: int) -> nn.Module:
return DirectionalUtilityNetwork(
edge_token_dim=self.edge_feature_dim + self.speed_feature_dim,
condition_dim=self.time_feature_dim + self.last_reward_dim + self.agent_id_dim,
condition_dim=self.last_reward_dim + self.agent_id_dim,
num_agents=self.num_agents,
num_actions=self.num_actions_per_agent,
hidden_dim=hidden_dim,
@ -275,7 +271,6 @@ class DCQMIXAgent(QMIXAgent):
return DirectionalQMixer(
num_agents=self.num_agents,
edge_feature_dim=self.edge_feature_dim,
time_feature_dim=self.time_feature_dim,
total_edge_count=self.total_edge_count,
controlled_start_index=self.controlled_start_index,
mixing_hidden_dim=mixing_hidden_dim,
@ -302,7 +297,7 @@ class DCQMIXAgent(QMIXAgent):
speed_block_start = edge_block
speed_block_end = speed_block_start + self.total_edge_count
global_block_start = speed_block_end
global_block_end = global_block_start + self.time_feature_dim + self.last_reward_dim
global_block_end = global_block_start + self.last_reward_dim
edge_features = state_tensor[:, :edge_block].view(batch_size, self.total_edge_count, self.edge_feature_dim)
edge_features = edge_features[:, self.controlled_start_index:self.controlled_end_index, :]

View File

@ -71,7 +71,6 @@ class MAPPOAgent:
num_agents: int,
num_actions: int,
edge_feature_dim: int = 3,
time_feature_dim: int = 3,
total_edge_count: int | None = None,
controlled_start_index: int = 0,
hidden_dim: int = 256,
@ -94,7 +93,6 @@ class MAPPOAgent:
self.num_agents = num_agents
self.num_actions = num_actions
self.edge_feature_dim = edge_feature_dim
self.time_feature_dim = time_feature_dim
self.total_edge_count = int(total_edge_count if total_edge_count is not None else num_agents)
self.controlled_start_index = int(controlled_start_index)
self.controlled_end_index = self.controlled_start_index + self.num_agents
@ -112,13 +110,7 @@ class MAPPOAgent:
self.speed_feature_dim = 1
self.last_reward_dim = 1
self.agent_id_dim = 1
self.local_obs_dim = (
edge_feature_dim
+ self.speed_feature_dim
+ time_feature_dim
+ self.last_reward_dim
+ self.agent_id_dim
)
self.local_obs_dim = edge_feature_dim + self.speed_feature_dim + self.last_reward_dim + self.agent_id_dim
self.actor = SharedActor(self.local_obs_dim, num_actions, hidden_dim).to(self.device)
self.critic = CentralizedCritic(state_dim, critic_hidden_dim).to(self.device)
@ -156,7 +148,7 @@ class MAPPOAgent:
speed_block_start = edge_block
speed_block_end = speed_block_start + self.total_edge_count
global_block_start = speed_block_end
global_block_end = global_block_start + self.time_feature_dim + self.last_reward_dim
global_block_end = global_block_start + self.last_reward_dim
edge_features = state_tensor[:, :edge_block].view(batch_size, self.total_edge_count, self.edge_feature_dim)
edge_features = edge_features[:, self.controlled_start_index:self.controlled_end_index, :]

View File

@ -119,7 +119,6 @@ class QMIXAgent:
target_update: int = 10,
device: str = "cuda",
edge_feature_dim: int = 3,
time_feature_dim: int = 3,
total_edge_count: int | None = None,
controlled_start_index: int = 0,
):
@ -134,7 +133,6 @@ class QMIXAgent:
self.batch_size = int(batch_size)
self.target_update = max(int(target_update), 1)
self.edge_feature_dim = int(edge_feature_dim)
self.time_feature_dim = int(time_feature_dim)
self.speed_feature_dim = 1
self.last_reward_dim = 1
self.agent_id_dim = 1
@ -144,13 +142,7 @@ class QMIXAgent:
if self.controlled_end_index > self.total_edge_count:
raise ValueError("controlled action slice exceeds total edge count")
self.local_obs_dim = (
self.edge_feature_dim
+ self.speed_feature_dim
+ self.time_feature_dim
+ self.last_reward_dim
+ self.agent_id_dim
)
self.local_obs_dim = self.edge_feature_dim + self.speed_feature_dim + self.last_reward_dim + self.agent_id_dim
self.utility_net = self._build_utility_network(hidden_dim).to(self.device)
self.target_utility_net = self._build_utility_network(hidden_dim).to(self.device)
@ -201,7 +193,7 @@ class QMIXAgent:
speed_block_start = edge_block
speed_block_end = speed_block_start + self.total_edge_count
global_block_start = speed_block_end
global_block_end = global_block_start + self.time_feature_dim + self.last_reward_dim
global_block_end = global_block_start + self.last_reward_dim
edge_features = state_tensor[:, :edge_block].view(batch_size, self.total_edge_count, self.edge_feature_dim)
edge_features = edge_features[:, self.controlled_start_index:self.controlled_end_index, :]

View File

@ -80,7 +80,7 @@ class EdgeStructuredExtractor(BaseFeaturesExtractor):
State layout is assumed to be:
- per-edge traffic features: [speed_norm, occ_norm, flow_norm] * num_edges
- per-edge current limit feature: [limit_norm] * num_edges
- global features: [time_progress, sin_t, cos_t, last_reward]
- global features: [last_reward]
"""
def __init__(
@ -88,7 +88,7 @@ class EdgeStructuredExtractor(BaseFeaturesExtractor):
observation_space: spaces.Box,
num_edges: int,
edge_feature_dim: int = 3,
global_feature_dim: int = 4,
global_feature_dim: int = 1,
total_edge_count: int | None = None,
controlled_start_index: int = 0,
edge_hidden_dim: int = 16,
@ -279,7 +279,7 @@ class SCTD3Agent:
"features_extractor_kwargs": {
"num_edges": self.num_zones,
"edge_feature_dim": edge_feature_dim,
"global_feature_dim": 4,
"global_feature_dim": 1,
"total_edge_count": total_edge_count if total_edge_count is not None else self.num_zones,
"controlled_start_index": controlled_start_index,
"edge_hidden_dim": extractor_edge_hidden_dim,

View File

@ -1,5 +1,5 @@
"""
Temporal Credit Assignment MAPPO for SUMO VSL.
TAC-MAPPO for SUMO VSL.
- Actor: same decentralized shared actor style as MAPPO
- Critic: current-state query attends over recent decision/outcome history
@ -122,7 +122,7 @@ class TemporalCreditCritic(nn.Module):
return self.head(fused)
class TCAMAPPOAgent:
class TACMAPPOAgent:
"""MAPPO with a temporal credit-assignment critic."""
def __init__(
@ -131,7 +131,6 @@ class TCAMAPPOAgent:
num_agents: int,
num_actions: int,
edge_feature_dim: int = 3,
time_feature_dim: int = 3,
total_edge_count: int | None = None,
controlled_start_index: int = 0,
hidden_dim: int = 256,
@ -158,7 +157,6 @@ class TCAMAPPOAgent:
self.num_agents = num_agents
self.num_actions = num_actions
self.edge_feature_dim = edge_feature_dim
self.time_feature_dim = time_feature_dim
self.total_edge_count = int(total_edge_count if total_edge_count is not None else num_agents)
self.controlled_start_index = int(controlled_start_index)
self.controlled_end_index = self.controlled_start_index + self.num_agents
@ -177,13 +175,7 @@ class TCAMAPPOAgent:
self.speed_feature_dim = 1
self.last_reward_dim = 1
self.agent_id_dim = 1
self.local_obs_dim = (
edge_feature_dim
+ self.speed_feature_dim
+ time_feature_dim
+ self.last_reward_dim
+ self.agent_id_dim
)
self.local_obs_dim = edge_feature_dim + self.speed_feature_dim + self.last_reward_dim + self.agent_id_dim
self.reward_feature_dim = 1 + len(REWARD_COMPONENT_COLUMNS)
self.history_token_dim = state_dim + num_agents + self.reward_feature_dim
@ -239,7 +231,7 @@ class TCAMAPPOAgent:
speed_block_start = edge_block
speed_block_end = speed_block_start + self.total_edge_count
global_block_start = speed_block_end
global_block_end = global_block_start + self.time_feature_dim + self.last_reward_dim
global_block_end = global_block_start + self.last_reward_dim
edge_features = state_tensor[:, :edge_block].view(batch_size, self.total_edge_count, self.edge_feature_dim)
edge_features = edge_features[:, self.controlled_start_index:self.controlled_end_index, :]

View File

@ -243,7 +243,7 @@ agents:
batch_size: 15
lr_schedule: "cosine"
tcamappo:
tacmappo:
hidden_dim: 256
critic_hidden_dim: 256
history_window: 6

View File

@ -175,7 +175,7 @@ class SUMOEdgeVSLEnvironment:
self.action_dims = [self.num_speed_actions] * self.num_controlled_edges
self.features_per_edge = 3
self._state_dim = (self.features_per_edge + 1) * self.num_edges + 3 + 1
self._state_dim = (self.features_per_edge + 1) * self.num_edges + 1
self.current_step = 0
self._sumo_running = False
@ -810,7 +810,6 @@ class SUMOEdgeVSLEnvironment:
done = self.current_step >= self.episode_length
info["reward"] = reward
info["step"] = self.current_step
info["edge_speeds_kmh"] = (edge_speeds * 3.6).tolist()
info["action_applied_mask"] = [
bool(apply_control and idx not in self.passive_segment_indices)
@ -1024,10 +1023,6 @@ class SUMOEdgeVSLEnvironment:
for idx in range(self.num_edges):
state_parts.append(self.current_edge_speeds[idx] / self.free_flow_speed)
time_progress = self.current_step / max(self.episode_length, 1)
state_parts.append(time_progress)
state_parts.append(np.sin(2 * np.pi * time_progress))
state_parts.append(np.cos(2 * np.pi * time_progress))
state_parts.append(self._last_reward)
return np.array(state_parts, dtype=np.float32)

View File

@ -73,7 +73,7 @@ class RewardBlueprint:
return "\n".join(lines).rstrip() + "\n"
def build_tca_mappo_reward_blueprint() -> RewardBlueprint:
def build_tac_mappo_reward_blueprint() -> RewardBlueprint:
"""Build the April-style four-term reward blueprint for corridor VSL."""
terms = (
@ -127,7 +127,7 @@ def build_tca_mappo_reward_blueprint() -> RewardBlueprint:
)
return RewardBlueprint(
name="Four-Term Reward Blueprint For TCA-MAPPO",
name="Four-Term Reward Blueprint For TAC-MAPPO",
scenario_summary=(
"The study controls a segmented freeway VSL corridor under fixed control intervals. "
"The reward should stay simple, bounded, and decomposable so that each term has a clear "
@ -162,12 +162,12 @@ def build_tca_mappo_reward_blueprint() -> RewardBlueprint:
def build_reward_blueprint_markdown() -> str:
return build_tca_mappo_reward_blueprint().to_markdown()
return build_tac_mappo_reward_blueprint().to_markdown()
def iter_required_signals() -> Iterable[str]:
signals = set()
for term in build_tca_mappo_reward_blueprint().terms:
for term in build_tac_mappo_reward_blueprint().terms:
signals.update(term.required_signals)
return tuple(sorted(signals))

View File

@ -35,7 +35,7 @@ from agents.dcqmix_agent import DCQMIXAgent
from agents.rule_vsl_agent import RULE_VSL_AGENT_CLASSES
from agents.sac_agent import SACAgent
from agents.sctd3_agent import SCTD3Agent
from agents.tcamappo_agent import TCAMAPPOAgent
from agents.tacmappo_agent import TACMAPPOAgent
from agents.td3_agent import TD3Agent
from envs.edge_vsl_env import SUMOEdgeVSLEnvironment
from envs.reward_system import REWARD_COMPONENT_COLUMNS, REWARD_COMPONENT_LABELS
@ -50,7 +50,7 @@ from utils.run_dirs import find_shared_config_path, resolve_checkpoint_root
RULE_BASELINE_NAMES = ("occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl")
MODEL_ORDER = [*RULE_BASELINE_NAMES, "ppo", "gpro", "appo", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]
MODEL_ORDER = [*RULE_BASELINE_NAMES, "ppo", "gpro", "appo", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]
BASELINE_NAME = "no_control"
EVAL_ORDER = [BASELINE_NAME] + MODEL_ORDER
MODEL_LABELS = {
@ -62,7 +62,7 @@ MODEL_LABELS = {
"gpro": "GPRO-PPO",
"appo": "APPO",
"mappo": "MAPPO",
"tcamappo": "TCA-MAPPO",
"tacmappo": "TAC-MAPPO",
"dcmappo": "DC-MAPPO",
"dqn": "DQN",
"madqn": "MA-DQN",
@ -101,7 +101,7 @@ def parse_args():
"--models",
nargs="*",
default=None,
help="Subset of models to evaluate, e.g. --models occ_rule_vsl bottleneck_rule_vsl harmonization_rule_vsl ppo gpro tcamappo dcmappo dqn madqn ddqn qmix dcqmix ddpg d3pg sac td3 sctd3",
help="Subset of models to evaluate, e.g. --models occ_rule_vsl bottleneck_rule_vsl harmonization_rule_vsl ppo gpro tacmappo dcmappo dqn madqn ddqn qmix dcqmix ddpg d3pg sac td3 sctd3",
)
parser.add_argument("--seed", type=int, default=42, help="Evaluation seed.")
parser.add_argument(
@ -265,7 +265,7 @@ def resolve_project_path(path_str: Optional[str]) -> Optional[str]:
def resolve_model_load_path(model_name: str, checkpoint_dir: str) -> str:
if model_name in RULE_BASELINE_NAMES:
return ""
if model_name in {"ppo", "gpro", "appo", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix"}:
if model_name in {"ppo", "gpro", "appo", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix"}:
best_path = os.path.join(checkpoint_dir, "model_best.pt")
if os.path.isfile(best_path):
return best_path
@ -304,7 +304,6 @@ def build_value_based_agent(agent_cls, agent_cfg: dict, env: SUMOEdgeVSLEnvironm
"target_update": agent_cfg.get("target_update", 5),
"device": agent_cfg.get("device", "cuda"),
"edge_feature_dim": env.features_per_edge,
"time_feature_dim": 3,
"total_edge_count": env.num_edges,
"controlled_start_index": env.controlled_edge_start_index,
"num_corridor_blocks": agent_cfg.get("num_corridor_blocks", 2),
@ -431,8 +430,8 @@ def build_agent(model_name: str, config: dict, env: SUMOEdgeVSLEnvironment):
lr_schedule=agent_cfg.get("lr_schedule", "cosine"),
total_episodes=config.get("training", {}).get("num_episodes", 4000),
)
if model_name == "tcamappo":
return TCAMAPPOAgent(
if model_name == "tacmappo":
return TACMAPPOAgent(
state_dim=env.state_dim,
num_agents=env.num_controlled_edges,
num_actions=env.action_dim,

View File

@ -20,7 +20,7 @@ from envs.reward_system import REWARD_COMPONENT_COLUMNS, REWARD_COMPONENT_LABELS
from utils.run_dirs import find_latest_run_root, find_run_root_by_timestamp
MODEL_ORDER = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "appo", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]
MODEL_ORDER = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "appo", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]
MODEL_LABELS = {
"occ_rule_vsl": "Occ-Rule-VSL",
"bottleneck_rule_vsl": "Bottleneck-Rule-VSL",
@ -29,7 +29,7 @@ MODEL_LABELS = {
"gpro": "GPRO-PPO",
"appo": "APPO",
"mappo": "MAPPO",
"tcamappo": "TCA-MAPPO",
"tacmappo": "TAC-MAPPO",
"dcmappo": "DC-MAPPO",
"dqn": "DQN",
"madqn": "MA-DQN",
@ -50,7 +50,7 @@ MODEL_COLORS = {
"gpro": "#6a3d9a",
"appo": "#ff7f0e",
"mappo": "#2ca02c",
"tcamappo": "#7f7f7f",
"tacmappo": "#7f7f7f",
"dcmappo": "#8c564b",
"dqn": "#d62728",
"madqn": "#ff9896",
@ -69,7 +69,7 @@ FLOW_LABEL = REWARD_COMPONENT_LABELS.get(FLOW_COLUMN, "Flow")
def parse_args():
parser = argparse.ArgumentParser(description="Plot live training progress from run logs.")
parser.add_argument("--model", default=None, help="Model name, e.g. ppo/gpro/appo/mappo/tcamappo/dcmappo/dqn/madqn/ddqn/qmix/dcqmix/ddpg/d3pg/sac/td3/sctd3")
parser.add_argument("--model", default=None, help="Model name, e.g. ppo/gpro/appo/mappo/tacmappo/dcmappo/dqn/madqn/ddqn/qmix/dcqmix/ddpg/d3pg/sac/td3/sctd3")
parser.add_argument(
"--all-models",
action="store_true",

View File

@ -20,13 +20,13 @@ from training.train_rule_vsl import (
)
from training.train_sac import train_sumo_sac
from training.train_sctd3 import train_sumo_sctd3
from training.train_tcamappo import train_sumo_tcamappo
from training.train_tacmappo import train_sumo_tacmappo
from training.train_td3 import train_sumo_td3
# DEFAULT_MODELS: List[str] = ["ppo"]
DEFAULT_MODELS: List[str] = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3"]
ALL_MODELS: List[str] = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "appo", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]
DEFAULT_MODELS: List[str] = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3"]
ALL_MODELS: List[str] = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "appo", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]
TRAINERS: Dict[str, Callable] = {
@ -37,7 +37,7 @@ TRAINERS: Dict[str, Callable] = {
"gpro": train_sumo_gpro,
"appo": train_sumo_appo,
"mappo": train_sumo_mappo,
"tcamappo": train_sumo_tcamappo,
"tacmappo": train_sumo_tacmappo,
"dcmappo": train_sumo_dcmappo,
"dqn": train_sumo_dqn,
"madqn": train_sumo_madqn,

View File

@ -1,5 +1,5 @@
"""
Temporal Credit Assignment MAPPO training script for SUMO + TraCI VSL.
TAC-MAPPO training script for SUMO + TraCI VSL.
"""
import copy
import os
@ -11,7 +11,7 @@ from tqdm import tqdm
matplotlib.use("Agg")
from agents.tcamappo_agent import TCAMAPPOAgent
from agents.tacmappo_agent import TACMAPPOAgent
from envs.edge_vsl_env import SUMOEdgeVSLEnvironment
from envs.reward_system import REWARD_COMPONENT_COLUMNS, average_reward_components, init_reward_component_totals
from utils.config import get_agent_config, get_training_config
@ -22,17 +22,17 @@ from utils.run_dirs import resolve_run_dirs, write_shared_run_config
from utils.seeding import derive_seed, resolve_base_seed, set_global_seed
def train_sumo_tcamappo(log_dir=None, checkpoint_dir=None, run_timestamp=None):
def train_sumo_tacmappo(log_dir=None, checkpoint_dir=None, run_timestamp=None):
with open("config_sumo_vsl.yaml", "r", encoding="utf-8") as f:
config = yaml.safe_load(f)
agent_config = get_agent_config(config, "tcamappo")
agent_config = get_agent_config(config, "tacmappo")
train_config = get_training_config(config)
base_seed = resolve_base_seed(train_config)
set_global_seed(base_seed)
_, checkpoint_dir, log_dir = resolve_run_dirs(
"tcamappo",
"tacmappo",
log_dir=log_dir,
checkpoint_dir=checkpoint_dir,
run_timestamp=run_timestamp,
@ -49,11 +49,11 @@ def train_sumo_tcamappo(log_dir=None, checkpoint_dir=None, run_timestamp=None):
run_timestamp=run_timestamp,
)
logger = TrainingLogger(log_dir, "tcamappo")
logger = TrainingLogger(log_dir, "tacmappo")
env = SUMOEdgeVSLEnvironment(runtime_config)
print("=" * 70)
print("TCA-MAPPO training - SUMO+TraCI VSL environment")
print("TAC-MAPPO training - SUMO+TraCI VSL environment")
print("=" * 70)
print(f" State dim: {env.state_dim}")
print(f" Agents: {env.num_edges}")
@ -68,7 +68,7 @@ def train_sumo_tcamappo(log_dir=None, checkpoint_dir=None, run_timestamp=None):
print(f" Global seed: {base_seed if base_seed is not None else 'None (random)'}")
print()
agent = TCAMAPPOAgent(
agent = TACMAPPOAgent(
state_dim=env.state_dim,
num_agents=env.num_controlled_edges,
num_actions=env.action_dim,

View File

@ -43,7 +43,6 @@ def _build_value_based_agent(agent_builder: Callable[..., object], env, agent_co
"target_update": agent_config.get("target_update", 10),
"device": agent_config.get("device", "cuda"),
"edge_feature_dim": env.features_per_edge,
"time_feature_dim": 3,
"total_edge_count": env.num_edges,
"controlled_start_index": env.controlled_edge_start_index,
"num_corridor_blocks": agent_config.get("num_corridor_blocks", 2),