在训练中删去训练时长相关信息和时序信息
This commit is contained in:
parent
ad65d7caf6
commit
cb7c7964ae
|
|
@ -44,7 +44,6 @@ class MultiDiscreteActorCritic(nn.Module):
|
|||
state_dim: int,
|
||||
action_dims: List[int],
|
||||
edge_feature_dim: int = 3,
|
||||
time_feature_dim: int = 3,
|
||||
total_edge_count: int | None = None,
|
||||
controlled_start_index: int = 0,
|
||||
hidden_dim: int = 128,
|
||||
|
|
@ -58,21 +57,15 @@ class MultiDiscreteActorCritic(nn.Module):
|
|||
self.num_zones = len(action_dims)
|
||||
self.edge_feature_dim = edge_feature_dim
|
||||
self.speed_feature_dim = 1
|
||||
self.time_feature_dim = time_feature_dim
|
||||
self.total_edge_count = int(total_edge_count if total_edge_count is not None else self.num_zones)
|
||||
self.controlled_start_index = int(controlled_start_index)
|
||||
self.controlled_end_index = self.controlled_start_index + self.num_zones
|
||||
if self.controlled_end_index > self.total_edge_count:
|
||||
raise ValueError("controlled action slice exceeds total edge count")
|
||||
self.last_reward_dim = 1
|
||||
self.global_feature_dim = self.time_feature_dim + self.last_reward_dim
|
||||
self.global_feature_dim = self.last_reward_dim
|
||||
self.agent_id_dim = 1
|
||||
self.local_obs_dim = (
|
||||
self.edge_feature_dim
|
||||
+ self.speed_feature_dim
|
||||
+ self.global_feature_dim
|
||||
+ self.agent_id_dim
|
||||
)
|
||||
self.local_obs_dim = self.edge_feature_dim + self.speed_feature_dim + self.global_feature_dim + self.agent_id_dim
|
||||
|
||||
self.local_encoder = nn.Sequential(
|
||||
nn.Linear(self.local_obs_dim, hidden_dim),
|
||||
|
|
@ -155,7 +148,6 @@ class APPOAgent:
|
|||
state_dim: int,
|
||||
action_dims: List[int],
|
||||
edge_feature_dim: int = 3,
|
||||
time_feature_dim: int = 3,
|
||||
total_edge_count: int | None = None,
|
||||
controlled_start_index: int = 0,
|
||||
hidden_dim: int = 128,
|
||||
|
|
@ -189,7 +181,6 @@ class APPOAgent:
|
|||
state_dim=state_dim,
|
||||
action_dims=action_dims,
|
||||
edge_feature_dim=edge_feature_dim,
|
||||
time_feature_dim=time_feature_dim,
|
||||
total_edge_count=total_edge_count,
|
||||
controlled_start_index=controlled_start_index,
|
||||
hidden_dim=hidden_dim,
|
||||
|
|
|
|||
|
|
@ -179,7 +179,6 @@ class StructuredCorridorCritic(nn.Module):
|
|||
self,
|
||||
num_agents: int,
|
||||
edge_feature_dim: int,
|
||||
time_feature_dim: int,
|
||||
hidden_dim: int = 256,
|
||||
num_blocks: int = 2,
|
||||
kernel_size: int = 5,
|
||||
|
|
@ -188,10 +187,9 @@ class StructuredCorridorCritic(nn.Module):
|
|||
super().__init__()
|
||||
self.num_agents = num_agents
|
||||
self.edge_feature_dim = edge_feature_dim
|
||||
self.time_feature_dim = time_feature_dim
|
||||
self.speed_feature_dim = 1
|
||||
self.last_reward_dim = 1
|
||||
self.global_feature_dim = self.time_feature_dim + self.last_reward_dim
|
||||
self.global_feature_dim = self.last_reward_dim
|
||||
self.edge_token_dim = self.edge_feature_dim + self.speed_feature_dim
|
||||
|
||||
self.edge_proj = nn.Sequential(
|
||||
|
|
@ -251,7 +249,6 @@ class DCMAPPOAgent:
|
|||
num_agents: int,
|
||||
num_actions: int,
|
||||
edge_feature_dim: int = 3,
|
||||
time_feature_dim: int = 3,
|
||||
total_edge_count: int | None = None,
|
||||
controlled_start_index: int = 0,
|
||||
hidden_dim: int = 256,
|
||||
|
|
@ -277,7 +274,6 @@ class DCMAPPOAgent:
|
|||
self.num_agents = num_agents
|
||||
self.num_actions = num_actions
|
||||
self.edge_feature_dim = edge_feature_dim
|
||||
self.time_feature_dim = time_feature_dim
|
||||
self.total_edge_count = int(total_edge_count if total_edge_count is not None else num_agents)
|
||||
self.controlled_start_index = int(controlled_start_index)
|
||||
self.controlled_end_index = self.controlled_start_index + self.num_agents
|
||||
|
|
@ -294,7 +290,7 @@ class DCMAPPOAgent:
|
|||
|
||||
self.speed_feature_dim = 1
|
||||
self.last_reward_dim = 1
|
||||
self.global_feature_dim = self.time_feature_dim + self.last_reward_dim
|
||||
self.global_feature_dim = self.last_reward_dim
|
||||
self.agent_id_dim = 1
|
||||
self.edge_token_dim = self.edge_feature_dim + self.speed_feature_dim
|
||||
self.condition_dim = self.global_feature_dim + self.agent_id_dim
|
||||
|
|
@ -312,7 +308,6 @@ class DCMAPPOAgent:
|
|||
self.critic = StructuredCorridorCritic(
|
||||
num_agents=num_agents,
|
||||
edge_feature_dim=edge_feature_dim,
|
||||
time_feature_dim=time_feature_dim,
|
||||
hidden_dim=critic_hidden_dim,
|
||||
num_blocks=max(1, num_corridor_blocks),
|
||||
kernel_size=corridor_kernel_size,
|
||||
|
|
|
|||
|
|
@ -120,7 +120,6 @@ class DirectionalQMixer(nn.Module):
|
|||
self,
|
||||
num_agents: int,
|
||||
edge_feature_dim: int,
|
||||
time_feature_dim: int,
|
||||
total_edge_count: int,
|
||||
controlled_start_index: int,
|
||||
mixing_hidden_dim: int = 256,
|
||||
|
|
@ -132,13 +131,12 @@ class DirectionalQMixer(nn.Module):
|
|||
super().__init__()
|
||||
self.num_agents = num_agents
|
||||
self.edge_feature_dim = edge_feature_dim
|
||||
self.time_feature_dim = time_feature_dim
|
||||
self.speed_feature_dim = 1
|
||||
self.last_reward_dim = 1
|
||||
self.total_edge_count = total_edge_count
|
||||
self.controlled_start_index = controlled_start_index
|
||||
self.controlled_end_index = self.controlled_start_index + self.num_agents
|
||||
self.global_feature_dim = self.time_feature_dim + self.last_reward_dim
|
||||
self.global_feature_dim = self.last_reward_dim
|
||||
self.edge_token_dim = self.edge_feature_dim + self.speed_feature_dim
|
||||
self.state_encoder = DirectionalMixerStateEncoder(
|
||||
num_agents=self.num_agents,
|
||||
|
|
@ -228,7 +226,6 @@ class DCQMIXAgent(QMIXAgent):
|
|||
target_update: int = 10,
|
||||
device: str = "cuda",
|
||||
edge_feature_dim: int = 3,
|
||||
time_feature_dim: int = 3,
|
||||
total_edge_count: int | None = None,
|
||||
controlled_start_index: int = 0,
|
||||
num_corridor_blocks: int = 2,
|
||||
|
|
@ -254,7 +251,6 @@ class DCQMIXAgent(QMIXAgent):
|
|||
target_update=target_update,
|
||||
device=device,
|
||||
edge_feature_dim=edge_feature_dim,
|
||||
time_feature_dim=time_feature_dim,
|
||||
total_edge_count=total_edge_count,
|
||||
controlled_start_index=controlled_start_index,
|
||||
)
|
||||
|
|
@ -262,7 +258,7 @@ class DCQMIXAgent(QMIXAgent):
|
|||
def _build_utility_network(self, hidden_dim: int) -> nn.Module:
|
||||
return DirectionalUtilityNetwork(
|
||||
edge_token_dim=self.edge_feature_dim + self.speed_feature_dim,
|
||||
condition_dim=self.time_feature_dim + self.last_reward_dim + self.agent_id_dim,
|
||||
condition_dim=self.last_reward_dim + self.agent_id_dim,
|
||||
num_agents=self.num_agents,
|
||||
num_actions=self.num_actions_per_agent,
|
||||
hidden_dim=hidden_dim,
|
||||
|
|
@ -275,7 +271,6 @@ class DCQMIXAgent(QMIXAgent):
|
|||
return DirectionalQMixer(
|
||||
num_agents=self.num_agents,
|
||||
edge_feature_dim=self.edge_feature_dim,
|
||||
time_feature_dim=self.time_feature_dim,
|
||||
total_edge_count=self.total_edge_count,
|
||||
controlled_start_index=self.controlled_start_index,
|
||||
mixing_hidden_dim=mixing_hidden_dim,
|
||||
|
|
@ -302,7 +297,7 @@ class DCQMIXAgent(QMIXAgent):
|
|||
speed_block_start = edge_block
|
||||
speed_block_end = speed_block_start + self.total_edge_count
|
||||
global_block_start = speed_block_end
|
||||
global_block_end = global_block_start + self.time_feature_dim + self.last_reward_dim
|
||||
global_block_end = global_block_start + self.last_reward_dim
|
||||
|
||||
edge_features = state_tensor[:, :edge_block].view(batch_size, self.total_edge_count, self.edge_feature_dim)
|
||||
edge_features = edge_features[:, self.controlled_start_index:self.controlled_end_index, :]
|
||||
|
|
|
|||
|
|
@ -71,7 +71,6 @@ class MAPPOAgent:
|
|||
num_agents: int,
|
||||
num_actions: int,
|
||||
edge_feature_dim: int = 3,
|
||||
time_feature_dim: int = 3,
|
||||
total_edge_count: int | None = None,
|
||||
controlled_start_index: int = 0,
|
||||
hidden_dim: int = 256,
|
||||
|
|
@ -94,7 +93,6 @@ class MAPPOAgent:
|
|||
self.num_agents = num_agents
|
||||
self.num_actions = num_actions
|
||||
self.edge_feature_dim = edge_feature_dim
|
||||
self.time_feature_dim = time_feature_dim
|
||||
self.total_edge_count = int(total_edge_count if total_edge_count is not None else num_agents)
|
||||
self.controlled_start_index = int(controlled_start_index)
|
||||
self.controlled_end_index = self.controlled_start_index + self.num_agents
|
||||
|
|
@ -112,13 +110,7 @@ class MAPPOAgent:
|
|||
self.speed_feature_dim = 1
|
||||
self.last_reward_dim = 1
|
||||
self.agent_id_dim = 1
|
||||
self.local_obs_dim = (
|
||||
edge_feature_dim
|
||||
+ self.speed_feature_dim
|
||||
+ time_feature_dim
|
||||
+ self.last_reward_dim
|
||||
+ self.agent_id_dim
|
||||
)
|
||||
self.local_obs_dim = edge_feature_dim + self.speed_feature_dim + self.last_reward_dim + self.agent_id_dim
|
||||
|
||||
self.actor = SharedActor(self.local_obs_dim, num_actions, hidden_dim).to(self.device)
|
||||
self.critic = CentralizedCritic(state_dim, critic_hidden_dim).to(self.device)
|
||||
|
|
@ -156,7 +148,7 @@ class MAPPOAgent:
|
|||
speed_block_start = edge_block
|
||||
speed_block_end = speed_block_start + self.total_edge_count
|
||||
global_block_start = speed_block_end
|
||||
global_block_end = global_block_start + self.time_feature_dim + self.last_reward_dim
|
||||
global_block_end = global_block_start + self.last_reward_dim
|
||||
|
||||
edge_features = state_tensor[:, :edge_block].view(batch_size, self.total_edge_count, self.edge_feature_dim)
|
||||
edge_features = edge_features[:, self.controlled_start_index:self.controlled_end_index, :]
|
||||
|
|
|
|||
|
|
@ -119,7 +119,6 @@ class QMIXAgent:
|
|||
target_update: int = 10,
|
||||
device: str = "cuda",
|
||||
edge_feature_dim: int = 3,
|
||||
time_feature_dim: int = 3,
|
||||
total_edge_count: int | None = None,
|
||||
controlled_start_index: int = 0,
|
||||
):
|
||||
|
|
@ -134,7 +133,6 @@ class QMIXAgent:
|
|||
self.batch_size = int(batch_size)
|
||||
self.target_update = max(int(target_update), 1)
|
||||
self.edge_feature_dim = int(edge_feature_dim)
|
||||
self.time_feature_dim = int(time_feature_dim)
|
||||
self.speed_feature_dim = 1
|
||||
self.last_reward_dim = 1
|
||||
self.agent_id_dim = 1
|
||||
|
|
@ -144,13 +142,7 @@ class QMIXAgent:
|
|||
if self.controlled_end_index > self.total_edge_count:
|
||||
raise ValueError("controlled action slice exceeds total edge count")
|
||||
|
||||
self.local_obs_dim = (
|
||||
self.edge_feature_dim
|
||||
+ self.speed_feature_dim
|
||||
+ self.time_feature_dim
|
||||
+ self.last_reward_dim
|
||||
+ self.agent_id_dim
|
||||
)
|
||||
self.local_obs_dim = self.edge_feature_dim + self.speed_feature_dim + self.last_reward_dim + self.agent_id_dim
|
||||
|
||||
self.utility_net = self._build_utility_network(hidden_dim).to(self.device)
|
||||
self.target_utility_net = self._build_utility_network(hidden_dim).to(self.device)
|
||||
|
|
@ -201,7 +193,7 @@ class QMIXAgent:
|
|||
speed_block_start = edge_block
|
||||
speed_block_end = speed_block_start + self.total_edge_count
|
||||
global_block_start = speed_block_end
|
||||
global_block_end = global_block_start + self.time_feature_dim + self.last_reward_dim
|
||||
global_block_end = global_block_start + self.last_reward_dim
|
||||
|
||||
edge_features = state_tensor[:, :edge_block].view(batch_size, self.total_edge_count, self.edge_feature_dim)
|
||||
edge_features = edge_features[:, self.controlled_start_index:self.controlled_end_index, :]
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ class EdgeStructuredExtractor(BaseFeaturesExtractor):
|
|||
State layout is assumed to be:
|
||||
- per-edge traffic features: [speed_norm, occ_norm, flow_norm] * num_edges
|
||||
- per-edge current limit feature: [limit_norm] * num_edges
|
||||
- global features: [time_progress, sin_t, cos_t, last_reward]
|
||||
- global features: [last_reward]
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
|
@ -88,7 +88,7 @@ class EdgeStructuredExtractor(BaseFeaturesExtractor):
|
|||
observation_space: spaces.Box,
|
||||
num_edges: int,
|
||||
edge_feature_dim: int = 3,
|
||||
global_feature_dim: int = 4,
|
||||
global_feature_dim: int = 1,
|
||||
total_edge_count: int | None = None,
|
||||
controlled_start_index: int = 0,
|
||||
edge_hidden_dim: int = 16,
|
||||
|
|
@ -279,7 +279,7 @@ class SCTD3Agent:
|
|||
"features_extractor_kwargs": {
|
||||
"num_edges": self.num_zones,
|
||||
"edge_feature_dim": edge_feature_dim,
|
||||
"global_feature_dim": 4,
|
||||
"global_feature_dim": 1,
|
||||
"total_edge_count": total_edge_count if total_edge_count is not None else self.num_zones,
|
||||
"controlled_start_index": controlled_start_index,
|
||||
"edge_hidden_dim": extractor_edge_hidden_dim,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
"""
|
||||
Temporal Credit Assignment MAPPO for SUMO VSL.
|
||||
TAC-MAPPO for SUMO VSL.
|
||||
|
||||
- Actor: same decentralized shared actor style as MAPPO
|
||||
- Critic: current-state query attends over recent decision/outcome history
|
||||
|
|
@ -122,7 +122,7 @@ class TemporalCreditCritic(nn.Module):
|
|||
return self.head(fused)
|
||||
|
||||
|
||||
class TCAMAPPOAgent:
|
||||
class TACMAPPOAgent:
|
||||
"""MAPPO with a temporal credit-assignment critic."""
|
||||
|
||||
def __init__(
|
||||
|
|
@ -131,7 +131,6 @@ class TCAMAPPOAgent:
|
|||
num_agents: int,
|
||||
num_actions: int,
|
||||
edge_feature_dim: int = 3,
|
||||
time_feature_dim: int = 3,
|
||||
total_edge_count: int | None = None,
|
||||
controlled_start_index: int = 0,
|
||||
hidden_dim: int = 256,
|
||||
|
|
@ -158,7 +157,6 @@ class TCAMAPPOAgent:
|
|||
self.num_agents = num_agents
|
||||
self.num_actions = num_actions
|
||||
self.edge_feature_dim = edge_feature_dim
|
||||
self.time_feature_dim = time_feature_dim
|
||||
self.total_edge_count = int(total_edge_count if total_edge_count is not None else num_agents)
|
||||
self.controlled_start_index = int(controlled_start_index)
|
||||
self.controlled_end_index = self.controlled_start_index + self.num_agents
|
||||
|
|
@ -177,13 +175,7 @@ class TCAMAPPOAgent:
|
|||
self.speed_feature_dim = 1
|
||||
self.last_reward_dim = 1
|
||||
self.agent_id_dim = 1
|
||||
self.local_obs_dim = (
|
||||
edge_feature_dim
|
||||
+ self.speed_feature_dim
|
||||
+ time_feature_dim
|
||||
+ self.last_reward_dim
|
||||
+ self.agent_id_dim
|
||||
)
|
||||
self.local_obs_dim = edge_feature_dim + self.speed_feature_dim + self.last_reward_dim + self.agent_id_dim
|
||||
self.reward_feature_dim = 1 + len(REWARD_COMPONENT_COLUMNS)
|
||||
self.history_token_dim = state_dim + num_agents + self.reward_feature_dim
|
||||
|
||||
|
|
@ -239,7 +231,7 @@ class TCAMAPPOAgent:
|
|||
speed_block_start = edge_block
|
||||
speed_block_end = speed_block_start + self.total_edge_count
|
||||
global_block_start = speed_block_end
|
||||
global_block_end = global_block_start + self.time_feature_dim + self.last_reward_dim
|
||||
global_block_end = global_block_start + self.last_reward_dim
|
||||
|
||||
edge_features = state_tensor[:, :edge_block].view(batch_size, self.total_edge_count, self.edge_feature_dim)
|
||||
edge_features = edge_features[:, self.controlled_start_index:self.controlled_end_index, :]
|
||||
|
|
@ -243,7 +243,7 @@ agents:
|
|||
batch_size: 15
|
||||
lr_schedule: "cosine"
|
||||
|
||||
tcamappo:
|
||||
tacmappo:
|
||||
hidden_dim: 256
|
||||
critic_hidden_dim: 256
|
||||
history_window: 6
|
||||
|
|
|
|||
|
|
@ -175,7 +175,7 @@ class SUMOEdgeVSLEnvironment:
|
|||
|
||||
self.action_dims = [self.num_speed_actions] * self.num_controlled_edges
|
||||
self.features_per_edge = 3
|
||||
self._state_dim = (self.features_per_edge + 1) * self.num_edges + 3 + 1
|
||||
self._state_dim = (self.features_per_edge + 1) * self.num_edges + 1
|
||||
|
||||
self.current_step = 0
|
||||
self._sumo_running = False
|
||||
|
|
@ -810,7 +810,6 @@ class SUMOEdgeVSLEnvironment:
|
|||
done = self.current_step >= self.episode_length
|
||||
|
||||
info["reward"] = reward
|
||||
info["step"] = self.current_step
|
||||
info["edge_speeds_kmh"] = (edge_speeds * 3.6).tolist()
|
||||
info["action_applied_mask"] = [
|
||||
bool(apply_control and idx not in self.passive_segment_indices)
|
||||
|
|
@ -1024,10 +1023,6 @@ class SUMOEdgeVSLEnvironment:
|
|||
for idx in range(self.num_edges):
|
||||
state_parts.append(self.current_edge_speeds[idx] / self.free_flow_speed)
|
||||
|
||||
time_progress = self.current_step / max(self.episode_length, 1)
|
||||
state_parts.append(time_progress)
|
||||
state_parts.append(np.sin(2 * np.pi * time_progress))
|
||||
state_parts.append(np.cos(2 * np.pi * time_progress))
|
||||
state_parts.append(self._last_reward)
|
||||
|
||||
return np.array(state_parts, dtype=np.float32)
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ class RewardBlueprint:
|
|||
return "\n".join(lines).rstrip() + "\n"
|
||||
|
||||
|
||||
def build_tca_mappo_reward_blueprint() -> RewardBlueprint:
|
||||
def build_tac_mappo_reward_blueprint() -> RewardBlueprint:
|
||||
"""Build the April-style four-term reward blueprint for corridor VSL."""
|
||||
|
||||
terms = (
|
||||
|
|
@ -127,7 +127,7 @@ def build_tca_mappo_reward_blueprint() -> RewardBlueprint:
|
|||
)
|
||||
|
||||
return RewardBlueprint(
|
||||
name="Four-Term Reward Blueprint For TCA-MAPPO",
|
||||
name="Four-Term Reward Blueprint For TAC-MAPPO",
|
||||
scenario_summary=(
|
||||
"The study controls a segmented freeway VSL corridor under fixed control intervals. "
|
||||
"The reward should stay simple, bounded, and decomposable so that each term has a clear "
|
||||
|
|
@ -162,12 +162,12 @@ def build_tca_mappo_reward_blueprint() -> RewardBlueprint:
|
|||
|
||||
|
||||
def build_reward_blueprint_markdown() -> str:
|
||||
return build_tca_mappo_reward_blueprint().to_markdown()
|
||||
return build_tac_mappo_reward_blueprint().to_markdown()
|
||||
|
||||
|
||||
def iter_required_signals() -> Iterable[str]:
|
||||
signals = set()
|
||||
for term in build_tca_mappo_reward_blueprint().terms:
|
||||
for term in build_tac_mappo_reward_blueprint().terms:
|
||||
signals.update(term.required_signals)
|
||||
return tuple(sorted(signals))
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ from agents.dcqmix_agent import DCQMIXAgent
|
|||
from agents.rule_vsl_agent import RULE_VSL_AGENT_CLASSES
|
||||
from agents.sac_agent import SACAgent
|
||||
from agents.sctd3_agent import SCTD3Agent
|
||||
from agents.tcamappo_agent import TCAMAPPOAgent
|
||||
from agents.tacmappo_agent import TACMAPPOAgent
|
||||
from agents.td3_agent import TD3Agent
|
||||
from envs.edge_vsl_env import SUMOEdgeVSLEnvironment
|
||||
from envs.reward_system import REWARD_COMPONENT_COLUMNS, REWARD_COMPONENT_LABELS
|
||||
|
|
@ -50,7 +50,7 @@ from utils.run_dirs import find_shared_config_path, resolve_checkpoint_root
|
|||
|
||||
|
||||
RULE_BASELINE_NAMES = ("occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl")
|
||||
MODEL_ORDER = [*RULE_BASELINE_NAMES, "ppo", "gpro", "appo", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]
|
||||
MODEL_ORDER = [*RULE_BASELINE_NAMES, "ppo", "gpro", "appo", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]
|
||||
BASELINE_NAME = "no_control"
|
||||
EVAL_ORDER = [BASELINE_NAME] + MODEL_ORDER
|
||||
MODEL_LABELS = {
|
||||
|
|
@ -62,7 +62,7 @@ MODEL_LABELS = {
|
|||
"gpro": "GPRO-PPO",
|
||||
"appo": "APPO",
|
||||
"mappo": "MAPPO",
|
||||
"tcamappo": "TCA-MAPPO",
|
||||
"tacmappo": "TAC-MAPPO",
|
||||
"dcmappo": "DC-MAPPO",
|
||||
"dqn": "DQN",
|
||||
"madqn": "MA-DQN",
|
||||
|
|
@ -101,7 +101,7 @@ def parse_args():
|
|||
"--models",
|
||||
nargs="*",
|
||||
default=None,
|
||||
help="Subset of models to evaluate, e.g. --models occ_rule_vsl bottleneck_rule_vsl harmonization_rule_vsl ppo gpro tcamappo dcmappo dqn madqn ddqn qmix dcqmix ddpg d3pg sac td3 sctd3",
|
||||
help="Subset of models to evaluate, e.g. --models occ_rule_vsl bottleneck_rule_vsl harmonization_rule_vsl ppo gpro tacmappo dcmappo dqn madqn ddqn qmix dcqmix ddpg d3pg sac td3 sctd3",
|
||||
)
|
||||
parser.add_argument("--seed", type=int, default=42, help="Evaluation seed.")
|
||||
parser.add_argument(
|
||||
|
|
@ -265,7 +265,7 @@ def resolve_project_path(path_str: Optional[str]) -> Optional[str]:
|
|||
def resolve_model_load_path(model_name: str, checkpoint_dir: str) -> str:
|
||||
if model_name in RULE_BASELINE_NAMES:
|
||||
return ""
|
||||
if model_name in {"ppo", "gpro", "appo", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix"}:
|
||||
if model_name in {"ppo", "gpro", "appo", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix"}:
|
||||
best_path = os.path.join(checkpoint_dir, "model_best.pt")
|
||||
if os.path.isfile(best_path):
|
||||
return best_path
|
||||
|
|
@ -304,7 +304,6 @@ def build_value_based_agent(agent_cls, agent_cfg: dict, env: SUMOEdgeVSLEnvironm
|
|||
"target_update": agent_cfg.get("target_update", 5),
|
||||
"device": agent_cfg.get("device", "cuda"),
|
||||
"edge_feature_dim": env.features_per_edge,
|
||||
"time_feature_dim": 3,
|
||||
"total_edge_count": env.num_edges,
|
||||
"controlled_start_index": env.controlled_edge_start_index,
|
||||
"num_corridor_blocks": agent_cfg.get("num_corridor_blocks", 2),
|
||||
|
|
@ -431,8 +430,8 @@ def build_agent(model_name: str, config: dict, env: SUMOEdgeVSLEnvironment):
|
|||
lr_schedule=agent_cfg.get("lr_schedule", "cosine"),
|
||||
total_episodes=config.get("training", {}).get("num_episodes", 4000),
|
||||
)
|
||||
if model_name == "tcamappo":
|
||||
return TCAMAPPOAgent(
|
||||
if model_name == "tacmappo":
|
||||
return TACMAPPOAgent(
|
||||
state_dim=env.state_dim,
|
||||
num_agents=env.num_controlled_edges,
|
||||
num_actions=env.action_dim,
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ from envs.reward_system import REWARD_COMPONENT_COLUMNS, REWARD_COMPONENT_LABELS
|
|||
from utils.run_dirs import find_latest_run_root, find_run_root_by_timestamp
|
||||
|
||||
|
||||
MODEL_ORDER = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "appo", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]
|
||||
MODEL_ORDER = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "appo", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]
|
||||
MODEL_LABELS = {
|
||||
"occ_rule_vsl": "Occ-Rule-VSL",
|
||||
"bottleneck_rule_vsl": "Bottleneck-Rule-VSL",
|
||||
|
|
@ -29,7 +29,7 @@ MODEL_LABELS = {
|
|||
"gpro": "GPRO-PPO",
|
||||
"appo": "APPO",
|
||||
"mappo": "MAPPO",
|
||||
"tcamappo": "TCA-MAPPO",
|
||||
"tacmappo": "TAC-MAPPO",
|
||||
"dcmappo": "DC-MAPPO",
|
||||
"dqn": "DQN",
|
||||
"madqn": "MA-DQN",
|
||||
|
|
@ -50,7 +50,7 @@ MODEL_COLORS = {
|
|||
"gpro": "#6a3d9a",
|
||||
"appo": "#ff7f0e",
|
||||
"mappo": "#2ca02c",
|
||||
"tcamappo": "#7f7f7f",
|
||||
"tacmappo": "#7f7f7f",
|
||||
"dcmappo": "#8c564b",
|
||||
"dqn": "#d62728",
|
||||
"madqn": "#ff9896",
|
||||
|
|
@ -69,7 +69,7 @@ FLOW_LABEL = REWARD_COMPONENT_LABELS.get(FLOW_COLUMN, "Flow")
|
|||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description="Plot live training progress from run logs.")
|
||||
parser.add_argument("--model", default=None, help="Model name, e.g. ppo/gpro/appo/mappo/tcamappo/dcmappo/dqn/madqn/ddqn/qmix/dcqmix/ddpg/d3pg/sac/td3/sctd3")
|
||||
parser.add_argument("--model", default=None, help="Model name, e.g. ppo/gpro/appo/mappo/tacmappo/dcmappo/dqn/madqn/ddqn/qmix/dcqmix/ddpg/d3pg/sac/td3/sctd3")
|
||||
parser.add_argument(
|
||||
"--all-models",
|
||||
action="store_true",
|
||||
|
|
|
|||
|
|
@ -20,13 +20,13 @@ from training.train_rule_vsl import (
|
|||
)
|
||||
from training.train_sac import train_sumo_sac
|
||||
from training.train_sctd3 import train_sumo_sctd3
|
||||
from training.train_tcamappo import train_sumo_tcamappo
|
||||
from training.train_tacmappo import train_sumo_tacmappo
|
||||
from training.train_td3 import train_sumo_td3
|
||||
|
||||
|
||||
# DEFAULT_MODELS: List[str] = ["ppo"]
|
||||
DEFAULT_MODELS: List[str] = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3"]
|
||||
ALL_MODELS: List[str] = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "appo", "mappo", "tcamappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]
|
||||
DEFAULT_MODELS: List[str] = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3"]
|
||||
ALL_MODELS: List[str] = ["occ_rule_vsl", "bottleneck_rule_vsl", "harmonization_rule_vsl", "ppo", "gpro", "appo", "mappo", "tacmappo", "dcmappo", "dqn", "madqn", "ddqn", "qmix", "dcqmix", "ddpg", "d3pg", "sac", "td3", "sctd3"]
|
||||
|
||||
|
||||
TRAINERS: Dict[str, Callable] = {
|
||||
|
|
@ -37,7 +37,7 @@ TRAINERS: Dict[str, Callable] = {
|
|||
"gpro": train_sumo_gpro,
|
||||
"appo": train_sumo_appo,
|
||||
"mappo": train_sumo_mappo,
|
||||
"tcamappo": train_sumo_tcamappo,
|
||||
"tacmappo": train_sumo_tacmappo,
|
||||
"dcmappo": train_sumo_dcmappo,
|
||||
"dqn": train_sumo_dqn,
|
||||
"madqn": train_sumo_madqn,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
"""
|
||||
Temporal Credit Assignment MAPPO training script for SUMO + TraCI VSL.
|
||||
TAC-MAPPO training script for SUMO + TraCI VSL.
|
||||
"""
|
||||
import copy
|
||||
import os
|
||||
|
|
@ -11,7 +11,7 @@ from tqdm import tqdm
|
|||
|
||||
matplotlib.use("Agg")
|
||||
|
||||
from agents.tcamappo_agent import TCAMAPPOAgent
|
||||
from agents.tacmappo_agent import TACMAPPOAgent
|
||||
from envs.edge_vsl_env import SUMOEdgeVSLEnvironment
|
||||
from envs.reward_system import REWARD_COMPONENT_COLUMNS, average_reward_components, init_reward_component_totals
|
||||
from utils.config import get_agent_config, get_training_config
|
||||
|
|
@ -22,17 +22,17 @@ from utils.run_dirs import resolve_run_dirs, write_shared_run_config
|
|||
from utils.seeding import derive_seed, resolve_base_seed, set_global_seed
|
||||
|
||||
|
||||
def train_sumo_tcamappo(log_dir=None, checkpoint_dir=None, run_timestamp=None):
|
||||
def train_sumo_tacmappo(log_dir=None, checkpoint_dir=None, run_timestamp=None):
|
||||
with open("config_sumo_vsl.yaml", "r", encoding="utf-8") as f:
|
||||
config = yaml.safe_load(f)
|
||||
|
||||
agent_config = get_agent_config(config, "tcamappo")
|
||||
agent_config = get_agent_config(config, "tacmappo")
|
||||
train_config = get_training_config(config)
|
||||
base_seed = resolve_base_seed(train_config)
|
||||
set_global_seed(base_seed)
|
||||
|
||||
_, checkpoint_dir, log_dir = resolve_run_dirs(
|
||||
"tcamappo",
|
||||
"tacmappo",
|
||||
log_dir=log_dir,
|
||||
checkpoint_dir=checkpoint_dir,
|
||||
run_timestamp=run_timestamp,
|
||||
|
|
@ -49,11 +49,11 @@ def train_sumo_tcamappo(log_dir=None, checkpoint_dir=None, run_timestamp=None):
|
|||
run_timestamp=run_timestamp,
|
||||
)
|
||||
|
||||
logger = TrainingLogger(log_dir, "tcamappo")
|
||||
logger = TrainingLogger(log_dir, "tacmappo")
|
||||
env = SUMOEdgeVSLEnvironment(runtime_config)
|
||||
|
||||
print("=" * 70)
|
||||
print("TCA-MAPPO training - SUMO+TraCI VSL environment")
|
||||
print("TAC-MAPPO training - SUMO+TraCI VSL environment")
|
||||
print("=" * 70)
|
||||
print(f" State dim: {env.state_dim}")
|
||||
print(f" Agents: {env.num_edges}")
|
||||
|
|
@ -68,7 +68,7 @@ def train_sumo_tcamappo(log_dir=None, checkpoint_dir=None, run_timestamp=None):
|
|||
print(f" Global seed: {base_seed if base_seed is not None else 'None (random)'}")
|
||||
print()
|
||||
|
||||
agent = TCAMAPPOAgent(
|
||||
agent = TACMAPPOAgent(
|
||||
state_dim=env.state_dim,
|
||||
num_agents=env.num_controlled_edges,
|
||||
num_actions=env.action_dim,
|
||||
|
|
@ -43,7 +43,6 @@ def _build_value_based_agent(agent_builder: Callable[..., object], env, agent_co
|
|||
"target_update": agent_config.get("target_update", 10),
|
||||
"device": agent_config.get("device", "cuda"),
|
||||
"edge_feature_dim": env.features_per_edge,
|
||||
"time_feature_dim": 3,
|
||||
"total_edge_count": env.num_edges,
|
||||
"controlled_start_index": env.controlled_edge_start_index,
|
||||
"num_corridor_blocks": agent_config.get("num_corridor_blocks", 2),
|
||||
|
|
|
|||
Loading…
Reference in New Issue