# SUMO + TraCI VSL training configuration
#
# NOTE(review): the block structure below was reconstructed from a
# whitespace-collapsed copy of this file. Keys, values and their order are
# unchanged; the top-level placement of `reward`, `training` and `agents`
# is assumed -- confirm against the config loader.
---
sumo:
  net_file: "sumo_resource/modified.net.xml"
  # Exactly one route_file is active; the commented lines are disabled
  # alternatives.
  # route_file: "sumo_resource/routes_filtered.xml"
  route_file: "sumo_resource/routes_20231002_optimized.xml"
  # route_file: "sumo_resource/routes_optimized.xml"
  detector_add_file: "sumo_resource/metrics_il.add.xml"
  enex_add_file: "sumo_resource/metrics_enex.add.xml"
  step_length: 1
  # Active simulation window. Presumably seconds (54000-57600 would be
  # 15:00-16:00) -- TODO confirm the unit with the consumer.
  begin_time: 54000
  end_time: 57600
  # Disabled alternative window:
  # begin_time: 39600
  # end_time: 57600
  # gui: true
  gui: false
  no_warnings: true

environment:
  control_interval: 60
  control_segment_length_m: 1000
  detector_spacing_m: 100
  passive_prefix_segment_count: 3
  # Ordered reference corridor edges. The environment derives 1000 m
  # control segments from this junction-bounded corridor.
  control_edges:
    - "G1523_AM3_4.1"
    - "G1523_AM3_4.2"
    - "G1523_AM3_4.3"
    - "G1523_AM3_4.4"
    - "G1523_AM3_4.5"
    - "G1523_AM3_4.6"
    - "G1523_AM4"
    - "G1523_AM5"
    - "G1523_AM6"
    - "G1523_AM6.1"
    - "G1523_AM6.2"
    - "G1523_AM6.3"
    - "G1523_AM6.4"
    - "G1523_AM6.5"
    - "G1523_AM6.6"
    - "G1523_AM7"
    - "G1523_AM7.1"
    - "G1523_AM7.2"
    - "G1523_AM8"
    - "G1523_AM8.1"
    - "G1523_AM8.2"
  # Available VSL actions in km/h
  speed_actions_kmh: [40, 60, 80, 100, 110]
  # NOTE(review): 30.56 equals 110 km/h expressed in m/s; presumably this
  # value is in m/s -- confirm.
  free_flow_speed: 30.56

reward:
  # Positive term: throughput
  w_flow: 0.4
  # Negative term: speed dispersion across edges
  w_var: 0.3
  # Density-adaptive hard-brake penalty weight
  w_brake_base: 0.1
  w_brake_max: 0.3
  # Penalty for abrupt VSL changes
  w_penalty: 0.2
  rho_critical: 44.75
  k_sigmoid: 0.2
  d_threshold: 5.0
  d_max: 20.0
  C_max: 4924
  # Same numeric value as environment.free_flow_speed; keep the two in sync
  # if they are meant to match -- TODO confirm.
  v_limit: 30.56
  # NOTE(review): 16.67 equals 60 km/h expressed in m/s; unit presumed
  # m/s -- confirm.
  delta_vsl_max: 16.67

training:
  num_episodes: 4000
  random_seed: 42
  log_freq: 10
  save_freq: 20

# Per-algorithm hyperparameters, keyed by agent name.
agents:
  common:
    device: "cuda"

  dqn:
    hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    epsilon_start: 1.0
    epsilon_end: 0.01
    epsilon_decay: 600
    buffer_size: 20000
    batch_size: 128
    target_update: 5

  ppo:
    hidden_layers: [256, 256]
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  # NOTE(review): "gpro" may be a transposition of "grpo"; the key is left
  # unchanged so existing lookups by this name keep working -- confirm.
  gpro:
    hidden_layers: [256, 256]
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"
    group_size: 4
    group_advantage_coef: 0.35
    advantage_epsilon: 1.0e-8

  appo:
    hidden_dim: 128
    num_heads: 4
    num_layers: 2
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  mappo:
    hidden_dim: 256
    critic_hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  tcamappo:
    hidden_dim: 256
    critic_hidden_dim: 256
    history_window: 6
    critic_num_heads: 4
    critic_num_layers: 2
    critic_dropout: 0.05
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  dcmappo:
    hidden_dim: 256
    critic_hidden_dim: 256
    num_corridor_blocks: 2
    corridor_kernel_size: 5
    corridor_dropout: 0.05
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  ddpg:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    exploration_sigma: 0.15

  sac:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    ent_coef: "auto"
    target_entropy: "auto"
    target_update_interval: 1
    log_std_init: -3.0
    activation_fn: "relu"
    actor_hidden_dims: [256, 256]
    critic_hidden_dims: [256, 256]

  td3:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    policy_delay: 2
    exploration_sigma: 0.15
    activation_fn: "relu"
    actor_hidden_dims: [400, 300]
    critic_hidden_dims: [400, 300]

  # Shares every td3 setting above and adds the extractor_* options.
  sctd3:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    policy_delay: 2
    exploration_sigma: 0.15
    activation_fn: "relu"
    actor_hidden_dims: [400, 300]
    critic_hidden_dims: [400, 300]
    extractor_feature_dim: 128
    extractor_edge_hidden_dim: 16
    extractor_global_hidden_dim: 32
    extractor_spatial_blocks: 1
    extractor_kernel_size: 3