---
# SUMO + TraCI VSL training configuration
#
# Sections: `sumo` (simulator inputs and run window), `environment`
# (control corridor, detectors, incident injection, action set), `reward`
# (reward-term weights and shaping constants), `training` (episode loop
# settings) and `agents` (per-algorithm hyperparameters, keyed by agent name).
#
# NOTE(review): the block nesting below was reconstructed from key names and
# comment placement. In particular, confirm against the config loader that
# (a) `incident` ends after `stopped_speed_threshold_ms`,
# (b) `reward` is a top-level section rather than a child of `environment`,
# (c) `bottleneck_window_size` / `leader_gap_threshold_m` belong to `reward`.

sumo:
  net_file: "sumo_resource/modified.net.xml"
  # Alternative route files. Keep exactly one `route_file` uncommented:
  # duplicate keys are invalid YAML and most loaders silently keep the last.
  # route_file: "sumo_resource/routes_filtered.xml"
  route_file: "sumo_resource/routes_20231002_filtered.xml"
  # route_file: "sumo_resource/routes_20231002_optimized.xml"
  # route_file: "sumo_resource/routes_optimized.xml"
  step_length: 1
  # Presumably seconds of simulation time (25200 s = 07:00:00,
  # 36000 s = 10:00:00) - TODO confirm the unit with the loader.
  begin_time: 25200
  end_time: 36000
  # Alternative window (39600 s = 11:00:00, 57600 s = 16:00:00).
  # begin_time: 39600
  # end_time: 57600
  # gui: true
  gui: false
  no_warnings: true

environment:
  control_interval: 60
  warmup_time: 900
  control_segment_length_m: 1000
  detector_spacing_m: 100
  detector_start_offset_m: 50
  vsl_compliance_rate: 1.0
  passive_prefix_segment_count: 3
  incident:
    enabled: false
    # Measured from the end of warmup / the start of active traffic
    # restoration.
    start_delay_min_range: [10, 20]
    duration_min_range: [10, 20]
    # Sample the stopped vehicle location from the downstream portion of
    # the corridor.
    downstream_fraction_range: [0.65, 0.95]
    target_position_buffer_m: 25.0
    position_tolerance_m: 5.0
    stopped_speed_threshold_ms: 0.1
  # Ordered reference corridor edges. The environment derives 1000 m
  # control segments from this junction-bounded corridor.
  control_edges:
    - "G1523_AM3_4.1"
    - "G1523_AM3_4.2"
    - "G1523_AM3_4.3"
    - "G1523_AM3_4.4"
    - "G1523_AM3_4.5"
    - "G1523_AM3_4.6"
    - "G1523_AM4"
    - "G1523_AM5"
    - "G1523_AM6"
    - "G1523_AM6.1"
    - "G1523_AM6.2"
    - "G1523_AM6.3"
    - "G1523_AM6.4"
    - "G1523_AM6.5"
    - "G1523_AM6.6"
    - "G1523_AM7"
    - "G1523_AM7.1"
    - "G1523_AM7.2"
    - "G1523_AM8"
    - "G1523_AM8.1"
    - "G1523_AM8.2"
  # Available VSL actions in km/h
  speed_actions_kmh: [40, 60, 80, 100, 110]
  # Presumably m/s: 30.56 m/s is about 110 km/h, the highest speed action
  # above - TODO confirm the unit.
  free_flow_speed: 30.56

reward:
  # Throughput term.
  w_flow: 0.4
  # Speed variance term.
  w_var: 0.3
  # Hard-brake penalty weight, adapted by density.
  w_brake_base: 0.1
  w_brake_max: 0.3
  # VSL smoothness penalty.
  w_penalty: 0.2
  rho_critical: 44.75
  k_sigmoid: 0.2
  d_threshold: 5.0
  d_max: 20.0
  C_max: 4924
  # Same numeric value as `environment.free_flow_speed`; keep the two in
  # sync if either changes (assumes they describe the same limit - verify).
  v_limit: 30.56
  delta_vsl_max: 16.67
  bottleneck_window_size: 3
  leader_gap_threshold_m: 100.0

training:
  num_episodes: 4000
  random_seed: 42
  log_freq: 10
  save_freq: 20

agents:
  common:
    device: "cuda"

  occ_rule_vsl:
    # Smulders-style local occupancy/speed hysteresis baseline.
    occupancy_release_pct: 12.0
    occupancy_moderate_pct: 20.0
    occupancy_high_pct: 30.0
    occupancy_severe_pct: 40.0
    speed_moderate_ratio: 0.75
    speed_high_ratio: 0.60
    speed_severe_ratio: 0.45
    temporal_step_limit: 1
    spatial_step_limit: 1

  bottleneck_rule_vsl:
    # Hegyi-style downstream bottleneck pre-control baseline.
    bottleneck_occupancy_pct: 25.0
    bottleneck_high_occupancy_pct: 35.0
    bottleneck_speed_ratio: 0.75
    bottleneck_high_speed_ratio: 0.60
    bottleneck_lookahead_segments: 3
    temporal_step_limit: 1
    spatial_step_limit: 1

  harmonization_rule_vsl:
    # Allaby-style speed-harmonization baseline.
    speed_drop_warn_kmh: 10.0
    speed_drop_severe_kmh: 18.0
    harmonization_target_warn_kmh: 80.0
    harmonization_target_severe_kmh: 60.0
    temporal_step_limit: 1
    spatial_step_limit: 1

  dqn:
    hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    epsilon_start: 1.0
    epsilon_end: 0.01
    epsilon_decay: 600
    buffer_size: 20000
    batch_size: 128
    target_update: 5

  madqn:
    hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    epsilon_start: 1.0
    epsilon_end: 0.01
    epsilon_decay: 600
    buffer_size: 20000
    batch_size: 128
    target_update: 5

  ddqn:
    hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    epsilon_start: 1.0
    epsilon_end: 0.01
    epsilon_decay: 600
    buffer_size: 20000
    batch_size: 128
    target_update: 5

  qmix:
    hidden_dim: 256
    mixing_hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    epsilon_start: 1.0
    epsilon_end: 0.01
    epsilon_decay: 600
    buffer_size: 20000
    batch_size: 128
    target_update: 5

  dcqmix:
    hidden_dim: 256
    mixing_hidden_dim: 256
    num_corridor_blocks: 2
    corridor_kernel_size: 5
    corridor_dropout: 0.05
    learning_rate: 0.0003
    gamma: 0.99
    epsilon_start: 1.0
    epsilon_end: 0.01
    epsilon_decay: 600
    buffer_size: 20000
    batch_size: 128
    target_update: 5

  ppo:
    hidden_layers: [256, 256]
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  gpro:
    hidden_layers: [256, 256]
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"
    group_size: 4
    group_advantage_coef: 0.35
    # Written with a decimal point and a signed exponent so that YAML 1.1
    # loaders (e.g. PyYAML) resolve it as a float, not a string.
    advantage_epsilon: 1.0e-8

  appo:
    hidden_dim: 256
    num_heads: 4
    num_layers: 2
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  mappo:
    hidden_dim: 256
    critic_hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  tacmappo:
    hidden_dim: 256
    critic_hidden_dim: 256
    history_window: 6
    critic_num_heads: 4
    critic_num_layers: 2
    critic_dropout: 0.05
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  dcmappo:
    hidden_dim: 256
    critic_hidden_dim: 256
    num_corridor_blocks: 2
    corridor_kernel_size: 5
    corridor_dropout: 0.05
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  ddpg:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    exploration_sigma: 0.15
    activation_fn: "relu"
    actor_hidden_dims: [256, 256]
    critic_hidden_dims: [256, 256]

  d3pg:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    policy_delay: 2
    exploration_sigma: 0.15
    activation_fn: "relu"
    actor_hidden_dims: [256, 256]
    critic_hidden_dims: [256, 256]

  sac:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    # "auto" is kept as a quoted string; its interpretation is up to the
    # agent implementation.
    ent_coef: "auto"
    target_entropy: "auto"
    target_update_interval: 1
    log_std_init: -3.0
    activation_fn: "relu"
    actor_hidden_dims: [256, 256]
    critic_hidden_dims: [256, 256]

  td3:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    policy_delay: 2
    exploration_sigma: 0.15
    activation_fn: "relu"
    actor_hidden_dims: [256, 256]
    critic_hidden_dims: [256, 256]

  sctd3:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    policy_delay: 2
    exploration_sigma: 0.15
    activation_fn: "relu"
    actor_hidden_dims: [256, 256]
    critic_hidden_dims: [256, 256]
    extractor_feature_dim: 128
    extractor_edge_hidden_dim: 16
    extractor_global_hidden_dim: 32
    extractor_spatial_blocks: 1
    extractor_kernel_size: 3