---
# SUMO + TraCI VSL training configuration
#
# Top-level sections:
#   sumo        - simulator input files and run window
#   environment - control interval, controlled edges, VSL action set
#   reward      - reward-term weights and shaping constants
#   training    - episode count, seed, logging/checkpoint cadence
#   agents      - per-algorithm hyperparameters
#
# NOTE(review): the section nesting below was reconstructed from key order
# (the file had lost its line breaks and indentation). Confirm against the
# config loader that `reward` is a top-level section and that `dqn`, `ppo`,
# `appo`, `ddpg` and `td3` are siblings of `common` under `agents`.

sumo:
  net_file: "sumo_resource/modified.net.xml"
  route_file: "sumo_resource/routes_filtered.xml"
  # Alternative route file, currently disabled:
  # route_file: "sumo_resource/routes_optimized.xml"
  detector_add_file: "sumo_resource/metrics_il.add.xml"
  enex_add_file: "sumo_resource/metrics_enex.add.xml"
  # Presumably seconds of simulated time per step - TODO confirm.
  step_length: 0.5
  # Simulation window. Presumably seconds (54000 -> 57600 spans 3600) -
  # TODO confirm.
  begin_time: 54000
  end_time: 57600
  # Alternative, longer window, currently disabled:
  # begin_time: 39600
  # end_time: 57600
  gui: false
  no_warnings: true

environment:
  # Presumably in seconds between control decisions - TODO confirm unit.
  control_interval: 60
  # Controlled motorway edges
  control_edges:
    - "G1523_AM3_4.1"
    - "G1523_AM3_4.2"
    - "G1523_AM3_4.3"
    - "G1523_AM3_4.4"
    - "G1523_AM3_4.5"
    - "G1523_AM3_4.6"
    - "G1523_AM4"
    - "G1523_AM5"
    - "G1523_AM6"
    - "G1523_AM6.1"
    - "G1523_AM6.2"
    - "G1523_AM6.3"
    - "G1523_AM6.4"
    - "G1523_AM6.5"
    - "G1523_AM6.6"
    - "G1523_AM7"
    - "G1523_AM7.1"
    - "G1523_AM7.2"
    - "G1523_AM8"
    - "G1523_AM8.1"
    - "G1523_AM8.2"
  # Available VSL actions in km/h
  speed_actions_kmh: [40, 60, 80, 100, 110]
  # Presumably m/s: 110 km/h / 3.6 = 30.56 - TODO confirm unit.
  free_flow_speed: 30.56

reward:
  # Positive term: throughput
  w_flow: 0.4
  # Negative term: speed dispersion across edges
  w_var: 0.3
  # Density-adaptive hard-brake penalty weight
  w_brake_base: 0.1
  w_brake_max: 0.3
  # Penalty for abrupt VSL changes
  w_penalty: 0.2
  # Shaping constants. Units are not stated in this file - TODO confirm
  # against the reward implementation before changing any of them.
  rho_critical: 44.75
  k_sigmoid: 0.2
  d_threshold: 5.0
  d_max: 20.0
  C_max: 4924
  # Same value as environment.free_flow_speed; keep the two in sync.
  v_limit: 30.56
  # Presumably m/s: 60 km/h / 3.6 = 16.67 - TODO confirm unit.
  delta_vsl_max: 16.67

training:
  num_episodes: 100
  random_seed: 42
  log_freq: 10
  save_freq: 20

agents:
  common:
    device: "cuda"

  dqn:
    hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    epsilon_start: 1.0
    epsilon_end: 0.01
    epsilon_decay: 600
    buffer_size: 20000
    batch_size: 128
    target_update: 5

  ppo:
    hidden_layers: [256, 256]
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  appo:
    hidden_dim: 128
    num_heads: 4
    num_layers: 2
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  ddpg:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    exploration_sigma: 0.15

  td3:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    policy_delay: 2
    exploration_sigma: 0.15