# SUMO + TraCI variable speed limit (VSL) training configuration

# Simulator launch settings: network/route inputs, simulated time window, and
# runtime flags.
# NOTE(review): the file paths are relative — confirm which directory the
# loader resolves them against.
sumo:
  net_file: "sumo_resource/modified.net.xml"
  # Exactly one route_file is active; the commented-out entries are alternative
  # route sets kept for quick switching.
  # route_file: "sumo_resource/routes_filtered.xml"
  route_file: "sumo_resource/routes_20231002_filtered.xml"
  # route_file: "sumo_resource/routes_20231002_optimized.xml"
  # route_file: "sumo_resource/routes_optimized.xml"
  # Presumably seconds per simulation step — TODO confirm.
  step_length: 1
  # Simulated window. If these are seconds, 25200 = 7 h and 36000 = 10 h
  # (a 3 h window); the commented-out pair is 39600 = 11 h to 57600 = 16 h.
  begin_time: 25200
  end_time: 36000
  # begin_time: 39600
  # end_time: 57600
  # Presumably toggles between the SUMO GUI and a headless run — TODO confirm.
  # gui: true
  gui: false
  no_warnings: true

# Environment settings. This first group covers control-loop timing and
# detector/segment layout; the nested groups that follow cover incidents, the
# corridor definition, the action set, and the reward.
environment:
  # Presumably seconds between consecutive control decisions — TODO confirm.
  control_interval: 60
  # Presumably seconds (900 s = 15 min) — TODO confirm the unit.
  warmup_time: 900
  # The `_m` suffix presumably denotes metres.
  control_segment_length_m: 1000
  detector_spacing_m: 100
  detector_start_offset_m: 50
  # Presumably the fraction of drivers obeying the posted limit (1.0 = all) —
  # TODO confirm.
  vsl_compliance_rate: 1.0
  # NOTE(review): looks like the number of leading segments that are not
  # actively controlled — confirm against the environment code.
  passive_prefix_segment_count: 3
  # Optional stopped-vehicle incident scenario; currently disabled.
  incident:
    enabled: false
    # Measured from the end of warmup / the start of active traffic restoration.
    # NOTE(review): `_min_range` presumably means a [low, high] range in
    # minutes (not a minimum) — confirm.
    start_delay_min_range: [10, 20]
    duration_min_range: [10, 20]
    # Sample the stopped vehicle location from the downstream portion of the corridor.
    # The bounds presumably are fractions of the corridor length — TODO confirm.
    downstream_fraction_range: [0.65, 0.95]
    target_position_buffer_m: 25.0
    position_tolerance_m: 5.0
    # NOTE(review): the `_ms` suffix is ambiguous; for a stopped-speed
    # threshold it presumably means m/s rather than milliseconds — confirm.
    stopped_speed_threshold_ms: 0.1

  # Ordered reference corridor edges. The environment derives 1000 m
  # control segments from this junction-bounded corridor.
  # NOTE(review): the 1000 m figure presumably tracks
  # control_segment_length_m; keep the two in sync. The entries are presumably
  # edge IDs from sumo.net_file, so list order matters — confirm.
  control_edges:
    - "G1523_AM3_4.1"
    - "G1523_AM3_4.2"
    - "G1523_AM3_4.3"
    - "G1523_AM3_4.4"
    - "G1523_AM3_4.5"
    - "G1523_AM3_4.6"
    - "G1523_AM4"
    - "G1523_AM5"
    - "G1523_AM6"
    - "G1523_AM6.1"
    - "G1523_AM6.2"
    - "G1523_AM6.3"
    - "G1523_AM6.4"
    - "G1523_AM6.5"
    - "G1523_AM6.6"
    - "G1523_AM7"
    - "G1523_AM7.1"
    - "G1523_AM7.2"
    - "G1523_AM8"
    - "G1523_AM8.1"
    - "G1523_AM8.2"

  # Available VSL actions in km/h.
  speed_actions_kmh: [40, 60, 80, 100, 110]
  # No unit in the key name: 30.56 m/s is about 110 km/h, the highest action
  # above, so this is presumably m/s — TODO confirm.
  free_flow_speed: 30.56

  # Reward shaping. The four base weights (w_flow, w_var, w_brake_base,
  # w_penalty) sum to 1.0.
  reward:
    # Throughput term.
    w_flow: 0.4
    # Speed variance term.
    w_var: 0.3
    # Hard-brake penalty weight, adapted by density.
    # Presumably varies between w_brake_base and w_brake_max — TODO confirm.
    w_brake_base: 0.1
    w_brake_max: 0.3
    # VSL smoothness penalty.
    w_penalty: 0.2

    # Shaping constants. Units are not stated in this file; the notes below are
    # assumptions to confirm against the reward code.
    # rho_critical: presumably a critical density. k_sigmoid: presumably the
    # steepness of a sigmoid.
    rho_critical: 44.75
    k_sigmoid: 0.2
    # NOTE(review): the meaning of `d` (deceleration? distance?) is not evident
    # here — confirm.
    d_threshold: 5.0
    d_max: 20.0
    # Presumably a capacity used to normalise the throughput term — TODO confirm.
    C_max: 4924
    # Same value as free_flow_speed (30.56).
    v_limit: 30.56
    # 16.67 m/s is about 60 km/h if the unit is m/s — TODO confirm.
    delta_vsl_max: 16.67
    bottleneck_window_size: 3
    leader_gap_threshold_m: 100.0

# Global training-loop settings.
training:
  num_episodes: 4000
  random_seed: 42
  # Presumably counted in episodes (log every 10, save every 20) — TODO confirm.
  log_freq: 10
  save_freq: 20

# Per-agent settings, keyed by agent name. `common` presumably applies to every
# agent — TODO confirm how it is merged with the per-agent mappings.
agents:
  common:
    # NOTE(review): hard-coded to "cuda"; confirm the trainer handles machines
    # without a GPU.
    device: "cuda"

  occ_rule_vsl:
    # Smulders-style local occupancy/speed hysteresis baseline.
    # Occupancy thresholds (percent, per the `_pct` suffix) rise from release
    # to severe.
    occupancy_release_pct: 12.0
    occupancy_moderate_pct: 20.0
    occupancy_high_pct: 30.0
    occupancy_severe_pct: 40.0
    # Speed ratios fall as severity rises.
    # NOTE(review): the reference speed for the ratios is not visible here —
    # confirm.
    speed_moderate_ratio: 0.75
    speed_high_ratio: 0.60
    speed_severe_ratio: 0.45
    # Presumably the largest allowed change, in action steps, between
    # consecutive control intervals (temporal) and between adjacent segments
    # (spatial) — TODO confirm.
    temporal_step_limit: 1
    spatial_step_limit: 1

  bottleneck_rule_vsl:
    # Hegyi-style downstream bottleneck pre-control baseline.
    # Two trigger levels: a base level and a stricter "high" level, each with an
    # occupancy threshold (percent) and a speed ratio.
    bottleneck_occupancy_pct: 25.0
    bottleneck_high_occupancy_pct: 35.0
    bottleneck_speed_ratio: 0.75
    bottleneck_high_speed_ratio: 0.60
    # Presumably how many downstream segments are inspected — TODO confirm.
    bottleneck_lookahead_segments: 3
    # Same step limits as the other rule-based baselines in this file.
    temporal_step_limit: 1
    spatial_step_limit: 1

  harmonization_rule_vsl:
    # Allaby-style speed-harmonization baseline.
    # Speed-drop triggers and their target limits, all in km/h; both targets
    # (80, 60) are members of speed_actions_kmh.
    speed_drop_warn_kmh: 10.0
    speed_drop_severe_kmh: 18.0
    harmonization_target_warn_kmh: 80.0
    harmonization_target_severe_kmh: 60.0
    # Same step limits as the other rule-based baselines in this file.
    temporal_step_limit: 1
    spatial_step_limit: 1

  # Hyperparameters for the `dqn` agent; `madqn` and `ddqn` use identical values.
  dqn:
    hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    # Exploration schedule from epsilon_start down to epsilon_end.
    # NOTE(review): the unit of epsilon_decay (steps vs. episodes) and the decay
    # shape are not visible here — confirm.
    epsilon_start: 1.0
    epsilon_end: 0.01
    epsilon_decay: 600
    buffer_size: 20000
    batch_size: 128
    # NOTE(review): unit of target_update (episodes vs. updates) — confirm.
    target_update: 5

  # Hyperparameters for the `madqn` agent; the values match `dqn`.
  madqn:
    hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    # Exploration schedule; the unit of epsilon_decay is not visible here —
    # TODO confirm.
    epsilon_start: 1.0
    epsilon_end: 0.01
    epsilon_decay: 600
    buffer_size: 20000
    batch_size: 128
    target_update: 5

  # Hyperparameters for the `ddqn` agent; the values match `dqn`.
  ddqn:
    hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    # Exploration schedule; the unit of epsilon_decay is not visible here —
    # TODO confirm.
    epsilon_start: 1.0
    epsilon_end: 0.01
    epsilon_decay: 600
    buffer_size: 20000
    batch_size: 128
    target_update: 5

  # Hyperparameters for the `qmix` agent: the `dqn` values plus a mixing width.
  qmix:
    hidden_dim: 256
    # Presumably the hidden width of the mixing network — TODO confirm.
    mixing_hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    epsilon_start: 1.0
    epsilon_end: 0.01
    epsilon_decay: 600
    buffer_size: 20000
    batch_size: 128
    target_update: 5

  # Hyperparameters for the `dcqmix` agent: the `qmix` values plus corridor_*
  # settings (the same corridor_* values appear under `dcmappo`).
  dcqmix:
    hidden_dim: 256
    mixing_hidden_dim: 256
    # NOTE(review): these look like settings for a stack of convolution-style
    # blocks applied along the corridor — confirm against the model code.
    num_corridor_blocks: 2
    corridor_kernel_size: 5
    corridor_dropout: 0.05
    learning_rate: 0.0003
    gamma: 0.99
    epsilon_start: 1.0
    epsilon_end: 0.01
    epsilon_decay: 600
    buffer_size: 20000
    batch_size: 128
    target_update: 5

  # Hyperparameters for the `ppo` agent.
  ppo:
    hidden_layers: [256, 256]
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    # NOTE(review): 15 is small next to the 128 used by the replay-buffer
    # agents; confirm what one batch element is (transition, step, episode).
    batch_size: 15
    lr_schedule: "cosine"

  # Hyperparameters for the `gpro` agent: the `ppo` values plus group settings.
  # NOTE(review): the group_* keys suggest Group Relative Policy Optimization,
  # usually abbreviated GRPO; confirm the key spelling the code looks up before
  # renaming anything.
  gpro:
    hidden_layers: [256, 256]
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"
    group_size: 4
    group_advantage_coef: 0.35
    # Keep the decimal point and signed exponent: some YAML 1.1 loaders read
    # exponent forms without a decimal point (e.g. 1e-8) as strings.
    # Presumably a small constant guarding a division during advantage
    # normalisation — TODO confirm.
    advantage_epsilon: 1.0e-8

  # Hyperparameters for the `appo` agent.
  appo:
    hidden_dim: 256
    # NOTE(review): num_heads / num_layers suggest an attention-based network —
    # confirm against the model code.
    num_heads: 4
    num_layers: 2
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  # Hyperparameters for the `mappo` agent; optimisation values match `ppo`.
  mappo:
    # Separate widths for the policy (hidden_dim) and critic networks —
    # presumably; TODO confirm which network hidden_dim applies to.
    hidden_dim: 256
    critic_hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  # Hyperparameters for the `tacmappo` agent: the `mappo` values plus
  # history/critic settings.
  tacmappo:
    hidden_dim: 256
    critic_hidden_dim: 256
    # Presumably the number of past control steps fed to the model — TODO confirm.
    history_window: 6
    # NOTE(review): heads/layers/dropout suggest an attention-based critic —
    # confirm against the model code.
    critic_num_heads: 4
    critic_num_layers: 2
    critic_dropout: 0.05
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  # Hyperparameters for the `dcmappo` agent: the `mappo` values plus corridor_*
  # settings (the same corridor_* values appear under `dcqmix`).
  dcmappo:
    hidden_dim: 256
    critic_hidden_dim: 256
    # NOTE(review): these look like settings for a stack of convolution-style
    # blocks applied along the corridor — confirm against the model code.
    num_corridor_blocks: 2
    corridor_kernel_size: 5
    corridor_dropout: 0.05
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  # Hyperparameters for the `ddpg` agent.
  ddpg:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    # Presumably the number of collected steps before updates begin — TODO confirm.
    learning_starts: 200
    batch_size: 128
    # Presumably the soft target-update coefficient — TODO confirm.
    tau: 0.005
    # Presumably the standard deviation of the exploration noise — TODO confirm.
    exploration_sigma: 0.15
    activation_fn: "relu"
    actor_hidden_dims: [256, 256]
    critic_hidden_dims: [256, 256]

  # Hyperparameters for the `d3pg` agent; the values are identical to `td3`.
  d3pg:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    # Presumably the actor is updated once per policy_delay critic updates —
    # TODO confirm.
    policy_delay: 2
    exploration_sigma: 0.15
    activation_fn: "relu"
    actor_hidden_dims: [256, 256]
    critic_hidden_dims: [256, 256]

  # Hyperparameters for the `sac` agent.
  # NOTE(review): several key names (ent_coef, target_entropy,
  # target_update_interval, log_std_init) resemble Stable-Baselines3 SAC
  # arguments — confirm whether they are forwarded to that library.
  sac:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    # "auto" is a string sentinel, not a number; presumably it requests
    # automatic tuning — TODO confirm.
    ent_coef: "auto"
    target_entropy: "auto"
    target_update_interval: 1
    log_std_init: -3.0
    activation_fn: "relu"
    actor_hidden_dims: [256, 256]
    critic_hidden_dims: [256, 256]

  # Hyperparameters for the `td3` agent; the values are identical to `d3pg`.
  td3:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    # Presumably the actor is updated once per policy_delay critic updates —
    # TODO confirm.
    policy_delay: 2
    exploration_sigma: 0.15
    activation_fn: "relu"
    actor_hidden_dims: [256, 256]
    critic_hidden_dims: [256, 256]

  # Hyperparameters for the `sctd3` agent: the `td3` values plus extractor_*
  # settings.
  sctd3:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    policy_delay: 2
    exploration_sigma: 0.15
    activation_fn: "relu"
    actor_hidden_dims: [256, 256]
    critic_hidden_dims: [256, 256]
    # NOTE(review): the extractor_* keys presumably size a feature extractor
    # placed in front of the actor/critic (per-edge and global branches plus
    # spatial blocks) — confirm against the model code.
    extractor_feature_dim: 128
    extractor_edge_hidden_dim: 16
    extractor_global_hidden_dim: 32
    extractor_spatial_blocks: 1
    extractor_kernel_size: 3