# File: ctm-dqn/config_sumo_vsl.yaml
#
# Listing metadata: 351 lines
# Listing metadata: 7.7 KiB
# Listing metadata: YAML

# SUMO + TraCI VSL training configuration
# SUMO simulation inputs and run window.
# NOTE(review): child keys are nested under `sumo`; without indentation they
# parse as top-level keys and `sumo` itself becomes null.
sumo:
  net_file: "sumo_resource/modified.net.xml"
  # Alternative route files are kept commented out; exactly one `route_file`
  # may be active at a time (duplicate keys are invalid YAML).
  # route_file: "sumo_resource/routes_filtered.xml"
  route_file: "sumo_resource/routes_20231002_filtered.xml"
  # route_file: "sumo_resource/routes_20231002_optimized.xml"
  # route_file: "sumo_resource/routes_optimized.xml"
  step_length: 1
  # Active simulation window. Presumably seconds since midnight
  # (25200 = 07:00, 36000 = 10:00) -- TODO confirm against the loader.
  begin_time: 25200
  end_time: 36000
  # Alternative window (39600 = 11:00, 57600 = 16:00 under the same assumption).
  # begin_time: 39600
  # end_time: 57600
  # gui: true
  gui: false
  no_warnings: true
# Environment, corridor and reward settings.
# NOTE(review): the nesting below is reconstructed from key semantics.
# Confirm in particular that `reward` belongs under `environment` and that
# `bottleneck_window_size` / `leader_gap_threshold_m` belong under `reward`.
environment:
  control_interval: 60
  warmup_time: 900
  control_segment_length_m: 1000
  detector_spacing_m: 100
  detector_start_offset_m: 50
  vsl_compliance_rate: 1.0
  passive_prefix_segment_count: 3

  # Stopped-vehicle incident scenario; disabled in this configuration.
  incident:
    enabled: false
    # Measured from the end of warmup / the start of active traffic restoration.
    start_delay_min_range: [10, 20]
    duration_min_range: [10, 20]
    # Sample the stopped vehicle location from the downstream portion of the corridor.
    downstream_fraction_range: [0.65, 0.95]
    target_position_buffer_m: 25.0
    position_tolerance_m: 5.0
    stopped_speed_threshold_ms: 0.1

  # Ordered reference corridor edges. The environment derives 1000 m
  # control segments from this junction-bounded corridor.
  control_edges:
    - "G1523_AM3_4.1"
    - "G1523_AM3_4.2"
    - "G1523_AM3_4.3"
    - "G1523_AM3_4.4"
    - "G1523_AM3_4.5"
    - "G1523_AM3_4.6"
    - "G1523_AM4"
    - "G1523_AM5"
    - "G1523_AM6"
    - "G1523_AM6.1"
    - "G1523_AM6.2"
    - "G1523_AM6.3"
    - "G1523_AM6.4"
    - "G1523_AM6.5"
    - "G1523_AM6.6"
    - "G1523_AM7"
    - "G1523_AM7.1"
    - "G1523_AM7.2"
    - "G1523_AM8"
    - "G1523_AM8.1"
    - "G1523_AM8.2"

  # Available VSL actions in km/h
  speed_actions_kmh: [40, 60, 80, 100, 110]
  # 30.56 is 110 / 3.6 rounded, i.e. the highest speed action if this value
  # is in m/s -- presumably so; TODO confirm the unit.
  free_flow_speed: 30.56

  reward:
    # Throughput term.
    w_flow: 0.4
    # Speed variance term.
    w_var: 0.3
    # Hard-brake penalty weight, adapted by density.
    w_brake_base: 0.1
    w_brake_max: 0.3
    # VSL smoothness penalty.
    w_penalty: 0.2
    rho_critical: 44.75
    k_sigmoid: 0.2
    d_threshold: 5.0
    d_max: 20.0
    C_max: 4924
    # Same numeric value as `free_flow_speed` above; keep the two in sync.
    v_limit: 30.56
    delta_vsl_max: 16.67
    bottleneck_window_size: 3
    leader_gap_threshold_m: 100.0
# Training-loop settings shared by all agents.
# NOTE(review): child keys are nested under `training`; without indentation
# they parse as top-level keys and `training` itself becomes null.
training:
  num_episodes: 4000
  random_seed: 42
  # Logging and checkpoint cadence; presumably counted in episodes -- TODO confirm.
  log_freq: 10
  save_freq: 20
# Per-agent hyperparameters, keyed by agent name.
# Each agent's parameters must stay nested under its own key: many parameter
# names (learning_rate, gamma, batch_size, ...) repeat across agents and would
# collide as duplicate keys if flattened into a single mapping.
agents:
  # Settings shared by every agent.
  common:
    device: "cuda"

  # --- Rule-based baselines -------------------------------------------------
  occ_rule_vsl:
    # Smulders-style local occupancy/speed hysteresis baseline.
    occupancy_release_pct: 12.0
    occupancy_moderate_pct: 20.0
    occupancy_high_pct: 30.0
    occupancy_severe_pct: 40.0
    speed_moderate_ratio: 0.75
    speed_high_ratio: 0.60
    speed_severe_ratio: 0.45
    temporal_step_limit: 1
    spatial_step_limit: 1

  bottleneck_rule_vsl:
    # Hegyi-style downstream bottleneck pre-control baseline.
    bottleneck_occupancy_pct: 25.0
    bottleneck_high_occupancy_pct: 35.0
    bottleneck_speed_ratio: 0.75
    bottleneck_high_speed_ratio: 0.60
    bottleneck_lookahead_segments: 3
    temporal_step_limit: 1
    spatial_step_limit: 1

  harmonization_rule_vsl:
    # Allaby-style speed-harmonization baseline.
    speed_drop_warn_kmh: 10.0
    speed_drop_severe_kmh: 18.0
    harmonization_target_warn_kmh: 80.0
    harmonization_target_severe_kmh: 60.0
    temporal_step_limit: 1
    spatial_step_limit: 1

  # --- Agents configured with epsilon schedule + replay buffer --------------
  dqn:
    hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    epsilon_start: 1.0
    epsilon_end: 0.01
    epsilon_decay: 600
    buffer_size: 20000
    batch_size: 128
    target_update: 5

  madqn:
    hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    epsilon_start: 1.0
    epsilon_end: 0.01
    epsilon_decay: 600
    buffer_size: 20000
    batch_size: 128
    target_update: 5

  ddqn:
    hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    epsilon_start: 1.0
    epsilon_end: 0.01
    epsilon_decay: 600
    buffer_size: 20000
    batch_size: 128
    target_update: 5

  qmix:
    hidden_dim: 256
    mixing_hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    epsilon_start: 1.0
    epsilon_end: 0.01
    epsilon_decay: 600
    buffer_size: 20000
    batch_size: 128
    target_update: 5

  dcqmix:
    hidden_dim: 256
    mixing_hidden_dim: 256
    num_corridor_blocks: 2
    corridor_kernel_size: 5
    corridor_dropout: 0.05
    learning_rate: 0.0003
    gamma: 0.99
    epsilon_start: 1.0
    epsilon_end: 0.01
    epsilon_decay: 600
    buffer_size: 20000
    batch_size: 128
    target_update: 5

  # --- Agents configured with clip_epsilon / GAE parameters -----------------
  ppo:
    hidden_layers: [256, 256]
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  gpro:
    hidden_layers: [256, 256]
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"
    group_size: 4
    group_advantage_coef: 0.35
    advantage_epsilon: 1.0e-8

  appo:
    hidden_dim: 256
    num_heads: 4
    num_layers: 2
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  mappo:
    hidden_dim: 256
    critic_hidden_dim: 256
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  tacmappo:
    hidden_dim: 256
    critic_hidden_dim: 256
    history_window: 6
    critic_num_heads: 4
    critic_num_layers: 2
    critic_dropout: 0.05
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  dcmappo:
    hidden_dim: 256
    critic_hidden_dim: 256
    num_corridor_blocks: 2
    corridor_kernel_size: 5
    corridor_dropout: 0.05
    learning_rate: 0.0003
    gamma: 0.99
    gae_lambda: 0.95
    clip_epsilon: 0.2
    value_coef: 0.5
    entropy_coef: 0.01
    max_grad_norm: 0.5
    ppo_epochs: 4
    batch_size: 15
    lr_schedule: "cosine"

  # --- Agents configured with separate actor/critic networks + tau ----------
  ddpg:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    exploration_sigma: 0.15
    activation_fn: "relu"
    actor_hidden_dims: [256, 256]
    critic_hidden_dims: [256, 256]

  d3pg:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    policy_delay: 2
    exploration_sigma: 0.15
    activation_fn: "relu"
    actor_hidden_dims: [256, 256]
    critic_hidden_dims: [256, 256]

  sac:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    # Quoted on purpose: these are the literal string "auto", not a number.
    ent_coef: "auto"
    target_entropy: "auto"
    target_update_interval: 1
    log_std_init: -3.0
    activation_fn: "relu"
    actor_hidden_dims: [256, 256]
    critic_hidden_dims: [256, 256]

  td3:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    policy_delay: 2
    exploration_sigma: 0.15
    activation_fn: "relu"
    actor_hidden_dims: [256, 256]
    critic_hidden_dims: [256, 256]

  sctd3:
    learning_rate: 0.0003
    gamma: 0.99
    buffer_size: 20000
    learning_starts: 200
    batch_size: 128
    tau: 0.005
    policy_delay: 2
    exploration_sigma: 0.15
    activation_fn: "relu"
    actor_hidden_dims: [256, 256]
    critic_hidden_dims: [256, 256]
    # Feature-extractor settings specific to this agent.
    extractor_feature_dim: 128
    extractor_edge_hidden_dim: 16
    extractor_global_hidden_dim: 32
    extractor_spatial_blocks: 1
    extractor_kernel_size: 3