470 lines
18 KiB
Python
470 lines
18 KiB
Python
"""
|
||
基于 SUMO+TraCI 的可变限速(VSL)强化学习环境 (优化版)
|
||
|
||
优化点:
|
||
1. 状态压缩: 每zone聚合为 [mean_speed, mean_occ, flow] + zone限速 + 时间特征
|
||
2. 奖励函数: 多目标 (吞吐量 + 速度均匀性 + 限速平滑惩罚)
|
||
3. 动作空间: MultiDiscrete (每zone独立选速度), 不再指数爆炸
|
||
4. 批量TraCI: 减少单次调用量
|
||
5. 使用精简路由文件
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import numpy as np
|
||
from typing import Tuple, Dict, List, Optional
|
||
from collections import defaultdict
|
||
|
||
# 确保 traci/sumolib 可导入
|
||
try:
|
||
import sumo as _sumo_pkg
|
||
_tools = os.path.join(_sumo_pkg.SUMO_HOME, "tools")
|
||
if _tools not in sys.path:
|
||
sys.path.insert(0, _tools)
|
||
except ImportError:
|
||
pass
|
||
|
||
import traci
|
||
|
||
from sumo_network_parser import SUMONetworkParser
|
||
|
||
|
||
class SUMOVSLEnvironment:
|
||
"""基于 SUMO+TraCI 的 VSL 强化学习环境"""
|
||
|
||
def __init__(self, config: dict):
|
||
sumo_cfg = config["sumo"]
|
||
env_cfg = config["environment"]
|
||
|
||
# SUMO 参数
|
||
self.net_file = sumo_cfg["net_file"]
|
||
self.route_file = sumo_cfg["route_file"]
|
||
self.detector_add_file = sumo_cfg["detector_add_file"]
|
||
self.enex_add_file = sumo_cfg["enex_add_file"]
|
||
self.step_length = sumo_cfg["step_length"]
|
||
self.begin_time = sumo_cfg["begin_time"]
|
||
self.end_time = sumo_cfg["end_time"]
|
||
self.use_gui = sumo_cfg.get("gui", False)
|
||
self.no_warnings = sumo_cfg.get("no_warnings", True)
|
||
|
||
# 环境参数
|
||
self.control_interval = env_cfg["control_interval"] # 秒
|
||
self.steps_per_action = int(self.control_interval / self.step_length)
|
||
self.episode_length = int((self.end_time - self.begin_time) / self.control_interval)
|
||
|
||
# 速度选项
|
||
self.speed_actions_kmh = np.array(env_cfg["speed_actions_kmh"], dtype=float)
|
||
self.speed_actions_ms = self.speed_actions_kmh / 3.6
|
||
self.num_speed_actions = len(self.speed_actions_kmh)
|
||
self.free_flow_speed = env_cfg["free_flow_speed"]
|
||
|
||
# Zone 划分
|
||
self.zone_edges: List[List[str]] = env_cfg["zone_edges"]
|
||
self.num_control_zones = len(self.zone_edges)
|
||
|
||
# 奖励参数
|
||
self.reward_cfg = env_cfg.get("reward", {})
|
||
self.w1 = self.reward_cfg.get("w_flow", 0.4)
|
||
self.w2 = self.reward_cfg.get("w_var", 0.3)
|
||
self.w_base = self.reward_cfg.get("w_brake_base", 0.1)
|
||
self.w_max = self.reward_cfg.get("w_brake_max", 0.5)
|
||
self.w4 = self.reward_cfg.get("w_penalty", 0.2)
|
||
self.rho_critical = self.reward_cfg.get("rho_critical", 35.0)
|
||
self.k_sigmoid = self.reward_cfg.get("k_sigmoid", 0.2)
|
||
self.d_th = self.reward_cfg.get("d_threshold", 3.0)
|
||
self.d_max = self.reward_cfg.get("d_max", 8.0)
|
||
self.C_max = self.reward_cfg.get("C_max", 6000.0)
|
||
self.v_limit = self.reward_cfg.get("v_limit", 33.33)
|
||
self.delta_vsl_max = self.reward_cfg.get("delta_vsl_max", 60.0 / 3.6)
|
||
|
||
# 解析网络拓扑
|
||
self.parser = SUMONetworkParser(
|
||
detector_add_file=self.detector_add_file,
|
||
net_file=self.net_file,
|
||
)
|
||
|
||
# 构建 zone -> edge 映射
|
||
self.all_zone_edges = []
|
||
self.zone_of_edge: Dict[str, int] = {}
|
||
for zi, edges in enumerate(self.zone_edges):
|
||
for e in edges:
|
||
self.zone_of_edge[e] = zi
|
||
self.all_zone_edges.append(edges)
|
||
|
||
# 构建 zone -> 检测器ID列表 映射 (用于批量读取)
|
||
self.zone_detector_ids: List[List[str]] = []
|
||
for zi, edges in enumerate(self.zone_edges):
|
||
det_ids = []
|
||
for edge_id in edges:
|
||
ei = self.parser.edge_info.get(edge_id)
|
||
if ei:
|
||
for (lane_idx, pos_idx), det_id in ei.detectors.items():
|
||
det_ids.append(det_id)
|
||
self.zone_detector_ids.append(det_ids)
|
||
|
||
# 全部检测器列表(用于 metrics)
|
||
self.all_detector_ids = []
|
||
for dets in self.zone_detector_ids:
|
||
self.all_detector_ids.extend(dets)
|
||
|
||
# 动作空间: MultiDiscrete, 每zone独立选速度
|
||
# action = List[int], 长度为 num_control_zones, 每个 ∈ [0, num_speed_actions)
|
||
# 但为了兼容单离散 PPO, 仍可用 flatten: action_dim = num_speed_actions * num_control_zones
|
||
# 这里采用 MultiDiscrete 方式, action_dim = num_speed_actions (每个head)
|
||
self.action_dims = [self.num_speed_actions] * self.num_control_zones
|
||
|
||
# 状态维度:
|
||
# 每zone: [mean_speed_norm, mean_occupancy, flow_norm] = 3
|
||
# zone限速: num_zones * 1
|
||
# 时间特征: [time_progress, sin_time, cos_time] = 3
|
||
# 上一步奖励: 1
|
||
self.features_per_zone = 3
|
||
self._state_dim = (self.features_per_zone + 1) * self.num_control_zones + 3 + 1
|
||
|
||
# 运行时状态
|
||
self.current_step = 0
|
||
self._sumo_running = False
|
||
self._episode_count = 0
|
||
self.current_zone_speeds = np.full(self.num_control_zones, self.free_flow_speed)
|
||
self._prev_zone_speeds = np.full(self.num_control_zones, self.free_flow_speed)
|
||
self._last_reward = 0.0
|
||
self.episode_metrics: List[Dict] = []
|
||
|
||
print(f"SUMO VSL Environment (优化版) 初始化完成:")
|
||
print(f" Edges: {self.parser.num_edges}, Zones: {self.num_control_zones}")
|
||
print(f" Action: MultiDiscrete {self.action_dims} (每zone {self.num_speed_actions} 档)")
|
||
print(f" State dim: {self._state_dim}")
|
||
print(f" Episode length: {self.episode_length} steps ({self.control_interval}s each)")
|
||
print(f" SUMO steps per action: {self.steps_per_action}")
|
||
print(f" 奖励权重: flow={self.w1}, var={self.w2}, brake={self.w_base}-{self.w_max}, penalty={self.w4}")
|
||
det_count = sum(len(d) for d in self.zone_detector_ids)
|
||
print(f" 检测器总数: {det_count} (分布在 {self.num_control_zones} 个zone)")
|
||
|
||
@property
|
||
def state_dim(self) -> int:
|
||
return self._state_dim
|
||
|
||
@property
|
||
def action_dim(self) -> int:
|
||
"""兼容旧接口: 返回每个zone的动作数"""
|
||
return self.num_speed_actions
|
||
|
||
# ==================== SUMO 进程管理 ====================
|
||
|
||
def _start_sumo(self, seed: Optional[int] = None):
|
||
"""启动 SUMO 仿真"""
|
||
if self._sumo_running:
|
||
self._close_sumo()
|
||
|
||
binary_name = "sumo-gui" if self.use_gui else "sumo"
|
||
try:
|
||
import sumolib
|
||
sumo_binary = sumolib.checkBinary(binary_name)
|
||
except Exception:
|
||
sumo_binary = binary_name
|
||
|
||
cmd = [
|
||
sumo_binary,
|
||
"-n", self.net_file,
|
||
"-r", self.route_file,
|
||
"-a", f"{self.detector_add_file},{self.enex_add_file}",
|
||
"--step-length", str(self.step_length),
|
||
"-b", str(self.begin_time),
|
||
"-e", str(self.end_time),
|
||
"--collision.action", "warn",
|
||
"--quit-on-end", "true",
|
||
]
|
||
if self.no_warnings:
|
||
cmd += ["--no-warnings", "true"]
|
||
if seed is not None:
|
||
cmd += ["--seed", str(seed)]
|
||
if self.use_gui:
|
||
cmd += ["--start", "true", "--gui-settings-file", "sumo_resource/gui.settings.xml"]
|
||
|
||
traci.start(cmd, label=f"vsl_{self._episode_count}")
|
||
self._sumo_running = True
|
||
|
||
def _close_sumo(self):
|
||
"""关闭 SUMO 仿真"""
|
||
if self._sumo_running:
|
||
try:
|
||
traci.close()
|
||
except Exception:
|
||
pass
|
||
self._sumo_running = False
|
||
|
||
# ==================== RL 接口 ====================
|
||
|
||
def reset(self, seed: Optional[int] = None) -> np.ndarray:
|
||
"""重置环境,启动新 episode"""
|
||
self._episode_count += 1
|
||
self.current_step = 0
|
||
self.episode_metrics = []
|
||
self.current_zone_speeds = np.full(self.num_control_zones, self.free_flow_speed)
|
||
self._prev_zone_speeds = np.full(self.num_control_zones, self.free_flow_speed)
|
||
self._last_reward = 0.0
|
||
|
||
self._start_sumo(seed=seed)
|
||
|
||
return self._collect_state()
|
||
|
||
def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, Dict]:
|
||
"""
|
||
执行一步 RL 交互
|
||
|
||
Args:
|
||
action: shape=(num_control_zones,), 每个值 ∈ [0, num_speed_actions)
|
||
"""
|
||
# 1. 保存上一步限速,解码并应用新限速
|
||
self._prev_zone_speeds = self.current_zone_speeds.copy()
|
||
zone_speeds = self._decode_action(action)
|
||
self.current_zone_speeds = zone_speeds
|
||
self._apply_vsl(zone_speeds)
|
||
|
||
# 2. 推进 SUMO 仿真
|
||
self._interval_arrived = 0
|
||
self._interval_departed = 0
|
||
for _ in range(self.steps_per_action):
|
||
traci.simulationStep()
|
||
self._interval_arrived += traci.simulation.getArrivedNumber()
|
||
self._interval_departed += traci.simulation.getDepartedNumber()
|
||
|
||
# 3. 收集状态
|
||
state = self._collect_state()
|
||
|
||
# 4. 计算奖励和指标
|
||
info = self._collect_metrics()
|
||
reward = self._calculate_reward(info)
|
||
self._last_reward = reward
|
||
|
||
# 5. 判断终止
|
||
self.current_step += 1
|
||
done = self.current_step >= self.episode_length
|
||
|
||
# 6. 记录
|
||
info["reward"] = reward
|
||
info["step"] = self.current_step
|
||
info["zone_speeds_kmh"] = (zone_speeds * 3.6).tolist()
|
||
self.episode_metrics.append(info)
|
||
|
||
if done:
|
||
self._close_sumo()
|
||
|
||
return state, reward, done, info
|
||
|
||
def close(self):
|
||
"""关闭环境"""
|
||
self._close_sumo()
|
||
|
||
# ==================== 动作解码 ====================
|
||
|
||
def _decode_action(self, action: np.ndarray) -> np.ndarray:
|
||
"""将MultiDiscrete动作解码为各zone的限速 (m/s)
|
||
|
||
Args:
|
||
action: shape=(num_control_zones,), int array
|
||
"""
|
||
zone_speeds = np.array([
|
||
self.speed_actions_ms[int(a)] for a in action
|
||
])
|
||
return zone_speeds
|
||
|
||
def _apply_vsl(self, zone_speeds: np.ndarray):
|
||
"""通过 TraCI 将限速应用到各 edge"""
|
||
for zi, speed in enumerate(zone_speeds):
|
||
for edge_id in self.all_zone_edges[zi]:
|
||
traci.edge.setMaxSpeed(edge_id, float(speed))
|
||
|
||
# ==================== 状态收集 (压缩版) ====================
|
||
|
||
def _collect_state(self) -> np.ndarray:
|
||
"""
|
||
收集压缩状态向量
|
||
|
||
结构:
|
||
[
|
||
对每个 zone:
|
||
mean_speed_normalized (zone内所有有效检测器的均速 / free_flow)
|
||
mean_occupancy (zone内所有有效检测器的占有率 / 100)
|
||
flow_normalized (zone内通过检测器的车辆数归一化)
|
||
对每个 zone:
|
||
current_speed_limit_norm (当前限速 / free_flow)
|
||
time_progress (当前步 / episode总步)
|
||
sin(2π * time_progress) (时间周期特征)
|
||
cos(2π * time_progress)
|
||
last_reward_normalized (上一步奖励 / 10)
|
||
]
|
||
"""
|
||
state_parts = []
|
||
|
||
for zi, det_ids in enumerate(self.zone_detector_ids):
|
||
speeds = []
|
||
occupancies = []
|
||
flow = 0
|
||
|
||
for det_id in det_ids:
|
||
try:
|
||
spd = traci.inductionloop.getLastIntervalMeanSpeed(det_id)
|
||
occ = traci.inductionloop.getLastIntervalOccupancy(det_id)
|
||
cnt = traci.inductionloop.getLastIntervalVehicleNumber(det_id)
|
||
if spd <= 0:
|
||
spd = traci.lane.getMaxSpeed(traci.inductionloop.getLaneID(det_id))
|
||
speeds.append(spd)
|
||
occupancies.append(occ)
|
||
flow += cnt
|
||
except Exception:
|
||
pass
|
||
|
||
mean_speed_norm = (np.mean(speeds) / self.free_flow_speed) if speeds else 1.0
|
||
mean_occ = (np.mean(occupancies) / 100.0) if occupancies else 0.0
|
||
# 流量归一化: 假设每zone最大 ~200 veh/min
|
||
flow_norm = min(flow / 200.0, 1.0)
|
||
|
||
state_parts.extend([
|
||
np.clip(mean_speed_norm, 0.0, 1.5),
|
||
np.clip(mean_occ, 0.0, 1.0),
|
||
flow_norm,
|
||
])
|
||
|
||
# zone 限速
|
||
for zi in range(self.num_control_zones):
|
||
state_parts.append(self.current_zone_speeds[zi] / self.free_flow_speed)
|
||
|
||
# 时间特征
|
||
time_progress = self.current_step / max(self.episode_length, 1)
|
||
state_parts.append(time_progress)
|
||
state_parts.append(np.sin(2 * np.pi * time_progress))
|
||
state_parts.append(np.cos(2 * np.pi * time_progress))
|
||
|
||
# 上一步奖励
|
||
state_parts.append(self._last_reward / 10.0)
|
||
|
||
return np.array(state_parts, dtype=np.float32)
|
||
|
||
# ==================== 指标收集 ====================
|
||
|
||
def _collect_metrics(self) -> Dict:
|
||
"""从 SUMO 收集交通指标"""
|
||
info = {}
|
||
|
||
# 吞吐量
|
||
throughput = self._interval_arrived * (3600.0 / self.control_interval)
|
||
info["throughput"] = throughput
|
||
info["arrived_count"] = self._interval_arrived
|
||
info["departed_count"] = self._interval_departed
|
||
|
||
# 每zone的速度和占有率 (用于奖励计算)
|
||
zone_speeds = []
|
||
zone_occs = []
|
||
all_speeds = []
|
||
|
||
for zi, det_ids in enumerate(self.zone_detector_ids):
|
||
speeds = []
|
||
occs = []
|
||
for det_id in det_ids:
|
||
try:
|
||
spd = traci.inductionloop.getLastIntervalMeanSpeed(det_id)
|
||
occ = traci.inductionloop.getLastIntervalOccupancy(det_id)
|
||
if spd <= 0:
|
||
spd = traci.lane.getMaxSpeed(traci.inductionloop.getLaneID(det_id))
|
||
speeds.append(spd)
|
||
occs.append(occ)
|
||
except Exception:
|
||
pass
|
||
zone_mean_spd = np.mean(speeds) if speeds else self.free_flow_speed
|
||
zone_mean_occ = np.mean(occs) if occs else 0.0
|
||
zone_speeds.append(zone_mean_spd)
|
||
zone_occs.append(zone_mean_occ)
|
||
all_speeds.extend(speeds)
|
||
|
||
info["zone_speeds_ms"] = zone_speeds
|
||
info["zone_occupancies"] = zone_occs
|
||
info["mean_speed"] = np.mean(all_speeds) if all_speeds else 0.0
|
||
info["mean_speed_kmh"] = info["mean_speed"] * 3.6
|
||
info["mean_occupancy"] = np.mean(zone_occs) if zone_occs else 0.0
|
||
|
||
# 速度标准差 (用于均匀性奖励)
|
||
info["speed_std"] = np.std(all_speeds) if len(all_speeds) > 1 else 0.0
|
||
|
||
# 路网中当前车辆数
|
||
try:
|
||
info["num_vehicles"] = traci.vehicle.getIDCount()
|
||
except Exception:
|
||
info["num_vehicles"] = 0
|
||
|
||
# 计算密度 (辆/公里)
|
||
total_length_km = sum(self.parser.edge_info[e].length for edges in self.zone_edges for e in edges if e in self.parser.edge_info) / 1000.0
|
||
info["density"] = info["num_vehicles"] / total_length_km if total_length_km > 0 else 0.0
|
||
|
||
# 收集急刹车数据
|
||
brake_decels = []
|
||
try:
|
||
for veh_id in traci.vehicle.getIDList():
|
||
accel = traci.vehicle.getAcceleration(veh_id)
|
||
if accel < -self.d_th:
|
||
brake_decels.append(abs(accel))
|
||
except Exception:
|
||
pass
|
||
info["brake_decels"] = brake_decels
|
||
info["num_hard_brakes"] = len(brake_decels)
|
||
|
||
try:
|
||
info["sim_time"] = traci.simulation.getTime()
|
||
except Exception:
|
||
info["sim_time"] = 0.0
|
||
|
||
return info
|
||
|
||
# ==================== 奖励函数 (多目标) ====================
|
||
|
||
def _calculate_reward(self, info: Dict) -> float:
|
||
"""
|
||
综合多目标奖励函数:
|
||
R(t) = w1*R_flow + w2*R_var + w3(ρ)*R_brake + w4*R_penalty
|
||
|
||
R_flow: 通行效率奖励 (归一化流量)
|
||
R_var: 速度稳定性惩罚 (速度标准差)
|
||
R_brake: 急刹车惩罚 (密度自适应权重)
|
||
R_penalty: 控制平滑度惩罚 (限速变化)
|
||
"""
|
||
# 1. 通行效率奖励
|
||
q_t = info["throughput"]
|
||
R_flow = q_t / self.C_max
|
||
|
||
# 2. 速度稳定性惩罚
|
||
speed_std = info["speed_std"]
|
||
R_var = -speed_std / self.v_limit
|
||
|
||
# 3. 急刹车惩罚 (密度自适应权重)
|
||
rho_t = info["density"]
|
||
w3 = self.w_base + (self.w_max - self.w_base) / (1 + np.exp(-self.k_sigmoid * (rho_t - self.rho_critical)))
|
||
|
||
brake_decels = info["brake_decels"]
|
||
total_vehicles = max(info["num_vehicles"], 1) # 防止除以0
|
||
if brake_decels:
|
||
# 先求所有急刹车惩罚的总和,再除以路网总车辆数
|
||
sum_brake_penalty = sum([max(0, (d - self.d_th) / (self.d_max - self.d_th)) for d in brake_decels])
|
||
brake_penalty = sum_brake_penalty / total_vehicles
|
||
else:
|
||
brake_penalty = 0.0
|
||
R_brake = -brake_penalty
|
||
|
||
# brake_decels = info["brake_decels"]
|
||
# if brake_decels:
|
||
# brake_penalty = np.mean([max(0, (d - self.d_th) / (self.d_max - self.d_th)) for d in brake_decels])
|
||
# else:
|
||
# brake_penalty = 0.0
|
||
# R_brake = -brake_penalty
|
||
|
||
# 4. 控制平滑度惩罚
|
||
vsl_change = np.abs(self.current_zone_speeds - self._prev_zone_speeds)
|
||
# R_penalty = -np.mean(vsl_change) / self.delta_vsl_max
|
||
max_vsl_change = np.max(vsl_change)
|
||
R_penalty = -max_vsl_change / self.delta_vsl_max
|
||
|
||
# 综合奖励
|
||
reward = self.w1 * R_flow + self.w2 * R_var + w3 * R_brake + self.w4 * R_penalty
|
||
|
||
return float(reward * 10.0)
|
||
|