"""PPO策略快速测试 - 30轮""" import yaml import numpy as np from tqdm import tqdm from sumo_vsl_environment import SUMOVSLEnvironment from ppo_agent import PPOAgent with open("config_sumo_vsl.yaml", "r", encoding="utf-8") as f: config = yaml.safe_load(f) config["training"]["num_episodes"] = 30 env = SUMOVSLEnvironment(config) agent_config = config.get("agent", {}) agent = PPOAgent( state_dim=env.state_dim, action_dims=[5] * env.num_control_zones, hidden_layers=agent_config.get("hidden_layers", [256, 256, 128]), learning_rate=agent_config.get("learning_rate", 3e-4), device=agent_config.get("device", "cuda") ) print(f"开始PPO测试 - 30轮") print(f"状态维度: {env.state_dim}, 动作空间: {env.num_control_zones}x5\n") try: for ep in range(1, 31): state = env.reset(seed=42 + ep) ep_reward = 0 done = False pbar = tqdm(total=env.episode_length, desc=f"Ep {ep}/30", leave=False) while not done: action, log_prob, value = agent.select_action(state, deterministic=False) next_state, reward, done, info = env.step(action) agent.store_transition(state, action, reward, value, log_prob, done) ep_reward += reward state = next_state pbar.update(1) pbar.close() agent.update(0.0) print(f"Ep {ep}: Reward={ep_reward:.2f}, TP={info['throughput']:.0f}, Speed={info['mean_speed_kmh']:.1f}") except KeyboardInterrupt: print("\n测试中断") finally: env.close() print("\n测试完成!")