# ctm-dqn/test_ppo_quick.py
#
# 55 lines
# 1.5 KiB
# Python

"""PPO策略快速测试 - 30轮"""
import yaml
import numpy as np
from tqdm import tqdm
from sumo_vsl_environment import SUMOVSLEnvironment
from ppo_agent import PPOAgent
# Load the experiment configuration. The path is relative, so the script must
# be started from the directory that contains config_sumo_vsl.yaml.
with open("config_sumo_vsl.yaml", "r", encoding="utf-8") as f:
    # yaml.safe_load returns None for an empty document; fall back to an empty
    # mapping so the override below does not fail with a TypeError.
    config = yaml.safe_load(f) or {}

# Force a short run of 30 episodes regardless of what the file specifies.
# setdefault creates the "training" section when the file omits it, mirroring
# how the optional "agent" section is read with a default further down.
config.setdefault("training", {})["num_episodes"] = 30
# Build the environment from the loaded configuration.
env = SUMOVSLEnvironment(config)

# The "agent" section is optional: every hyper-parameter read from it below
# has a hard-coded fallback.
agent_config = config.get("agent", {})

# Gather the constructor arguments first so each one can carry its own note.
ppo_kwargs = {
    "state_dim": env.state_dim,
    # One entry per control zone, each equal to 5 (presumably the number of
    # discrete actions available per zone -- confirm against PPOAgent).
    "action_dims": [5] * env.num_control_zones,
    "hidden_layers": agent_config.get("hidden_layers", [256, 256, 128]),
    "learning_rate": agent_config.get("learning_rate", 3e-4),
    # Falls back to "cuda" when the config does not name a device.
    "device": agent_config.get("device", "cuda"),
}
agent = PPOAgent(**ppo_kwargs)
# Single source of truth for the episode count: the value written into the
# config above. The banner, the loop bound and the progress-bar label all use
# it, so they cannot drift apart.
num_episodes = config["training"]["num_episodes"]

print(f"开始PPO测试 - {num_episodes}轮")
print(f"状态维度: {env.state_dim}, 动作空间: {env.num_control_zones}x5\n")

try:
    for ep in range(1, num_episodes + 1):
        # A different seed per episode (43, 44, ...) keeps the run reproducible
        # while still varying the episodes.
        state = env.reset(seed=42 + ep)
        ep_reward = 0
        done = False

        # The context manager closes the progress bar even when env.step()
        # raises or the user presses Ctrl-C in the middle of an episode.
        with tqdm(
            total=env.episode_length,
            desc=f"Ep {ep}/{num_episodes}",
            leave=False,
        ) as pbar:
            while not done:
                # Sample from the policy (deterministic=False) and record the
                # transition for the update at the end of the episode.
                action, log_prob, value = agent.select_action(
                    state, deterministic=False
                )
                next_state, reward, done, info = env.step(action)
                agent.store_transition(
                    state, action, reward, value, log_prob, done
                )
                ep_reward += reward
                state = next_state
                pbar.update(1)

        # NOTE(review): 0.0 is presumably the bootstrap value for the state
        # after the last step -- confirm against PPOAgent.update.
        agent.update(0.0)

        # `info` is the dict returned by the final env.step() of this episode.
        print(f"Ep {ep}: Reward={ep_reward:.2f}, TP={info['throughput']:.0f}, Speed={info['mean_speed_kmh']:.1f}")
except KeyboardInterrupt:
    print("\n测试中断")
finally:
    # Always release the environment, whether the run finished, was
    # interrupted, or failed.
    env.close()

# Reached after a normal run and also after a handled KeyboardInterrupt.
print("\n测试完成!")