"""Quick smoke test of the PPO policy: a short 30-episode training run.

Loads ``config_sumo_vsl.yaml``, builds the SUMO VSL environment and a PPO
agent from it, runs ``NUM_EPISODES`` episodes with stochastic action
selection, and prints per-episode reward, throughput and mean speed.
"""

import yaml
import numpy as np
from tqdm import tqdm

from sumo_vsl_environment import SUMOVSLEnvironment
from ppo_agent import PPOAgent

# Single source of truth for values that were previously repeated as literals.
NUM_EPISODES = 30      # episodes to run in this quick test
ACTIONS_PER_ZONE = 5   # size of the discrete action set for each control zone
BASE_SEED = 42         # episode ``ep`` is reset with seed BASE_SEED + ep

with open("config_sumo_vsl.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Override the configured episode count so the config agrees with the loop below.
config["training"]["num_episodes"] = NUM_EPISODES

env = SUMOVSLEnvironment(config)
agent_config = config.get("agent", {})

# Everything after the environment is constructed runs under try/finally so
# env.close() is called even if agent construction or a step raises.
completed = False
try:
    agent = PPOAgent(
        state_dim=env.state_dim,
        action_dims=[ACTIONS_PER_ZONE] * env.num_control_zones,
        hidden_layers=agent_config.get("hidden_layers", [256, 256, 128]),
        learning_rate=agent_config.get("learning_rate", 3e-4),
        device=agent_config.get("device", "cuda"),
    )

    print(f"开始PPO测试 - {NUM_EPISODES}轮")
    print(
        f"状态维度: {env.state_dim}, "
        f"动作空间: {env.num_control_zones}x{ACTIONS_PER_ZONE}\n"
    )

    for ep in range(1, NUM_EPISODES + 1):
        state = env.reset(seed=BASE_SEED + ep)
        ep_reward = 0
        done = False

        # The context manager closes the bar even when a step raises or the
        # user interrupts mid-episode.
        with tqdm(
            total=env.episode_length,
            desc=f"Ep {ep}/{NUM_EPISODES}",
            leave=False,
        ) as pbar:
            while not done:
                action, log_prob, value = agent.select_action(
                    state, deterministic=False
                )
                next_state, reward, done, info = env.step(action)
                agent.store_transition(
                    state, action, reward, value, log_prob, done
                )

                ep_reward += reward
                state = next_state
                pbar.update(1)

        # NOTE(review): 0.0 is presumably the bootstrap value for the terminal
        # state -- confirm against PPOAgent.update.
        agent.update(0.0)

        # ``info`` is the dict returned by the last env.step() of the episode.
        print(
            f"Ep {ep}: Reward={ep_reward:.2f}, "
            f"TP={info['throughput']:.0f}, "
            f"Speed={info['mean_speed_kmh']:.1f}"
        )

    completed = True
except KeyboardInterrupt:
    print("\n测试中断")
finally:
    env.close()

# Report completion only when every episode actually ran; an interrupted run
# has already printed its own message above.
if completed:
    print("\n测试完成!")