# ctm-dqn/show_ppo_decisions.py (42 lines, 1.1 KiB, Python)

"""Display the control decisions made by the PPO agent."""
import numpy as np
import yaml
from sumo_vsl_environment import SUMOVSLEnvironment
from ppo_agent import PPOAgent
# Roll out a PPO policy on the SUMO VSL environment for up to 60 steps and
# print, per step, the value that speed_map assigns to each zone's action.

# Load the YAML configuration that the environment is constructed from.
with open('config_sumo_vsl.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

env = SUMOVSLEnvironment(config)
try:
    num_zones = env.num_control_zones

    # One 5-way discrete choice per control zone; speed_map below has exactly
    # one entry for each of those 5 action indices.
    # NOTE(review): device is hard-coded to 'cuda'; how PPOAgent behaves on a
    # machine without a GPU is not visible from this file -- confirm.
    agent = PPOAgent(
        state_dim=env.state_dim,
        action_dims=[5] * num_zones,
        hidden_layers=[256, 256, 128],
        learning_rate=3e-4,
        device='cuda',
    )

    # Best-effort checkpoint load: on failure the script continues with the
    # agent as constructed. Catch Exception (not a bare except) so Ctrl-C and
    # SystemExit still propagate, and report the actual cause instead of
    # always claiming the file is missing.
    try:
        agent.load('checkpoints_sumo_ppo/best_model.pt')
    except Exception as exc:
        print(f'警告: 未找到训练好的模型 ({type(exc).__name__}: {exc})\n')
    else:
        print('已加载训练好的PPO模型\n')

    state = env.reset(seed=42)

    # Action index -> displayed value (printed with a "km" suffix; presumably
    # a speed limit in km/h -- confirm against the environment).
    speed_map = {0: 40, 1: 60, 2: 80, 3: 100, 4: 120}

    print('PPO控制决策 (全部60步):')
    # Build the header from the real zone count rather than assuming five.
    print('步数 | ' + ' | '.join(f'Zone{i}' for i in range(num_zones)))
    print('-' * 50)

    for step in range(60):
        # deterministic=True is passed through to the agent; only the first
        # of the three returned values (the action) is used here.
        action, _, _ = agent.select_action(state, deterministic=True)
        speeds = [speed_map[int(a)] for a in action]
        # One column per entry of the action, whatever the zone count is.
        print(f'{step:3d} | ' + ' | '.join(f'{s:3d}km' for s in speeds))
        state, _, done, _ = env.step(action)
        if done:
            break
finally:
    # Always release the environment, even if the rollout fails midway.
    env.close()