# ctm-dqn/show_ppo_decisions.py

"""显示PPO控制决策"""
import numpy as np
import yaml
from sumo_vsl_environment import SUMOVSLEnvironment
from ppo_agent import PPOAgent

# Load the configuration that is handed to the SUMO VSL environment.
with open('config_sumo_vsl.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Discrete action index -> speed value shown in the table (printed with a
# "km" suffix).
speed_map = {0: 40, 1: 60, 2: 80, 3: 100, 4: 120}
# Maximum number of control steps to print; the loop stops earlier if the
# environment reports `done`.
max_steps = 60

env = SUMOVSLEnvironment(config)
try:
    # Everything after the environment is created runs under try/finally so
    # that env.close() is reached even if the agent or a step raises.
    num_zones = env.num_control_zones

    agent = PPOAgent(
        state_dim=env.state_dim,
        # One entry per control zone; each entry is the number of speed
        # levels in speed_map, so every action index has a display value.
        action_dims=[len(speed_map)] * num_zones,
        hidden_layers=[256, 256, 128],
        learning_rate=3e-4,
        # NOTE(review): hard-coded device; presumably fails on hosts without
        # CUDA -- confirm how PPOAgent handles an unavailable device.
        device='cuda'
    )

    # Loading is best-effort: on failure the script continues with the
    # freshly constructed agent. Only ordinary exceptions are caught, so
    # Ctrl-C and SystemExit still propagate.
    try:
        agent.load('checkpoints_sumo_ppo/best_model.pt')
    except FileNotFoundError:
        print('警告: 未找到训练好的模型\n')
    except Exception as exc:
        # Any other failure is not a "missing file"; report what actually
        # went wrong instead of the misleading not-found warning.
        print(f'警告: 加载模型失败 ({type(exc).__name__}: {exc})\n')
    else:
        print('已加载训练好的PPO模型\n')

    state = env.reset(seed=42)

    # Build the header from the real zone count instead of assuming five.
    print(f'PPO控制决策 (全部{max_steps}步):')
    print('步数 | ' + ' | '.join(f'Zone{zone}' for zone in range(num_zones)))
    print('-' * 50)

    for step in range(max_steps):
        action, _, _ = agent.select_action(state, deterministic=True)
        speeds = [speed_map[int(a)] for a in action]
        # One column per returned action, in the same "NNNkm" format.
        print(f'{step:3d}  | ' + ' | '.join(f'{speed:3d}km' for speed in speeds))

        state, _, done, _ = env.step(action)
        if done:
            break
finally:
    env.close()