# ctm-dqn/compare_models.py
#
# 242 lines
# 7.9 KiB
# Python

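"""
Multi-threaded model comparison test.

Evaluates the APPO agent against a no-control baseline. A PPO evaluation
helper (test_ppo) is also defined here but is not invoked by main().
"""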
import os
import yaml
import numpy as np
import torch
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import matplotlib.pyplot as plt
from incident_vsl_environment import IncidentVSLEnvironment
from ppo_agent import PPOAgent
from appo_agent import APPOAgent
def test_baseline(config, episode_id, seed):
    """Run one episode of the no-control baseline and report its totals.

    The baseline applies the last action index (``env.action_dim - 1``) at
    every step; the original author annotated this action as the highest
    speed.

    Args:
        config: Configuration passed straight to ``IncidentVSLEnvironment``.
        episode_id: Index of this episode; added to ``seed`` so each episode
            is seeded differently.
        seed: Base random seed.

    Returns:
        Tuple ``(total_reward, mean_throughput)``, where ``mean_throughput``
        is the sum of ``info['throughput']`` over the episode divided by the
        number of steps taken.
    """
    # NOTE(review): these calls seed the process-wide NumPy / PyTorch
    # generators. main() runs episodes on a thread pool, so concurrent
    # episodes may interleave on that shared state - confirm whether exact
    # per-episode reproducibility is required.
    episode_seed = seed + episode_id
    np.random.seed(episode_seed)
    torch.manual_seed(episode_seed)

    env = IncidentVSLEnvironment(config)
    env.reset()

    total_reward = 0
    throughput_sum = 0
    steps_taken = 0
    while True:
        # The returned observation is ignored: the baseline never reacts to it.
        _, step_reward, finished, info = env.step(env.action_dim - 1)
        total_reward += step_reward
        throughput_sum += info['throughput']
        steps_taken += 1
        if finished:
            break
    return total_reward, throughput_sum / steps_taken
def test_ppo(config, agent, episode_id, seed):
    """Run one evaluation episode driven by a PPO agent.

    Args:
        config: Configuration passed straight to ``IncidentVSLEnvironment``.
        agent: Object exposing ``select_action(state, deterministic=True)``
            that returns a 3-tuple whose first element is the action.
        episode_id: Index of this episode; added to ``seed`` so each episode
            is seeded differently.
        seed: Base random seed.

    Returns:
        Tuple ``(total_reward, mean_throughput)``, where ``mean_throughput``
        is the sum of ``info['throughput']`` over the episode divided by the
        number of steps taken.
    """
    # NOTE(review): global NumPy / PyTorch seeding is shared by all threads
    # when episodes run concurrently - confirm reproducibility requirements.
    episode_seed = seed + episode_id
    np.random.seed(episode_seed)
    torch.manual_seed(episode_seed)

    env = IncidentVSLEnvironment(config)
    observation = env.reset()

    total_reward = 0
    throughput_sum = 0
    steps_taken = 0
    while True:
        # Only the action is needed; the two extra return values are unused.
        action, _, _ = agent.select_action(observation, deterministic=True)
        observation, step_reward, finished, info = env.step(action)
        total_reward += step_reward
        throughput_sum += info['throughput']
        steps_taken += 1
        if finished:
            break
    return total_reward, throughput_sum / steps_taken
def test_appo(config, agent, episode_id, seed):
    """Run one evaluation episode driven by an APPO agent.

    Args:
        config: Configuration passed straight to ``IncidentVSLEnvironment``.
        agent: Object exposing ``select_action(state, deterministic=True)``
            that returns a 3-tuple whose first element is the action.
        episode_id: Index of this episode; added to ``seed`` so each episode
            is seeded differently.
        seed: Base random seed.

    Returns:
        Tuple ``(total_reward, mean_throughput)``, where ``mean_throughput``
        is the sum of ``info['throughput']`` over the episode divided by the
        number of steps taken.
    """
    # NOTE(review): global NumPy / PyTorch seeding is shared by all threads
    # when episodes run concurrently - confirm reproducibility requirements.
    episode_seed = seed + episode_id
    np.random.seed(episode_seed)
    torch.manual_seed(episode_seed)

    env = IncidentVSLEnvironment(config)
    observation = env.reset()

    total_reward = 0
    throughput_sum = 0
    steps_taken = 0
    while True:
        # Only the action is needed; the two extra return values are unused.
        action, _, _ = agent.select_action(observation, deterministic=True)
        observation, step_reward, finished, info = env.step(action)
        total_reward += step_reward
        throughput_sum += info['throughput']
        steps_taken += 1
        if finished:
            break
    return total_reward, throughput_sum / steps_taken
def load_model(model_type, config, state_dim, action_dim):
    """Build an agent and load the newest checkpoint found on disk.

    Args:
        model_type: ``'ppo'`` builds a ``PPOAgent``; any other value builds an
            ``APPOAgent``.
        config: Parsed configuration dict. The optional ``'agent'`` section
            supplies network hyper-parameters and the device. For PPO,
            ``config['training']['checkpoint_dir']`` names the checkpoint
            root; APPO always uses ``'checkpoints_appo'``.
        state_dim: State dimension forwarded to the agent constructor.
        action_dim: Action-space description from the environment. PPO uses
            it as ``action_dims`` when it is a list and falls back to
            ``[5] * 3`` otherwise; APPO receives it unchanged as
            ``num_actions``.

    Returns:
        Tuple ``(agent, model_path)``: the agent with weights loaded, and the
        path of the checkpoint file that was loaded.

    Raises:
        FileNotFoundError: If the checkpoint root is missing, contains no run
            directory, or the newest run directory holds no usable
            checkpoint file.
    """
    agent_config = config.get('agent', {})

    # Respect an explicitly configured device; only the implicit default
    # falls back to CPU when CUDA is unavailable on this machine.
    if 'device' in agent_config:
        device = agent_config['device']
    else:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    if model_type == 'ppo':
        agent = PPOAgent(
            state_dim=state_dim,
            action_dims=action_dim if isinstance(action_dim, list) else [5] * 3,
            hidden_layers=agent_config.get('hidden_layers', [256, 256]),
            device=device,
        )
        checkpoint_dir = config['training']['checkpoint_dir']
    else:  # appo
        agent = APPOAgent(
            state_dim=state_dim,
            num_actions=action_dim,
            hidden_dim=agent_config.get('hidden_dim', 256),
            num_heads=agent_config.get('num_heads', 4),
            num_attention_layers=agent_config.get('num_attention_layers', 2),
            device=device,
        )
        checkpoint_dir = 'checkpoints_appo'

    model_path = _latest_checkpoint(checkpoint_dir)
    agent.load(model_path)
    return agent, model_path


def _latest_checkpoint(checkpoint_dir):
    """Return the checkpoint file to load from the newest run directory."""
    run_names = [
        name for name in os.listdir(checkpoint_dir)
        if os.path.isdir(os.path.join(checkpoint_dir, name))
    ]
    if not run_names:
        raise FileNotFoundError(
            f"No run directories found in '{checkpoint_dir}'"
        )
    # "Newest" is the lexicographically greatest directory name.
    # NOTE(review): this presumably relies on sortable (e.g. timestamped)
    # run names - confirm against the training script.
    run_dir = os.path.join(checkpoint_dir, max(run_names))

    best_model = os.path.join(run_dir, 'model_best.pt')
    if os.path.exists(best_model):
        return best_model

    # Fall back to the highest-numbered 'model_ep<N>.pt'. The real filename
    # is kept (rather than rebuilt from N) so zero-padded names still resolve,
    # and files whose episode part is not a number are skipped.
    prefix, suffix = 'model_ep', '.pt'
    episode_by_file = {}
    for filename in os.listdir(run_dir):
        if not (filename.startswith(prefix) and filename.endswith(suffix)):
            continue
        try:
            episode_by_file[filename] = int(filename[len(prefix):-len(suffix)])
        except ValueError:
            continue
    if not episode_by_file:
        raise FileNotFoundError(
            f"No 'model_best.pt' or 'model_ep<N>.pt' checkpoint in '{run_dir}'"
        )
    newest = max(episode_by_file, key=episode_by_file.get)
    return os.path.join(run_dir, newest)
def main():
    """Compare the APPO agent with the no-control baseline and plot the result.

    Steps:
        1. Read ``config_incident_vsl.yaml``.
        2. Load the newest APPO checkpoint via ``load_model``.
        3. Run ``num_episodes`` baseline episodes and the same number of APPO
           episodes on a thread pool of ``num_workers`` threads.
        4. Print mean / std of reward and throughput, plus the percentage
           change of APPO over the baseline.
        5. Save a two-panel bar chart to ``model_comparison.png``.
    """
    with open('config_incident_vsl.yaml', 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)

    # Every setting below has a default, so a missing or empty 'testing'
    # section is tolerated instead of raising KeyError.
    test_config = config.get('testing') or {}
    num_episodes = test_config.get('num_episodes', 10)
    num_workers = test_config.get('num_workers', 4)
    seed = 42

    # A throw-away environment is built only to read the space dimensions.
    env = IncidentVSLEnvironment(config)
    state_dim = env.state_dim
    action_dim = env.action_dim

    print("=" * 70)
    print("多线程模型对比测试")
    print("=" * 70)
    print(f"测试回合数: {num_episodes}")
    print(f"并行线程数: {num_workers}")
    print()

    # Load the model.
    print("加载模型...")
    appo_agent, appo_path = load_model('appo', config, state_dim, action_dim)
    print(f"APPO模型: {appo_path}")
    print()

    results = {}

    # Evaluate the no-control baseline.
    print("测试无控制基线...")
    results['baseline'] = _run_episodes(
        lambda i: test_baseline(config, i, seed), num_episodes, num_workers
    )

    # Evaluate APPO.
    print("测试APPO...")
    results['appo'] = _run_episodes(
        lambda i: test_appo(config, appo_agent, i, seed), num_episodes, num_workers
    )

    # Print per-model statistics.
    print("\n" + "=" * 70)
    print("测试结果")
    print("=" * 70)
    for model_name in ['baseline', 'appo']:
        rewards = results[model_name]['rewards']
        throughputs = results[model_name]['throughputs']
        print(f"\n{model_name.upper()}:")
        print(f" 奖励: {np.mean(rewards):.2f} ± {np.std(rewards):.2f}")
        print(f" 通行量: {np.mean(throughputs):.1f} ± {np.std(throughputs):.1f} veh/h")

    # Print the relative improvement of APPO over the baseline.
    print("\n" + "=" * 70)
    print("性能提升")
    print("=" * 70)
    reward_imp = _percent_change(
        np.mean(results['appo']['rewards']),
        np.mean(results['baseline']['rewards']),
    )
    throughput_imp = _percent_change(
        np.mean(results['appo']['throughputs']),
        np.mean(results['baseline']['throughputs']),
    )
    print("\nAPPO vs Baseline:")
    print(f" 奖励提升: {reward_imp:+.2f}%")
    print(f" 通行量提升: {throughput_imp:+.2f}%")

    # Plot both metrics side by side.
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    _plot_metric(axes[0], results, 'rewards',
                 'Average Reward', 'Reward Comparison')
    _plot_metric(axes[1], results, 'throughputs',
                 'Average Throughput (veh/h)', 'Throughput Comparison')
    plt.tight_layout()
    plt.savefig('model_comparison.png', dpi=150, bbox_inches='tight')
    plt.close(fig)  # release the figure once it has been written to disk
    print("\n对比图已保存: model_comparison.png")
    print("=" * 70)


def _run_episodes(run_episode, num_episodes, num_workers):
    """Run ``run_episode(i)`` for every episode index on a thread pool.

    Results are collected in completion order, which is fine because only
    their mean and standard deviation are used.
    """
    rewards = []
    throughputs = []
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        futures = [executor.submit(run_episode, i) for i in range(num_episodes)]
        for future in tqdm(as_completed(futures), total=num_episodes):
            reward, throughput = future.result()
            rewards.append(reward)
            throughputs.append(throughput)
    return {'rewards': rewards, 'throughputs': throughputs}


def _percent_change(value, baseline):
    """Return the change of ``value`` relative to ``|baseline|`` in percent.

    A zero baseline has no meaningful relative change, so NaN is returned
    instead of dividing by zero.
    """
    if baseline == 0:
        return float('nan')
    return (value - baseline) / abs(baseline) * 100


def _plot_metric(ax, results, metric, ylabel, title):
    """Draw one mean +/- std bar per model for ``metric`` on ``ax``."""
    model_keys = ['baseline', 'appo']
    labels = ['Baseline', 'APPO']
    colors = ['gray', 'red']
    for position, (model_name, color) in enumerate(zip(model_keys, colors)):
        values = results[model_name][metric]
        ax.bar(position, np.mean(values), yerr=np.std(values),
               color=color, alpha=0.7, capsize=5)
    ax.set_xticks(range(len(labels)))
    ax.set_xticklabels(labels)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.grid(True, alpha=0.3)
# Script entry point: run the comparison only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()