242 lines
7.9 KiB
Python
242 lines
7.9 KiB
Python
"""
|
|
Multi-threaded model comparison test.

Evaluates PPO, APPO and the no-control baseline side by side.
|
|
"""
|
|
import os
|
|
import yaml
|
|
import numpy as np
|
|
import torch
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from tqdm import tqdm
|
|
import matplotlib.pyplot as plt
|
|
|
|
from incident_vsl_environment import IncidentVSLEnvironment
|
|
from ppo_agent import PPOAgent
|
|
from appo_agent import APPOAgent
|
|
|
|
|
|
def test_baseline(config, episode_id, seed):
    """Run one evaluation episode of the no-control baseline.

    Every step applies the highest action index (``env.action_dim - 1``),
    which the original author's comment labels as the maximum speed.

    Args:
        config: Configuration object passed to ``IncidentVSLEnvironment``.
        episode_id: Episode index; added to ``seed`` to form the RNG seed.
        seed: Base random seed.

    Returns:
        A ``(episode_reward, mean_throughput)`` tuple: the sum of the step
        rewards and the per-step average of ``info['throughput']``.
    """
    # Seed the global NumPy and PyTorch RNGs with an episode-specific value.
    episode_seed = seed + episode_id
    np.random.seed(episode_seed)
    torch.manual_seed(episode_seed)

    env = IncidentVSLEnvironment(config)
    env.reset()  # the initial observation is not needed by a fixed policy

    total_reward = 0
    throughput_sum = 0
    n_steps = 0

    while True:
        # Fixed policy: always pick the last (highest-index) action.
        _, reward, finished, info = env.step(env.action_dim - 1)
        total_reward += reward
        throughput_sum += info['throughput']
        n_steps += 1
        if finished:
            break

    return total_reward, throughput_sum / n_steps
|
|
|
|
|
|
def test_ppo(config, agent, episode_id, seed):
    """Run one evaluation episode driven by a PPO agent.

    Args:
        config: Configuration object passed to ``IncidentVSLEnvironment``.
        agent: Object exposing ``select_action(state, deterministic=True)``
            that returns a 3-tuple whose first element is the action.
        episode_id: Episode index; added to ``seed`` to form the RNG seed.
        seed: Base random seed.

    Returns:
        A ``(episode_reward, mean_throughput)`` tuple: the sum of the step
        rewards and the per-step average of ``info['throughput']``.
    """
    # Seed the global NumPy and PyTorch RNGs with an episode-specific value.
    episode_seed = seed + episode_id
    np.random.seed(episode_seed)
    torch.manual_seed(episode_seed)

    env = IncidentVSLEnvironment(config)
    observation = env.reset()

    total_reward = 0
    throughput_sum = 0
    n_steps = 0

    while True:
        # Only the action is used; the two extra return values are ignored.
        action, _extra_a, _extra_b = agent.select_action(
            observation, deterministic=True
        )
        observation, reward, finished, info = env.step(action)
        total_reward += reward
        throughput_sum += info['throughput']
        n_steps += 1
        if finished:
            break

    return total_reward, throughput_sum / n_steps
|
|
|
|
|
|
def test_appo(config, agent, episode_id, seed):
    """Run one evaluation episode driven by an APPO agent.

    Args:
        config: Configuration object passed to ``IncidentVSLEnvironment``.
        agent: Object exposing ``select_action(state, deterministic=True)``
            that returns a 3-tuple whose first element is the action.
        episode_id: Episode index; added to ``seed`` to form the RNG seed.
        seed: Base random seed.

    Returns:
        A ``(episode_reward, mean_throughput)`` tuple: the sum of the step
        rewards and the per-step average of ``info['throughput']``.
    """
    # Seed the global NumPy and PyTorch RNGs with an episode-specific value.
    episode_seed = seed + episode_id
    np.random.seed(episode_seed)
    torch.manual_seed(episode_seed)

    env = IncidentVSLEnvironment(config)
    observation = env.reset()

    total_reward = 0
    throughput_sum = 0
    n_steps = 0

    while True:
        # Only the action is used; the two extra return values are ignored.
        action, _extra_a, _extra_b = agent.select_action(
            observation, deterministic=True
        )
        observation, reward, finished, info = env.step(action)
        total_reward += reward
        throughput_sum += info['throughput']
        n_steps += 1
        if finished:
            break

    return total_reward, throughput_sum / n_steps
|
|
|
|
|
|
def _find_checkpoint(checkpoint_dir):
    """Return the checkpoint file to load from the newest run directory.

    The newest run is the sub-directory of ``checkpoint_dir`` whose name
    sorts last. Inside it ``model_best.pt`` is preferred; otherwise the
    ``model_ep<N>.pt`` file with the largest integer ``N`` is used.

    Raises:
        FileNotFoundError: If ``checkpoint_dir`` does not exist, contains no
            run directories, or the newest run holds no usable checkpoint.
    """
    run_names = [
        entry for entry in os.listdir(checkpoint_dir)
        if os.path.isdir(os.path.join(checkpoint_dir, entry))
    ]
    if not run_names:
        raise FileNotFoundError(
            f"No run directories found in checkpoint directory: {checkpoint_dir}"
        )

    # String max of the directory names.
    # NOTE(review): presumably run names are timestamps so this picks the
    # most recent run - confirm against the training script.
    run_dir = os.path.join(checkpoint_dir, max(run_names))

    best_model = os.path.join(run_dir, 'model_best.pt')
    if os.path.exists(best_model):
        return best_model

    prefix, suffix = 'model_ep', '.pt'
    numbered = []
    for fname in os.listdir(run_dir):
        if not (fname.startswith(prefix) and fname.endswith(suffix)):
            continue
        try:
            episode = int(fname[len(prefix):-len(suffix)])
        except ValueError:
            # e.g. "model_ep_final.pt": not an episode-numbered checkpoint.
            continue
        # Keep the real file name so zero-padded numbers still resolve.
        numbered.append((episode, fname))

    if not numbered:
        raise FileNotFoundError(
            f"No 'model_best.pt' or 'model_ep<N>.pt' checkpoint found in: {run_dir}"
        )

    _, newest_file = max(numbered)
    return os.path.join(run_dir, newest_file)


def load_model(model_type, config, state_dim, action_dim):
    """Build an agent and load its latest checkpoint from disk.

    The checkpoint is located before the agent is constructed, so a missing
    checkpoint is reported without first allocating a network.

    Args:
        model_type: ``'ppo'`` builds a ``PPOAgent``; any other value builds
            an ``APPOAgent``.
        config: Configuration mapping. The optional ``'agent'`` section
            supplies network hyper-parameters and the device; for PPO,
            ``config['training']['checkpoint_dir']`` is the checkpoint root.
            APPO checkpoints are read from ``'checkpoints_appo'``.
        state_dim: State dimension forwarded to the agent constructor.
        action_dim: Action specification. APPO receives it unchanged as
            ``num_actions``. PPO receives it as ``action_dims`` only when it
            is a list; otherwise the hard-coded ``[5] * 3`` is used.

    Returns:
        An ``(agent, model_path)`` tuple with the loaded agent and the path
        of the checkpoint file that was loaded.

    Raises:
        FileNotFoundError: If no run directory or checkpoint file exists.
        KeyError: If ``model_type`` is ``'ppo'`` and the training checkpoint
            directory is missing from ``config``.
    """
    agent_config = config.get('agent', {})
    is_ppo = model_type == 'ppo'

    if is_ppo:
        checkpoint_dir = config['training']['checkpoint_dir']
    else:  # appo
        checkpoint_dir = 'checkpoints_appo'
    model_path = _find_checkpoint(checkpoint_dir)

    if is_ppo:
        agent = PPOAgent(
            state_dim=state_dim,
            # Non-list action specs fall back to the hard-coded layout.
            action_dims=action_dim if isinstance(action_dim, list) else [5] * 3,
            hidden_layers=agent_config.get('hidden_layers', [256, 256]),
            device=agent_config.get('device', 'cuda'),
        )
    else:
        agent = APPOAgent(
            state_dim=state_dim,
            num_actions=action_dim,
            hidden_dim=agent_config.get('hidden_dim', 256),
            num_heads=agent_config.get('num_heads', 4),
            num_attention_layers=agent_config.get('num_attention_layers', 2),
            device=agent_config.get('device', 'cuda'),
        )

    agent.load(model_path)
    return agent, model_path
|
|
|
|
|
|
def _run_episodes(episode_fn, fn_args, num_episodes, num_workers, seed):
    """Run ``episode_fn`` once per episode index on a thread pool.

    Each call is ``episode_fn(*fn_args, episode_index, seed)`` and must
    return a ``(reward, throughput)`` pair.

    Returns:
        ``(rewards, throughputs)`` lists, in completion order.
    """
    rewards, throughputs = [], []
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        futures = [
            executor.submit(episode_fn, *fn_args, i, seed)
            for i in range(num_episodes)
        ]
        for future in tqdm(as_completed(futures), total=num_episodes):
            reward, throughput = future.result()
            rewards.append(reward)
            throughputs.append(throughput)
    return rewards, throughputs


def _format_improvement(value, baseline, denominator):
    """Format the relative change of ``value`` over ``baseline`` in percent.

    Returns ``"N/A"`` when ``denominator`` is zero, where the ratio is
    undefined.
    """
    if denominator == 0:
        return "N/A"
    return f"{((value - baseline) / denominator) * 100:+.2f}%"


def _plot_comparison(results, output_path):
    """Save a two-panel bar chart (reward, throughput) to ``output_path``."""
    model_keys = ['baseline', 'appo']
    labels = ['Baseline', 'APPO']
    colors = ['gray', 'red']
    panels = [
        ('rewards', 'Average Reward', 'Reward Comparison'),
        ('throughputs', 'Average Throughput (veh/h)', 'Throughput Comparison'),
    ]

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    try:
        for ax, (metric, ylabel, title) in zip(axes, panels):
            for i, (key, color) in enumerate(zip(model_keys, colors)):
                values = results[key][metric]
                ax.bar(i, np.mean(values), yerr=np.std(values),
                       color=color, alpha=0.7, capsize=5)
            ax.set_xticks(range(2))
            ax.set_xticklabels(labels)
            ax.set_ylabel(ylabel)
            ax.set_title(title)
            ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig(output_path, dpi=150, bbox_inches='tight')
    finally:
        # Release the figure even if saving fails.
        plt.close(fig)


def main():
    """Compare the APPO agent with the no-control baseline.

    Reads ``config_incident_vsl.yaml``, loads the latest APPO checkpoint,
    runs ``num_episodes`` evaluation episodes for the baseline and for APPO
    on a thread pool, prints the mean and standard deviation of reward and
    throughput together with the relative improvement, and saves a bar chart
    to ``model_comparison.png``.

    NOTE(review): the episode functions seed the *global* NumPy/PyTorch RNGs
    from worker threads and all APPO episodes share one agent instance;
    whether that is reproducible and thread-safe depends on the environment
    and agent implementations - confirm.
    """
    with open('config_incident_vsl.yaml', 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)

    test_config = config['testing']
    num_episodes = test_config.get('num_episodes', 10)
    num_workers = test_config.get('num_workers', 4)
    seed = 42

    # A throwaway environment is created only to read the dimensions.
    env = IncidentVSLEnvironment(config)
    state_dim = env.state_dim
    action_dim = env.action_dim

    banner = "=" * 70
    print(banner)
    print("多线程模型对比测试")
    print(banner)
    print(f"测试回合数: {num_episodes}")
    print(f"并行线程数: {num_workers}")
    print()

    # Load the model.
    print("加载模型...")
    appo_agent, appo_path = load_model('appo', config, state_dim, action_dim)
    print(f"APPO模型: {appo_path}")
    print()

    results = {}

    # Evaluate the no-control baseline.
    print("测试无控制基线...")
    rewards, throughputs = _run_episodes(
        test_baseline, (config,), num_episodes, num_workers, seed
    )
    results['baseline'] = {'rewards': rewards, 'throughputs': throughputs}

    # Evaluate APPO.
    print("测试APPO...")
    rewards, throughputs = _run_episodes(
        test_appo, (config, appo_agent), num_episodes, num_workers, seed
    )
    results['appo'] = {'rewards': rewards, 'throughputs': throughputs}

    # Print per-model statistics.
    print("\n" + banner)
    print("测试结果")
    print(banner)

    for model_name in ['baseline', 'appo']:
        rewards = results[model_name]['rewards']
        throughputs = results[model_name]['throughputs']
        print(f"\n{model_name.upper()}:")
        print(f" 奖励: {np.mean(rewards):.2f} ± {np.std(rewards):.2f}")
        print(f" 通行量: {np.mean(throughputs):.1f} ± {np.std(throughputs):.1f} veh/h")

    # Relative improvement of APPO over the baseline.
    print("\n" + banner)
    print("性能提升")
    print(banner)

    baseline_reward = np.mean(results['baseline']['rewards'])
    baseline_throughput = np.mean(results['baseline']['throughputs'])

    # Reward may be negative, so its change is normalised by the magnitude.
    reward_imp = _format_improvement(
        np.mean(results['appo']['rewards']), baseline_reward, abs(baseline_reward)
    )
    throughput_imp = _format_improvement(
        np.mean(results['appo']['throughputs']), baseline_throughput,
        baseline_throughput
    )
    print("\nAPPO vs Baseline:")
    print(f" 奖励提升: {reward_imp}")
    print(f" 通行量提升: {throughput_imp}")

    # Plot.
    _plot_comparison(results, 'model_comparison.png')
    print("\n对比图已保存: model_comparison.png")
    print(banner)
|
|
|
|
# Script entry point: run the comparison only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    main()
|