diff --git a/train_sumo_appo.py b/train_sumo_appo.py index 2a5361b..d722a78 100644 --- a/train_sumo_appo.py +++ b/train_sumo_appo.py @@ -82,6 +82,7 @@ def train_sumo_appo(): episode_rewards = [] episode_throughputs = [] episode_mean_speeds = [] + episode_hard_brakes = [] policy_losses = [] value_losses = [] entropies = [] @@ -97,6 +98,7 @@ def train_sumo_appo(): episode_reward = 0 episode_throughput = 0 episode_speed = 0 + episode_brakes = 0 done = False step = 0 @@ -115,6 +117,7 @@ def train_sumo_appo(): episode_reward += reward episode_throughput += info["throughput"] episode_speed += info["mean_speed_kmh"] + episode_brakes += info["num_hard_brakes"] state = next_state step += 1 @@ -143,15 +146,16 @@ def train_sumo_appo(): episode_rewards.append(episode_reward) episode_throughputs.append(avg_tp) episode_mean_speeds.append(avg_speed) + episode_hard_brakes.append(episode_brakes) if train_stats: policy_losses.append(train_stats["policy_loss"]) value_losses.append(train_stats["value_loss"]) entropies.append(train_stats["entropy"]) - logger.log(episode, episode_reward, avg_tp, avg_speed, + logger.log(episode, episode_reward, avg_tp, avg_speed, episode_brakes, train_stats["policy_loss"], train_stats["value_loss"], train_stats["entropy"]) else: - logger.log(episode, episode_reward, avg_tp, avg_speed) + logger.log(episode, episode_reward, avg_tp, avg_speed, episode_brakes) # 保存最佳模型 if episode_reward > best_reward: @@ -185,7 +189,7 @@ def train_sumo_appo(): # 绘制训练曲线 _plot_training_curves( - episode_rewards, episode_throughputs, episode_mean_speeds, + episode_rewards, episode_throughputs, episode_mean_speeds, episode_hard_brakes, policy_losses, value_losses, save_path=os.path.join(log_dir, "training_curves.png"), ) @@ -199,11 +203,11 @@ def train_sumo_appo(): def _plot_training_curves( - rewards, throughputs, mean_speeds, policy_losses, value_losses, + rewards, throughputs, mean_speeds, hard_brakes, policy_losses, value_losses, save_path: str, ): """绘制训练曲线""" - fig, axes = plt.subplots(2, 3, figsize=(18, 10)) + fig, axes = plt.subplots(2, 4, figsize=(24, 10)) window = 20 @@ -237,6 +241,16 @@ def _plot_training_curves( axes[0, 2].set_title("Mean Speed") axes[0, 2].grid(True, alpha=0.3) + # Hard Brakes + axes[0, 3].plot(hard_brakes, alpha=0.4, color="red") + if len(hard_brakes) > window: + ma = np.convolve(hard_brakes, np.ones(window) / window, mode="valid") + axes[0, 3].plot(range(window - 1, len(hard_brakes)), ma, "r-", linewidth=2) + axes[0, 3].set_xlabel("Episode") + axes[0, 3].set_ylabel("Hard Brakes Count") + axes[0, 3].set_title("Hard Brakes") + axes[0, 3].grid(True, alpha=0.3) + # Policy Loss if policy_losses: axes[1, 0].plot(policy_losses, "b-", alpha=0.6) @@ -262,11 +276,14 @@ def _plot_training_curves( f"Best Reward: {max(rewards):.2f}\n" f"Final Avg Reward: {np.mean(rewards[-20:]):.2f}\n" f"Final Avg Throughput: {np.mean(throughputs[-20:]):.1f}\n" - f"Final Avg Speed: {np.mean(mean_speeds[-20:]):.1f} km/h" + f"Final Avg Speed: {np.mean(mean_speeds[-20:]):.1f} km/h\n" + f"Final Avg Hard Brakes: {np.mean(hard_brakes[-20:]):.1f}" ) axes[1, 2].text(0.1, 0.5, summary, fontsize=12, family="monospace", verticalalignment="center", transform=axes[1, 2].transAxes) + axes[1, 3].axis("off") + plt.tight_layout() plt.savefig(save_path, dpi=150, bbox_inches="tight") print(f"训练曲线已保存: {save_path}") diff --git a/train_sumo_dqn.py b/train_sumo_dqn.py index debe451..2ac55e6 100644 --- a/train_sumo_dqn.py +++ b/train_sumo_dqn.py @@ -74,6 +74,7 @@ def train_sumo_dqn(): episode_rewards = [] episode_throughputs = [] episode_mean_speeds = [] + episode_hard_brakes = [] losses = [] best_reward = -float("inf") @@ -86,6 +87,7 @@ def train_sumo_dqn(): episode_reward = 0 episode_throughput = 0 episode_speed = 0 + episode_brakes = 0 done = False step = 0 @@ -106,6 +108,7 @@ def train_sumo_dqn(): episode_reward += reward episode_throughput += info["throughput"] episode_speed += info["mean_speed_kmh"] + episode_brakes += info["num_hard_brakes"] state = next_state step += 1 @@ -123,9 +126,10 @@ def train_sumo_dqn(): episode_rewards.append(episode_reward) episode_throughputs.append(avg_tp) episode_mean_speeds.append(avg_speed) + episode_hard_brakes.append(episode_brakes) loss_val = np.mean(losses[-100:]) if losses else None - logger.log(episode, episode_reward, avg_tp, avg_speed, value_loss=loss_val) + logger.log(episode, episode_reward, avg_tp, avg_speed, episode_brakes, value_loss=loss_val) if episode_reward > best_reward: best_reward = episode_reward @@ -152,7 +156,7 @@ def train_sumo_dqn(): agent.save(os.path.join(checkpoint_dir, f"model_ep{num_episodes}.pt")) # 绘制训练曲线 - fig, axes = plt.subplots(2, 2, figsize=(15, 10)) + fig, axes = plt.subplots(2, 3, figsize=(18, 10)) axes[0, 0].plot(episode_rewards, alpha=0.6) window = 20 @@ -170,10 +174,19 @@ def train_sumo_dqn(): axes[0, 1].set_title('Throughput') axes[0, 1].grid(True, alpha=0.3) - axes[1, 0].plot(episode_mean_speeds, 'orange', alpha=0.6) + axes[0, 2].plot(episode_mean_speeds, 'orange', alpha=0.6) + axes[0, 2].set_xlabel('Episode') + axes[0, 2].set_ylabel('Mean Speed (km/h)') + axes[0, 2].set_title('Mean Speed') + axes[0, 2].grid(True, alpha=0.3) + + axes[1, 0].plot(episode_hard_brakes, 'r-', alpha=0.6) + if len(episode_hard_brakes) > window: + ma = np.convolve(episode_hard_brakes, np.ones(window)/window, mode='valid') + axes[1, 0].plot(range(window-1, len(episode_hard_brakes)), ma, 'b-', linewidth=2) axes[1, 0].set_xlabel('Episode') - axes[1, 0].set_ylabel('Mean Speed (km/h)') - axes[1, 0].set_title('Mean Speed') + axes[1, 0].set_ylabel('Hard Brakes Count') + axes[1, 0].set_title('Hard Brakes') axes[1, 0].grid(True, alpha=0.3) if losses: @@ -183,6 +196,8 @@ def train_sumo_dqn(): axes[1, 1].set_title('Training Loss') axes[1, 1].grid(True, alpha=0.3) + axes[1, 2].axis('off') + plt.tight_layout() plt.savefig(os.path.join(log_dir, "training_curves.png"), dpi=150) print(f"训练曲线已保存: {os.path.join(log_dir, 'training_curves.png')}") diff --git a/train_sumo_ppo.py b/train_sumo_ppo.py index 6fe57f0..6ee4628 100644 --- a/train_sumo_ppo.py +++ b/train_sumo_ppo.py @@ -84,6 +84,7 @@ def train_sumo_ppo(): episode_rewards = [] episode_throughputs = [] episode_mean_speeds = [] + episode_hard_brakes = [] policy_losses = [] value_losses = [] entropies = [] @@ -99,6 +100,7 @@ def train_sumo_ppo(): episode_reward = 0 episode_throughput = 0 episode_speed = 0 + episode_brakes = 0 done = False step = 0 @@ -117,6 +119,7 @@ def train_sumo_ppo(): episode_reward += reward episode_throughput += info["throughput"] episode_speed += info["mean_speed_kmh"] + episode_brakes += info["num_hard_brakes"] state = next_state step += 1 @@ -145,15 +148,16 @@ def train_sumo_ppo(): episode_rewards.append(episode_reward) episode_throughputs.append(avg_tp) episode_mean_speeds.append(avg_speed) + episode_hard_brakes.append(episode_brakes) if train_stats: policy_losses.append(train_stats["policy_loss"]) value_losses.append(train_stats["value_loss"]) entropies.append(train_stats["entropy"]) - logger.log(episode, episode_reward, avg_tp, avg_speed, + logger.log(episode, episode_reward, avg_tp, avg_speed, episode_brakes, train_stats["policy_loss"], train_stats["value_loss"], train_stats["entropy"]) else: - logger.log(episode, episode_reward, avg_tp, avg_speed) + logger.log(episode, episode_reward, avg_tp, avg_speed, episode_brakes) # 保存最佳模型 if episode_reward > best_reward: @@ -187,7 +191,7 @@ def train_sumo_ppo(): # 绘制训练曲线 _plot_training_curves( - episode_rewards, episode_throughputs, episode_mean_speeds, + episode_rewards, episode_throughputs, episode_mean_speeds, episode_hard_brakes, policy_losses, value_losses, save_path=os.path.join(log_dir, "training_curves.png"), ) @@ -201,11 +205,11 @@ def train_sumo_ppo(): def _plot_training_curves( - rewards, throughputs, mean_speeds, policy_losses, value_losses, + rewards, throughputs, mean_speeds, hard_brakes, policy_losses, value_losses, save_path: str, ): """绘制训练曲线""" - fig, axes = plt.subplots(2, 3, figsize=(18, 10)) + fig, axes = plt.subplots(2, 4, figsize=(24, 10)) window = 20 @@ -239,6 +243,16 @@ def _plot_training_curves( axes[0, 2].set_title("Mean Speed") axes[0, 2].grid(True, alpha=0.3) + # Hard Brakes + axes[0, 3].plot(hard_brakes, alpha=0.4, color="red") + if len(hard_brakes) > window: + ma = np.convolve(hard_brakes, np.ones(window) / window, mode="valid") + axes[0, 3].plot(range(window - 1, len(hard_brakes)), ma, "r-", linewidth=2) + axes[0, 3].set_xlabel("Episode") + axes[0, 3].set_ylabel("Hard Brakes Count") + axes[0, 3].set_title("Hard Brakes") + axes[0, 3].grid(True, alpha=0.3) + # Policy Loss if policy_losses: axes[1, 0].plot(policy_losses, "b-", alpha=0.6) @@ -264,11 +278,14 @@ def _plot_training_curves( f"Best Reward: {max(rewards):.2f}\n" f"Final Avg Reward: {np.mean(rewards[-20:]):.2f}\n" f"Final Avg Throughput: {np.mean(throughputs[-20:]):.1f}\n" - f"Final Avg Speed: {np.mean(mean_speeds[-20:]):.1f} km/h" + f"Final Avg Speed: {np.mean(mean_speeds[-20:]):.1f} km/h\n" + f"Final Avg Hard Brakes: {np.mean(hard_brakes[-20:]):.1f}" ) axes[1, 2].text(0.1, 0.5, summary, fontsize=12, family="monospace", verticalalignment="center", transform=axes[1, 2].transAxes) + axes[1, 3].axis("off") + plt.tight_layout() plt.savefig(save_path, dpi=150, bbox_inches="tight") print(f"训练曲线已保存: {save_path}") diff --git a/training_logger.py b/training_logger.py index 1e2a27d..8ea528d 100644 --- a/training_logger.py +++ b/training_logger.py @@ -7,7 +7,7 @@ class TrainingLogger: def __init__(self, log_dir, model_name, resume=False): self.log_path = os.path.join(log_dir, f"{model_name}_training_log.csv") self.fieldnames = [ - "episode", "reward", "throughput", "mean_speed", + "episode", "reward", "throughput", "mean_speed", "hard_brakes", "policy_loss", "value_loss", "entropy" ] @@ -16,7 +16,7 @@ class TrainingLogger: writer = csv.DictWriter(f, fieldnames=self.fieldnames) writer.writeheader() - def log(self, episode, reward, throughput, mean_speed, + def log(self, episode, reward, throughput, mean_speed, hard_brakes=0, policy_loss=None, value_loss=None, entropy=None): with open(self.log_path, "a", newline="", encoding="utf-8") as f: writer = csv.DictWriter(f, fieldnames=self.fieldnames) @@ -25,6 +25,7 @@ class TrainingLogger: "reward": f"{reward:.4f}", "throughput": f"{throughput:.2f}", "mean_speed": f"{mean_speed:.2f}", + "hard_brakes": f"{hard_brakes:.0f}", "policy_loss": f"{policy_loss:.6f}" if policy_loss is not None else "", "value_loss": f"{value_loss:.6f}" if value_loss is not None else "", "entropy": f"{entropy:.6f}" if entropy is not None else ""