添加 num_hard_brakes(急刹车次数)曲线绘制,并写入训练日志
This commit is contained in:
parent
29aae92ad2
commit
b830631aa9
|
|
@ -82,6 +82,7 @@ def train_sumo_appo():
|
||||||
episode_rewards = []
|
episode_rewards = []
|
||||||
episode_throughputs = []
|
episode_throughputs = []
|
||||||
episode_mean_speeds = []
|
episode_mean_speeds = []
|
||||||
|
episode_hard_brakes = []
|
||||||
policy_losses = []
|
policy_losses = []
|
||||||
value_losses = []
|
value_losses = []
|
||||||
entropies = []
|
entropies = []
|
||||||
|
|
@ -97,6 +98,7 @@ def train_sumo_appo():
|
||||||
episode_reward = 0
|
episode_reward = 0
|
||||||
episode_throughput = 0
|
episode_throughput = 0
|
||||||
episode_speed = 0
|
episode_speed = 0
|
||||||
|
episode_brakes = 0
|
||||||
done = False
|
done = False
|
||||||
step = 0
|
step = 0
|
||||||
|
|
||||||
|
|
@ -115,6 +117,7 @@ def train_sumo_appo():
|
||||||
episode_reward += reward
|
episode_reward += reward
|
||||||
episode_throughput += info["throughput"]
|
episode_throughput += info["throughput"]
|
||||||
episode_speed += info["mean_speed_kmh"]
|
episode_speed += info["mean_speed_kmh"]
|
||||||
|
episode_brakes += info["num_hard_brakes"]
|
||||||
state = next_state
|
state = next_state
|
||||||
step += 1
|
step += 1
|
||||||
|
|
||||||
|
|
@ -143,15 +146,16 @@ def train_sumo_appo():
|
||||||
episode_rewards.append(episode_reward)
|
episode_rewards.append(episode_reward)
|
||||||
episode_throughputs.append(avg_tp)
|
episode_throughputs.append(avg_tp)
|
||||||
episode_mean_speeds.append(avg_speed)
|
episode_mean_speeds.append(avg_speed)
|
||||||
|
episode_hard_brakes.append(episode_brakes)
|
||||||
|
|
||||||
if train_stats:
|
if train_stats:
|
||||||
policy_losses.append(train_stats["policy_loss"])
|
policy_losses.append(train_stats["policy_loss"])
|
||||||
value_losses.append(train_stats["value_loss"])
|
value_losses.append(train_stats["value_loss"])
|
||||||
entropies.append(train_stats["entropy"])
|
entropies.append(train_stats["entropy"])
|
||||||
logger.log(episode, episode_reward, avg_tp, avg_speed,
|
logger.log(episode, episode_reward, avg_tp, avg_speed, episode_brakes,
|
||||||
train_stats["policy_loss"], train_stats["value_loss"], train_stats["entropy"])
|
train_stats["policy_loss"], train_stats["value_loss"], train_stats["entropy"])
|
||||||
else:
|
else:
|
||||||
logger.log(episode, episode_reward, avg_tp, avg_speed)
|
logger.log(episode, episode_reward, avg_tp, avg_speed, episode_brakes)
|
||||||
|
|
||||||
# 保存最佳模型
|
# 保存最佳模型
|
||||||
if episode_reward > best_reward:
|
if episode_reward > best_reward:
|
||||||
|
|
@ -185,7 +189,7 @@ def train_sumo_appo():
|
||||||
|
|
||||||
# 绘制训练曲线
|
# 绘制训练曲线
|
||||||
_plot_training_curves(
|
_plot_training_curves(
|
||||||
episode_rewards, episode_throughputs, episode_mean_speeds,
|
episode_rewards, episode_throughputs, episode_mean_speeds, episode_hard_brakes,
|
||||||
policy_losses, value_losses,
|
policy_losses, value_losses,
|
||||||
save_path=os.path.join(log_dir, "training_curves.png"),
|
save_path=os.path.join(log_dir, "training_curves.png"),
|
||||||
)
|
)
|
||||||
|
|
@ -199,11 +203,11 @@ def train_sumo_appo():
|
||||||
|
|
||||||
|
|
||||||
def _plot_training_curves(
|
def _plot_training_curves(
|
||||||
rewards, throughputs, mean_speeds, policy_losses, value_losses,
|
rewards, throughputs, mean_speeds, hard_brakes, policy_losses, value_losses,
|
||||||
save_path: str,
|
save_path: str,
|
||||||
):
|
):
|
||||||
"""绘制训练曲线"""
|
"""绘制训练曲线"""
|
||||||
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
|
fig, axes = plt.subplots(2, 4, figsize=(24, 10))
|
||||||
|
|
||||||
window = 20
|
window = 20
|
||||||
|
|
||||||
|
|
@ -237,6 +241,16 @@ def _plot_training_curves(
|
||||||
axes[0, 2].set_title("Mean Speed")
|
axes[0, 2].set_title("Mean Speed")
|
||||||
axes[0, 2].grid(True, alpha=0.3)
|
axes[0, 2].grid(True, alpha=0.3)
|
||||||
|
|
||||||
|
# Hard Brakes
|
||||||
|
axes[0, 3].plot(hard_brakes, alpha=0.4, color="red")
|
||||||
|
if len(hard_brakes) > window:
|
||||||
|
ma = np.convolve(hard_brakes, np.ones(window) / window, mode="valid")
|
||||||
|
axes[0, 3].plot(range(window - 1, len(hard_brakes)), ma, "r-", linewidth=2)
|
||||||
|
axes[0, 3].set_xlabel("Episode")
|
||||||
|
axes[0, 3].set_ylabel("Hard Brakes Count")
|
||||||
|
axes[0, 3].set_title("Hard Brakes")
|
||||||
|
axes[0, 3].grid(True, alpha=0.3)
|
||||||
|
|
||||||
# Policy Loss
|
# Policy Loss
|
||||||
if policy_losses:
|
if policy_losses:
|
||||||
axes[1, 0].plot(policy_losses, "b-", alpha=0.6)
|
axes[1, 0].plot(policy_losses, "b-", alpha=0.6)
|
||||||
|
|
@ -262,11 +276,14 @@ def _plot_training_curves(
|
||||||
f"Best Reward: {max(rewards):.2f}\n"
|
f"Best Reward: {max(rewards):.2f}\n"
|
||||||
f"Final Avg Reward: {np.mean(rewards[-20:]):.2f}\n"
|
f"Final Avg Reward: {np.mean(rewards[-20:]):.2f}\n"
|
||||||
f"Final Avg Throughput: {np.mean(throughputs[-20:]):.1f}\n"
|
f"Final Avg Throughput: {np.mean(throughputs[-20:]):.1f}\n"
|
||||||
f"Final Avg Speed: {np.mean(mean_speeds[-20:]):.1f} km/h"
|
f"Final Avg Speed: {np.mean(mean_speeds[-20:]):.1f} km/h\n"
|
||||||
|
f"Final Avg Hard Brakes: {np.mean(hard_brakes[-20:]):.1f}"
|
||||||
)
|
)
|
||||||
axes[1, 2].text(0.1, 0.5, summary, fontsize=12, family="monospace",
|
axes[1, 2].text(0.1, 0.5, summary, fontsize=12, family="monospace",
|
||||||
verticalalignment="center", transform=axes[1, 2].transAxes)
|
verticalalignment="center", transform=axes[1, 2].transAxes)
|
||||||
|
|
||||||
|
axes[1, 3].axis("off")
|
||||||
|
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.savefig(save_path, dpi=150, bbox_inches="tight")
|
plt.savefig(save_path, dpi=150, bbox_inches="tight")
|
||||||
print(f"训练曲线已保存: {save_path}")
|
print(f"训练曲线已保存: {save_path}")
|
||||||
|
|
|
||||||
|
|
@ -74,6 +74,7 @@ def train_sumo_dqn():
|
||||||
episode_rewards = []
|
episode_rewards = []
|
||||||
episode_throughputs = []
|
episode_throughputs = []
|
||||||
episode_mean_speeds = []
|
episode_mean_speeds = []
|
||||||
|
episode_hard_brakes = []
|
||||||
losses = []
|
losses = []
|
||||||
best_reward = -float("inf")
|
best_reward = -float("inf")
|
||||||
|
|
||||||
|
|
@ -86,6 +87,7 @@ def train_sumo_dqn():
|
||||||
episode_reward = 0
|
episode_reward = 0
|
||||||
episode_throughput = 0
|
episode_throughput = 0
|
||||||
episode_speed = 0
|
episode_speed = 0
|
||||||
|
episode_brakes = 0
|
||||||
done = False
|
done = False
|
||||||
step = 0
|
step = 0
|
||||||
|
|
||||||
|
|
@ -106,6 +108,7 @@ def train_sumo_dqn():
|
||||||
episode_reward += reward
|
episode_reward += reward
|
||||||
episode_throughput += info["throughput"]
|
episode_throughput += info["throughput"]
|
||||||
episode_speed += info["mean_speed_kmh"]
|
episode_speed += info["mean_speed_kmh"]
|
||||||
|
episode_brakes += info["num_hard_brakes"]
|
||||||
state = next_state
|
state = next_state
|
||||||
step += 1
|
step += 1
|
||||||
|
|
||||||
|
|
@ -123,9 +126,10 @@ def train_sumo_dqn():
|
||||||
episode_rewards.append(episode_reward)
|
episode_rewards.append(episode_reward)
|
||||||
episode_throughputs.append(avg_tp)
|
episode_throughputs.append(avg_tp)
|
||||||
episode_mean_speeds.append(avg_speed)
|
episode_mean_speeds.append(avg_speed)
|
||||||
|
episode_hard_brakes.append(episode_brakes)
|
||||||
|
|
||||||
loss_val = np.mean(losses[-100:]) if losses else None
|
loss_val = np.mean(losses[-100:]) if losses else None
|
||||||
logger.log(episode, episode_reward, avg_tp, avg_speed, value_loss=loss_val)
|
logger.log(episode, episode_reward, avg_tp, avg_speed, episode_brakes, value_loss=loss_val)
|
||||||
|
|
||||||
if episode_reward > best_reward:
|
if episode_reward > best_reward:
|
||||||
best_reward = episode_reward
|
best_reward = episode_reward
|
||||||
|
|
@ -152,7 +156,7 @@ def train_sumo_dqn():
|
||||||
agent.save(os.path.join(checkpoint_dir, f"model_ep{num_episodes}.pt"))
|
agent.save(os.path.join(checkpoint_dir, f"model_ep{num_episodes}.pt"))
|
||||||
|
|
||||||
# 绘制训练曲线
|
# 绘制训练曲线
|
||||||
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
|
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
|
||||||
|
|
||||||
axes[0, 0].plot(episode_rewards, alpha=0.6)
|
axes[0, 0].plot(episode_rewards, alpha=0.6)
|
||||||
window = 20
|
window = 20
|
||||||
|
|
@ -170,10 +174,19 @@ def train_sumo_dqn():
|
||||||
axes[0, 1].set_title('Throughput')
|
axes[0, 1].set_title('Throughput')
|
||||||
axes[0, 1].grid(True, alpha=0.3)
|
axes[0, 1].grid(True, alpha=0.3)
|
||||||
|
|
||||||
axes[1, 0].plot(episode_mean_speeds, 'orange', alpha=0.6)
|
axes[0, 2].plot(episode_mean_speeds, 'orange', alpha=0.6)
|
||||||
|
axes[0, 2].set_xlabel('Episode')
|
||||||
|
axes[0, 2].set_ylabel('Mean Speed (km/h)')
|
||||||
|
axes[0, 2].set_title('Mean Speed')
|
||||||
|
axes[0, 2].grid(True, alpha=0.3)
|
||||||
|
|
||||||
|
axes[1, 0].plot(episode_hard_brakes, 'r-', alpha=0.6)
|
||||||
|
if len(episode_hard_brakes) > window:
|
||||||
|
ma = np.convolve(episode_hard_brakes, np.ones(window)/window, mode='valid')
|
||||||
|
axes[1, 0].plot(range(window-1, len(episode_hard_brakes)), ma, 'b-', linewidth=2)
|
||||||
axes[1, 0].set_xlabel('Episode')
|
axes[1, 0].set_xlabel('Episode')
|
||||||
axes[1, 0].set_ylabel('Mean Speed (km/h)')
|
axes[1, 0].set_ylabel('Hard Brakes Count')
|
||||||
axes[1, 0].set_title('Mean Speed')
|
axes[1, 0].set_title('Hard Brakes')
|
||||||
axes[1, 0].grid(True, alpha=0.3)
|
axes[1, 0].grid(True, alpha=0.3)
|
||||||
|
|
||||||
if losses:
|
if losses:
|
||||||
|
|
@ -183,6 +196,8 @@ def train_sumo_dqn():
|
||||||
axes[1, 1].set_title('Training Loss')
|
axes[1, 1].set_title('Training Loss')
|
||||||
axes[1, 1].grid(True, alpha=0.3)
|
axes[1, 1].grid(True, alpha=0.3)
|
||||||
|
|
||||||
|
axes[1, 2].axis('off')
|
||||||
|
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.savefig(os.path.join(log_dir, "training_curves.png"), dpi=150)
|
plt.savefig(os.path.join(log_dir, "training_curves.png"), dpi=150)
|
||||||
print(f"训练曲线已保存: {os.path.join(log_dir, 'training_curves.png')}")
|
print(f"训练曲线已保存: {os.path.join(log_dir, 'training_curves.png')}")
|
||||||
|
|
|
||||||
|
|
@ -84,6 +84,7 @@ def train_sumo_ppo():
|
||||||
episode_rewards = []
|
episode_rewards = []
|
||||||
episode_throughputs = []
|
episode_throughputs = []
|
||||||
episode_mean_speeds = []
|
episode_mean_speeds = []
|
||||||
|
episode_hard_brakes = []
|
||||||
policy_losses = []
|
policy_losses = []
|
||||||
value_losses = []
|
value_losses = []
|
||||||
entropies = []
|
entropies = []
|
||||||
|
|
@ -99,6 +100,7 @@ def train_sumo_ppo():
|
||||||
episode_reward = 0
|
episode_reward = 0
|
||||||
episode_throughput = 0
|
episode_throughput = 0
|
||||||
episode_speed = 0
|
episode_speed = 0
|
||||||
|
episode_brakes = 0
|
||||||
done = False
|
done = False
|
||||||
step = 0
|
step = 0
|
||||||
|
|
||||||
|
|
@ -117,6 +119,7 @@ def train_sumo_ppo():
|
||||||
episode_reward += reward
|
episode_reward += reward
|
||||||
episode_throughput += info["throughput"]
|
episode_throughput += info["throughput"]
|
||||||
episode_speed += info["mean_speed_kmh"]
|
episode_speed += info["mean_speed_kmh"]
|
||||||
|
episode_brakes += info["num_hard_brakes"]
|
||||||
state = next_state
|
state = next_state
|
||||||
step += 1
|
step += 1
|
||||||
|
|
||||||
|
|
@ -145,15 +148,16 @@ def train_sumo_ppo():
|
||||||
episode_rewards.append(episode_reward)
|
episode_rewards.append(episode_reward)
|
||||||
episode_throughputs.append(avg_tp)
|
episode_throughputs.append(avg_tp)
|
||||||
episode_mean_speeds.append(avg_speed)
|
episode_mean_speeds.append(avg_speed)
|
||||||
|
episode_hard_brakes.append(episode_brakes)
|
||||||
|
|
||||||
if train_stats:
|
if train_stats:
|
||||||
policy_losses.append(train_stats["policy_loss"])
|
policy_losses.append(train_stats["policy_loss"])
|
||||||
value_losses.append(train_stats["value_loss"])
|
value_losses.append(train_stats["value_loss"])
|
||||||
entropies.append(train_stats["entropy"])
|
entropies.append(train_stats["entropy"])
|
||||||
logger.log(episode, episode_reward, avg_tp, avg_speed,
|
logger.log(episode, episode_reward, avg_tp, avg_speed, episode_brakes,
|
||||||
train_stats["policy_loss"], train_stats["value_loss"], train_stats["entropy"])
|
train_stats["policy_loss"], train_stats["value_loss"], train_stats["entropy"])
|
||||||
else:
|
else:
|
||||||
logger.log(episode, episode_reward, avg_tp, avg_speed)
|
logger.log(episode, episode_reward, avg_tp, avg_speed, episode_brakes)
|
||||||
|
|
||||||
# 保存最佳模型
|
# 保存最佳模型
|
||||||
if episode_reward > best_reward:
|
if episode_reward > best_reward:
|
||||||
|
|
@ -187,7 +191,7 @@ def train_sumo_ppo():
|
||||||
|
|
||||||
# 绘制训练曲线
|
# 绘制训练曲线
|
||||||
_plot_training_curves(
|
_plot_training_curves(
|
||||||
episode_rewards, episode_throughputs, episode_mean_speeds,
|
episode_rewards, episode_throughputs, episode_mean_speeds, episode_hard_brakes,
|
||||||
policy_losses, value_losses,
|
policy_losses, value_losses,
|
||||||
save_path=os.path.join(log_dir, "training_curves.png"),
|
save_path=os.path.join(log_dir, "training_curves.png"),
|
||||||
)
|
)
|
||||||
|
|
@ -201,11 +205,11 @@ def train_sumo_ppo():
|
||||||
|
|
||||||
|
|
||||||
def _plot_training_curves(
|
def _plot_training_curves(
|
||||||
rewards, throughputs, mean_speeds, policy_losses, value_losses,
|
rewards, throughputs, mean_speeds, hard_brakes, policy_losses, value_losses,
|
||||||
save_path: str,
|
save_path: str,
|
||||||
):
|
):
|
||||||
"""绘制训练曲线"""
|
"""绘制训练曲线"""
|
||||||
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
|
fig, axes = plt.subplots(2, 4, figsize=(24, 10))
|
||||||
|
|
||||||
window = 20
|
window = 20
|
||||||
|
|
||||||
|
|
@ -239,6 +243,16 @@ def _plot_training_curves(
|
||||||
axes[0, 2].set_title("Mean Speed")
|
axes[0, 2].set_title("Mean Speed")
|
||||||
axes[0, 2].grid(True, alpha=0.3)
|
axes[0, 2].grid(True, alpha=0.3)
|
||||||
|
|
||||||
|
# Hard Brakes
|
||||||
|
axes[0, 3].plot(hard_brakes, alpha=0.4, color="red")
|
||||||
|
if len(hard_brakes) > window:
|
||||||
|
ma = np.convolve(hard_brakes, np.ones(window) / window, mode="valid")
|
||||||
|
axes[0, 3].plot(range(window - 1, len(hard_brakes)), ma, "r-", linewidth=2)
|
||||||
|
axes[0, 3].set_xlabel("Episode")
|
||||||
|
axes[0, 3].set_ylabel("Hard Brakes Count")
|
||||||
|
axes[0, 3].set_title("Hard Brakes")
|
||||||
|
axes[0, 3].grid(True, alpha=0.3)
|
||||||
|
|
||||||
# Policy Loss
|
# Policy Loss
|
||||||
if policy_losses:
|
if policy_losses:
|
||||||
axes[1, 0].plot(policy_losses, "b-", alpha=0.6)
|
axes[1, 0].plot(policy_losses, "b-", alpha=0.6)
|
||||||
|
|
@ -264,11 +278,14 @@ def _plot_training_curves(
|
||||||
f"Best Reward: {max(rewards):.2f}\n"
|
f"Best Reward: {max(rewards):.2f}\n"
|
||||||
f"Final Avg Reward: {np.mean(rewards[-20:]):.2f}\n"
|
f"Final Avg Reward: {np.mean(rewards[-20:]):.2f}\n"
|
||||||
f"Final Avg Throughput: {np.mean(throughputs[-20:]):.1f}\n"
|
f"Final Avg Throughput: {np.mean(throughputs[-20:]):.1f}\n"
|
||||||
f"Final Avg Speed: {np.mean(mean_speeds[-20:]):.1f} km/h"
|
f"Final Avg Speed: {np.mean(mean_speeds[-20:]):.1f} km/h\n"
|
||||||
|
f"Final Avg Hard Brakes: {np.mean(hard_brakes[-20:]):.1f}"
|
||||||
)
|
)
|
||||||
axes[1, 2].text(0.1, 0.5, summary, fontsize=12, family="monospace",
|
axes[1, 2].text(0.1, 0.5, summary, fontsize=12, family="monospace",
|
||||||
verticalalignment="center", transform=axes[1, 2].transAxes)
|
verticalalignment="center", transform=axes[1, 2].transAxes)
|
||||||
|
|
||||||
|
axes[1, 3].axis("off")
|
||||||
|
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.savefig(save_path, dpi=150, bbox_inches="tight")
|
plt.savefig(save_path, dpi=150, bbox_inches="tight")
|
||||||
print(f"训练曲线已保存: {save_path}")
|
print(f"训练曲线已保存: {save_path}")
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ class TrainingLogger:
|
||||||
def __init__(self, log_dir, model_name, resume=False):
|
def __init__(self, log_dir, model_name, resume=False):
|
||||||
self.log_path = os.path.join(log_dir, f"{model_name}_training_log.csv")
|
self.log_path = os.path.join(log_dir, f"{model_name}_training_log.csv")
|
||||||
self.fieldnames = [
|
self.fieldnames = [
|
||||||
"episode", "reward", "throughput", "mean_speed",
|
"episode", "reward", "throughput", "mean_speed", "hard_brakes",
|
||||||
"policy_loss", "value_loss", "entropy"
|
"policy_loss", "value_loss", "entropy"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -16,7 +16,7 @@ class TrainingLogger:
|
||||||
writer = csv.DictWriter(f, fieldnames=self.fieldnames)
|
writer = csv.DictWriter(f, fieldnames=self.fieldnames)
|
||||||
writer.writeheader()
|
writer.writeheader()
|
||||||
|
|
||||||
def log(self, episode, reward, throughput, mean_speed,
|
def log(self, episode, reward, throughput, mean_speed, hard_brakes=0,
|
||||||
policy_loss=None, value_loss=None, entropy=None):
|
policy_loss=None, value_loss=None, entropy=None):
|
||||||
with open(self.log_path, "a", newline="", encoding="utf-8") as f:
|
with open(self.log_path, "a", newline="", encoding="utf-8") as f:
|
||||||
writer = csv.DictWriter(f, fieldnames=self.fieldnames)
|
writer = csv.DictWriter(f, fieldnames=self.fieldnames)
|
||||||
|
|
@ -25,6 +25,7 @@ class TrainingLogger:
|
||||||
"reward": f"{reward:.4f}",
|
"reward": f"{reward:.4f}",
|
||||||
"throughput": f"{throughput:.2f}",
|
"throughput": f"{throughput:.2f}",
|
||||||
"mean_speed": f"{mean_speed:.2f}",
|
"mean_speed": f"{mean_speed:.2f}",
|
||||||
|
"hard_brakes": f"{hard_brakes:.0f}",
|
||||||
"policy_loss": f"{policy_loss:.6f}" if policy_loss is not None else "",
|
"policy_loss": f"{policy_loss:.6f}" if policy_loss is not None else "",
|
||||||
"value_loss": f"{value_loss:.6f}" if value_loss is not None else "",
|
"value_loss": f"{value_loss:.6f}" if value_loss is not None else "",
|
||||||
"entropy": f"{entropy:.6f}" if entropy is not None else ""
|
"entropy": f"{entropy:.6f}" if entropy is not None else ""
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue