添加随机种子，自动保存最优模型

2026-01-05 17:04:27 +08:00 · 2026-01-05 17:04:27 +08:00 · 39b0134609
parent fd0dc80a91
commit 39b0134609
2 changed files with 33 additions and 1 deletions
--- a/config.yaml
+++ b/config.yaml
@ -54,6 +54,7 @@ training:
  log_freq: 10                     # Logging frequency (episodes)
  checkpoint_dir: "checkpoints"    # Checkpoint directory
  log_dir: "logs"                  # Log directory
+  random_seed: 42                  # Random seed for reproducibility

 # Testing Parameters
 testing:
--- a/train.py
+++ b/train.py
@ -2,7 +2,9 @@
 Training script for DQN-based speed limit control.
 """
 import os
+import random
 import numpy as np
+import torch
 from tqdm import tqdm
 import matplotlib.pyplot as plt
 from utils import load_config, create_directories
@ -10,11 +12,28 @@ from environment import TrafficEnvironment
 from dqn_agent import DQNAgent


+def set_random_seed(seed: int):
+    """Set random seed for reproducibility."""
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
+
+
 def train(config_path: str = "config.yaml"):
    """Train DQN agent."""
    config = load_config(config_path)
    create_directories(config)

+    # Set random seed for reproducibility
+    random_seed = config["training"].get("random_seed", 42)
+    set_random_seed(random_seed)
+    print(f"Random seed set to: {random_seed}")
+
    env = TrafficEnvironment(config)
    agent = DQNAgent(
        state_dim=env.state_dim,
@ -40,6 +59,10 @@ def train(config_path: str = "config.yaml"):
    episode_losses = []
    episode_throughputs = []

+    # Track best model
+    best_reward = float('-inf')
+    best_model_path = os.path.join(checkpoint_dir, "model_best.pt")
+
    print(f"Starting training for {num_episodes} episodes...")
    print(f"State dim: {env.state_dim}, Action dim: {env.action_dim}")
    print(f"Device: {agent.device}")
@ -74,6 +97,12 @@ def train(config_path: str = "config.yaml"):
        avg_throughput = np.mean([m["throughput"] for m in env.episode_metrics])
        episode_throughputs.append(avg_throughput)

+        # Save best model based on episode reward
+        if episode_reward > best_reward:
+            best_reward = episode_reward
+            agent.save(best_model_path)
+            print(f"\n*** New best model saved! Episode {episode + 1}, Reward: {episode_reward:.2f} ***")
+
        if (episode + 1) % log_freq == 0:
            avg_reward = np.mean(episode_rewards[-log_freq:])
            avg_loss = np.mean(episode_losses[-log_freq:])
@ -95,7 +124,9 @@ def train(config_path: str = "config.yaml"):

    final_model_path = os.path.join(checkpoint_dir, "model_final.pt")
    agent.save(final_model_path)
-    print(f"\nTraining completed! Final model saved to {final_model_path}")
+    print(f"\nTraining completed!")
+    print(f"Final model saved to {final_model_path}")
+    print(f"Best model saved to {best_model_path} (Best Reward: {best_reward:.2f})")

    plot_training_results(
        episode_rewards, episode_losses, episode_throughputs, config["training"]["log_dir"]