diff --git a/config.yaml b/config.yaml index 3ad53d3..22832b6 100644 --- a/config.yaml +++ b/config.yaml @@ -54,6 +54,7 @@ training: log_freq: 10 # Logging frequency (episodes) checkpoint_dir: "checkpoints" # Checkpoint directory log_dir: "logs" # Log directory + random_seed: 42 # Random seed for reproducibility # Testing Parameters testing: diff --git a/train.py b/train.py index a4a3781..14f32a1 100644 --- a/train.py +++ b/train.py @@ -2,7 +2,9 @@ Training script for DQN-based speed limit control. """ import os +import random import numpy as np +import torch from tqdm import tqdm import matplotlib.pyplot as plt from utils import load_config, create_directories @@ -10,11 +12,28 @@ from environment import TrafficEnvironment from dqn_agent import DQNAgent +def set_random_seed(seed: int): + """Set random seed for reproducibility.""" + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + def train(config_path: str = "config.yaml"): """Train DQN agent.""" config = load_config(config_path) create_directories(config) + # Set random seed for reproducibility + random_seed = config["training"].get("random_seed", 42) + set_random_seed(random_seed) + print(f"Random seed set to: {random_seed}") + env = TrafficEnvironment(config) agent = DQNAgent( state_dim=env.state_dim, @@ -40,6 +59,10 @@ def train(config_path: str = "config.yaml"): episode_losses = [] episode_throughputs = [] + # Track best model + best_reward = float('-inf') + best_model_path = os.path.join(checkpoint_dir, "model_best.pt") + print(f"Starting training for {num_episodes} episodes...") print(f"State dim: {env.state_dim}, Action dim: {env.action_dim}") print(f"Device: {agent.device}") @@ -74,6 +97,12 @@ def train(config_path: str = "config.yaml"): avg_throughput = np.mean([m["throughput"] for m in env.episode_metrics]) episode_throughputs.append(avg_throughput) + # Save best model based on episode reward + if episode_reward > best_reward: + best_reward = episode_reward + agent.save(best_model_path) + print(f"\n*** New best model saved! Episode {episode + 1}, Reward: {episode_reward:.2f} ***") + if (episode + 1) % log_freq == 0: avg_reward = np.mean(episode_rewards[-log_freq:]) avg_loss = np.mean(episode_losses[-log_freq:]) @@ -95,7 +124,9 @@ def train(config_path: str = "config.yaml"): final_model_path = os.path.join(checkpoint_dir, "model_final.pt") agent.save(final_model_path) - print(f"\nTraining completed! Final model saved to {final_model_path}") + print(f"\nTraining completed!") + print(f"Final model saved to {final_model_path}") + print(f"Best model saved to {best_model_path} (Best Reward: {best_reward:.2f})") plot_training_results( episode_rewards, episode_losses, episode_throughputs, config["training"]["log_dir"]