diff --git a/config.yaml b/config.yaml
index 19dfe6e..e4ca81e 100644
--- a/config.yaml
+++ b/config.yaml
@@ -69,5 +69,5 @@ testing:
 reward:
   throughput_weight: 1.0           # Weight for throughput reward
   speed_weight: 0.5                # Weight for average speed reward
-  density_weight: -0.3             # Weight for density penalty
+  density_weight: 0.3             # Weight for density term; NOTE: a positive value rewards higher density, a penalty needs a negative weight
   action_change_weight: -0.1       # Weight for action change penalty
\ No newline at end of file
diff --git a/ctm_model.py b/ctm_model.py
index 0ddcc74..7c2b4ee 100644
--- a/ctm_model.py
+++ b/ctm_model.py
@@ -69,17 +69,11 @@ class CTMModel:
         return np.concatenate([self.densities, self.speed_limits])
 
     def _calculate_sending_flow(self, cell_idx: int) -> float:
-        """
-        Calculate sending flow from a cell (vehicles per time step).
-
-        Returns flow in vehicles that can leave the cell during one time step.
-        """
+        """Calculate sending flow from a cell (vehicles per time step)."""
         density = self.densities[cell_idx]
         speed_limit = self.speed_limits[cell_idx]
 
         effective_speed = min(speed_limit, self.free_flow_speed)
-        # Flow = density (veh/km) * speed (m/s) * 3.6 to get veh/h, then convert to veh/timestep
-        # Simplified: density * speed * time_step / 1000 gives vehicles per time step
         sending_flow = min(
             density * effective_speed * self.time_step / 1000.0,
             self.capacity * self.time_step / 1000.0
@@ -87,15 +81,9 @@ class CTMModel:
         return sending_flow
 
     def _calculate_receiving_flow(self, cell_idx: int) -> float:
-        """
-        Calculate receiving flow to a cell (vehicles per time step).
-
-        Returns flow in vehicles that can enter the cell during one time step.
-        """
+        """Calculate receiving flow to a cell (vehicles per time step)."""
         density = self.densities[cell_idx]
 
-        # Receiving capacity based on available space
-        # congestion_wave_speed (m/s) * (jam_density - density) (veh/km) * time_step / 1000
         receiving_flow = min(
             self.capacity * self.time_step / 1000.0,
             self.congestion_wave_speed * (self.jam_density - density) * self.time_step / 1000.0
diff --git a/environment.py b/environment.py
index 03d275d..863ffc3 100644
--- a/environment.py
+++ b/environment.py
@@ -66,6 +66,7 @@ class TrafficEnvironment:
         self.current_step = 0
         self.previous_action = None
         self.episode_metrics = []
+        self.current_inflow = 0.0  # Track current inflow for reward calculation
 
     def reset(self) -> np.ndarray:
         """Reset environment to initial state."""
@@ -73,6 +74,7 @@ class TrafficEnvironment:
         self.current_step = 0
         self.previous_action = None
         self.episode_metrics = []
+        self.current_inflow = 0.0
 
         # Reset demand loader if using CSV
         if self.demand_loader is not None:
@@ -99,23 +101,41 @@ class TrafficEnvironment:
         return max(0, demand)
 
     def _calculate_reward(self, info: Dict, action: int) -> float:
-        """Calculate reward based on traffic metrics."""
+        """Calculate reward based on traffic metrics with non-linear normalization."""
         throughput = info["throughput"]
         avg_density = info["average_density"]
 
         avg_speed = 0
+        count = 0
         for i in range(self.ctm.num_cells):
             if self.ctm.densities[i] > 0:
                 avg_speed += min(
                     self.ctm.speed_limits[i],
                     self.ctm.free_flow_speed * (1 - self.ctm.densities[i] / self.ctm.jam_density)
                 )
-        avg_speed /= self.ctm.num_cells
+                count += 1
+        avg_speed = avg_speed / count if count > 0 else 0
+
+        # Non-linear normalization: tanh of the throughput/inflow ratio, centered at a ratio of 0.9
+        # This makes the reward adaptive to traffic conditions
+        # When inflow is high, we expect high throughput; when low, we expect lower throughput
+        # inflow_target = max(self.current_inflow, 1000.0)  # Avoid division by very small numbers
+        inflow_target = self.current_inflow  # NOTE: unguarded -- may be 0 (zero demand), so the division below can fail
+        throughput_ratio = throughput / inflow_target
+        # throughput_ratio: 1.0 means perfect flow (output = input)
+        # > 1.0 is impossible in steady state, < 1.0 means congestion
+        throughput_norm = (np.tanh((throughput_ratio - 0.9) / 0.2) + 1.0) / 2.0
+        # Speed reward: penalize low speeds more heavily
+        speed_norm = (avg_speed / self.max_speed_limit) ** 2
+
+        # Density term: 1 - exp(-density_ratio) rises with density and saturates at 1; a penalty only if density_weight < 0
+        density_ratio = avg_density / self.ctm.critical_density
+        density_norm = np.exp(-density_ratio)
 
         reward = (
-            self.throughput_weight * (throughput / 2000.0)
-            + self.speed_weight * (avg_speed / self.max_speed_limit)
-            + self.density_weight * (avg_density / self.ctm.critical_density)
+            self.throughput_weight * throughput_norm
+            + self.speed_weight * speed_norm
+            + self.density_weight * (1.0 - density_norm)
         )
 
         if self.previous_action is not None and action != self.previous_action:
@@ -131,6 +151,7 @@ class TrafficEnvironment:
             self.ctm.set_speed_limit(i, speed_limit)
 
         inflow = self._generate_demand()
+        self.current_inflow = inflow  # Store current inflow for reward calculation
         outflow = 2000.0
 
         next_state, info = self.ctm.step(inflow, outflow)