diff --git a/agents/tcamappo_agent.py b/agents/tcamappo_agent.py
index 1e55e44..3092c7a 100644
--- a/agents/tcamappo_agent.py
+++ b/agents/tcamappo_agent.py
@@ -262,7 +262,7 @@ class TCAMAPPOAgent:
         action_norm = np.asarray(action, dtype=np.float32) / max(self.num_actions - 1, 1)
         reward_features = np.array(
             [
-                float(reward) / 10.0,
+                float(reward),
                 *[float(info.get(column, 0.0)) for column in REWARD_COMPONENT_COLUMNS],
             ],
             dtype=np.float32,
diff --git a/config_sumo_vsl.yaml b/config_sumo_vsl.yaml
index 7f3a846..c6242f4 100644
--- a/config_sumo_vsl.yaml
+++ b/config_sumo_vsl.yaml
@@ -53,9 +53,8 @@ environment:
   free_flow_speed: 30.56
 
   reward:
-    reward_scale: 10.0
-    efficiency_alpha: 3.0
-    safety_beta: 4.0
+    efficiency_alpha: 2.19
+    safety_beta: 9.19
     efficiency_exponent: 0.50
     safety_exponent: 0.50
     ttc_threshold_s: 2.3
diff --git a/envs/edge_vsl_env.py b/envs/edge_vsl_env.py
index a4ddb86..669f003 100644
--- a/envs/edge_vsl_env.py
+++ b/envs/edge_vsl_env.py
@@ -554,7 +554,7 @@ class SUMOEdgeVSLEnvironment:
         state_parts.append(time_progress)
         state_parts.append(np.sin(2 * np.pi * time_progress))
         state_parts.append(np.cos(2 * np.pi * time_progress))
-        state_parts.append(self._last_reward / 10.0)
+        state_parts.append(self._last_reward)
 
         return np.array(state_parts, dtype=np.float32)
 
diff --git a/envs/reward_design_blueprint.py b/envs/reward_design_blueprint.py
index ce0ec58..f6dc5b9 100644
--- a/envs/reward_design_blueprint.py
+++ b/envs/reward_design_blueprint.py
@@ -135,7 +135,7 @@ def build_tca_mappo_reward_blueprint() -> RewardBlueprint:
         ),
         terms=terms,
         global_formula_tex=(
-            r"R(t)=\kappa \,R_{\mathrm{efficiency}}(t)^{\lambda_{\mathrm{eff}}}"
+            r"R(t)=R_{\mathrm{efficiency}}(t)^{\lambda_{\mathrm{eff}}}"
             r"R_{\mathrm{safety}}(t)^{\lambda_{\mathrm{safe}}}"
         ),
         excluded_metrics=(
diff --git a/envs/reward_system.py b/envs/reward_system.py
index a84127f..5ccd0f4 100644
--- a/envs/reward_system.py
+++ b/envs/reward_system.py
@@ -36,9 +36,8 @@ def average_reward_components(totals: Mapping[str, float], steps: int) -> Dict[s
 
 @dataclass(frozen=True)
 class RewardConfig:
-    reward_scale: float = 10.0
-    efficiency_alpha: float = 3.0
-    safety_beta: float = 4.0
+    efficiency_alpha: float = 2.19
+    safety_beta: float = 9.19
     efficiency_exponent: float = 0.50
     safety_exponent: float = 0.50
     ttc_threshold_s: float = 2.3
@@ -56,9 +55,8 @@ class RewardConfig:
         _ = speed_actions_ms
 
         return cls(
-            reward_scale=float(raw_cfg.get("reward_scale", 10.0)),
-            efficiency_alpha=float(raw_cfg.get("efficiency_alpha", 3.0)),
-            safety_beta=float(raw_cfg.get("safety_beta", 4.0)),
+            efficiency_alpha=float(raw_cfg.get("efficiency_alpha", 2.19)),
+            safety_beta=float(raw_cfg.get("safety_beta", 9.19)),
             efficiency_exponent=float(raw_cfg.get("efficiency_exponent", 0.50)),
             safety_exponent=float(raw_cfg.get("safety_exponent", 0.50)),
             ttc_threshold_s=float(raw_cfg.get("ttc_threshold_s", 2.3)),
@@ -122,4 +120,4 @@ class RewardCalculator:
         info["ttc_threshold_s"] = float(self.config.ttc_threshold_s)
         info["efficiency_lambda"] = float(lambda_eff)
         info["safety_lambda"] = float(lambda_safe)
-        return float(r_utility * self.config.reward_scale)
+        return float(r_utility)