diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/config.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/config.py
index aced94ab..a3a5bb76 100644
--- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/config.py
+++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/config.py
@@ -8,17 +8,17 @@
 # Supported Luna-2 metrics
 Luna2Metric = Literal[
     "input_toxicity",
-    "output_toxicity",
+    "toxicity",           # output toxicity
     "input_sexism",
     "output_sexism",
     "prompt_injection",
-    "pii_detection",
+    "pii",                # output PII
+    "input_pii",
     "hallucination",
     "tone",
 ]
 
-# Supported operators
-Luna2Operator = Literal["gt", "lt", "gte", "lte", "eq", "contains", "any"]
+Luna2Operator = Literal["gt", "lt", "gte", "lte", "eq", "contains", "any", "not_empty"]
 
 
 class Luna2EvaluatorConfig(EvaluatorConfig):
@@ -74,7 +74,7 @@ class Luna2EvaluatorConfig(EvaluatorConfig):
     # Central stage fields
     stage_name: str | None = Field(
         default=None,
-        description="Stage name in Galileo (required for central stage)",
+        description="Stage name in Galileo (required for both local and central stages)",
     )
     stage_version: int | None = Field(
         default=None,
@@ -113,9 +113,10 @@ def validate_stage_config(self) -> "Luna2EvaluatorConfig":
                 raise ValueError("'metric' is required for local stage")
             if not self.operator:
                 raise ValueError("'operator' is required for local stage")
-            if self.target_value is None:
+            # not_empty / not_null operators don't need a comparison value
+            if self.target_value is None and self.operator not in ("not_empty", "not_null"):
                 raise ValueError("'target_value' is required for local stage")
-        elif self.stage_type == "central":
-            if not self.stage_name:
-                raise ValueError("'stage_name' is required for central stage")
+
+        if not self.stage_name:
+            raise ValueError("'stage_name' is required for both central and local stages")
         return self
diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/evaluator.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/evaluator.py
index a6bb146c..5169a5a5 100644
--- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/evaluator.py
+++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/evaluator.py
@@ -172,6 +172,20 @@ def _get_numeric_target_value(self) -> float | int | str | None:
     async def _evaluate_local_stage(self, data: Any) -> EvaluatorResult:
         """Evaluate using a local stage (runtime rulesets).
 
+        We use PASSTHROUGH action so Protect computes the metric and returns
+        metric_results without making a block decision itself — agent-control
+        owns that decision via the control's action.decision field.
+
+        Numeric operators (gt, lt, gte, lte, eq): Protect evaluates the rule
+        server-side and returns status="triggered" when the condition is met,
+        so _parse_response picks it up directly.
+
+        Categorical operators (not_empty, any): the Protect local-stage rule
+        engine does not support these operators and always returns
+        status="not_triggered", even when the metric value is non-empty.
+        _parse_response falls back to _evaluate_metric_results which evaluates
+        the condition client-side from the raw metric_results dict.
+
         Args:
             data: The data to evaluate.
 
@@ -187,7 +201,6 @@ async def _evaluate_local_stage(self, data: Any) -> EvaluatorResult:
             target_value=self._get_numeric_target_value() or 0,
         )
 
-        # Create proper Ruleset with PassthroughAction
         ruleset = Ruleset(
             rules=[rule],
             action=PassthroughAction(type="PASSTHROUGH"),
@@ -204,6 +217,7 @@ async def _evaluate_local_stage(self, data: Any) -> EvaluatorResult:
                 payload=payload,
                 prioritized_rulesets=[ruleset],
                 project_name=self.config.galileo_project,
+                stage_name=self.config.stage_name,
                 timeout=self.get_timeout_seconds(),
                 metadata=self.config.metadata or {},
             )
@@ -279,10 +293,20 @@ def _prepare_payload(self, data: Any) -> Payload:
         is_output_metric = "output" in metric
 
         if is_output_metric:
-            return Payload(input="", output=data_str)
+            payload = Payload(input="", output=data_str)
         else:
             # Default to input for central stages or input metrics
-            return Payload(input=data_str, output="")
+            payload = Payload(input=data_str, output="")
+
+        logger.debug(
+            "[Luna2] _prepare_payload: metric=%r payload_field_config=%r "
+            "→ input=%d chars, output=%d chars",
+            self.config.metric,
+            self.config.payload_field,
+            len(payload.input),
+            len(payload.output),
+        )
+        return payload
 
     def _parse_response(self, response: ProtectResponse | None) -> EvaluatorResult:
         """Parse Galileo Protect response into EvaluatorResult.
@@ -304,16 +328,34 @@ def _parse_response(self, response: ProtectResponse | None) -> EvaluatorResult:
         status = response.status.lower() if response.status else "unknown"
         triggered = status == "triggered"
 
+        # Numeric operators (gt/lt/etc.) are evaluated server-side by Protect and
+        # return status="triggered" correctly even with PASSTHROUGH.
+        # Categorical operators (not_empty, any) are NOT supported by Protect's
+        # local-stage rule engine — it always returns status="not_triggered" for
+        # them regardless of the metric value.  Fall back to client-side evaluation
+        # from metric_results for those cases.
+        if not triggered and response.metric_results:
+            triggered = self._evaluate_metric_results(response.metric_results)
+
+        logger.info(
+            "[Luna2] response: status=%r triggered=%s metric_results=%s",
+            status,
+            triggered,
+            response.metric_results,
+        )
+
         # Extract trace metadata
         trace_id = response.trace_metadata.id if response.trace_metadata else None
         execution_time = response.trace_metadata.execution_time if response.trace_metadata else None
         received_at = response.trace_metadata.received_at if response.trace_metadata else None
         response_at = response.trace_metadata.response_at if response.trace_metadata else None
 
+        message = self._build_message(triggered, status, response.metric_results)
+
         return EvaluatorResult(
             matched=triggered,
             confidence=1.0 if triggered else 0.0,
-            message=response.text or f"Luna-2 check: {status}",
+            message=message,
             metadata={
                 "status": status,
                 "metric": self.config.metric or "unknown",
@@ -324,6 +366,74 @@ def _parse_response(self, response: ProtectResponse | None) -> EvaluatorResult:
             },
         )
 
+    def _build_message(self, triggered: bool, status: str, metric_results: dict) -> str:
+        """Build a human-readable result message (``status`` is currently unused)."""
+        metric = self.config.metric or "unknown"
+        if not triggered:
+            return f"Luna-2 {metric} check passed"
+
+        # The metric entry may be absent, None or not a dict: treat as no value.
+        result = (metric_results or {}).get(metric)
+        value = result.get("value") if isinstance(result, dict) else None
+        if isinstance(value, list) and value:
+            categories = ", ".join(str(v).replace("_", " ") for v in value)
+            return f"{metric} detected: {categories}"
+        if isinstance(value, (int, float)):
+            # lt/lte/eq rules trigger without the score going over the threshold.
+            verb = "met" if self.config.operator in ("lt", "lte", "eq") else "exceeded"
+            return f"{metric} score {value:.2f} {verb} threshold"
+        return f"Luna-2 {metric} check triggered"
+
+    def _evaluate_metric_results(self, metric_results: dict) -> bool:
+        """Evaluate the configured operator/target against raw metric_results.
+
+        Used when the Protect API returns PASSTHROUGH (no server-side trigger)
+        but we still need to decide whether the rule condition is met.
+
+        Args:
+            metric_results: The metric_results dict from the Protect API response.
+
+        Returns:
+            True if the rule condition is satisfied. False when the metric is
+            missing, malformed, not ``SUCCESS``, or not comparable to the target.
+        """
+        metric = self.config.metric
+        if not metric or metric not in metric_results:
+            return False
+        result = metric_results[metric]
+        # A malformed (non-dict) entry means "no usable result", not a crash.
+        if not isinstance(result, dict) or result.get("status") != "SUCCESS":
+            return False
+
+        value = result.get("value")
+        operator = self.config.operator
+        target = self.config.target_value
+
+        if operator in ("not_empty", "not_null"):
+            return bool(value)
+        if operator in ("empty", "is_null"):
+            return not bool(value)
+        if operator == "any" and isinstance(value, list):
+            return target in value if target is not None else bool(value)
+        if operator == "contains":
+            try:
+                return bool(value) and target is not None and target in value
+            except TypeError:
+                # Scalar value, or str value with a non-str target: no match.
+                return False
+        if isinstance(value, (int, float)) and target is not None:
+            try:
+                threshold = float(target)
+            except (TypeError, ValueError):
+                return False  # non-numeric target cannot match a numeric score
+            outcomes = {
+                "gt": value > threshold, "gte": value >= threshold,
+                "lt": value < threshold, "lte": value <= threshold,
+                "eq": value == threshold,
+            }
+            return outcomes.get(operator, False)
+        return False
+
     def _handle_error(self, error: Exception) -> EvaluatorResult:
         """Handle errors from Luna-2 evaluation.
 
diff --git a/evaluators/contrib/galileo/tests/test_luna2_evaluator.py b/evaluators/contrib/galileo/tests/test_luna2_evaluator.py
index 0f6e45d7..6e2c5c41 100644
--- a/evaluators/contrib/galileo/tests/test_luna2_evaluator.py
+++ b/evaluators/contrib/galileo/tests/test_luna2_evaluator.py
@@ -50,6 +50,7 @@ def test_local_stage_config_valid(self):
 
         config = Luna2EvaluatorConfig(
             stage_type="local",
+            stage_name="test-stage",
             metric="input_toxicity",
             operator="gt",
             target_value="0.5",
@@ -68,6 +69,7 @@ def test_local_stage_config_with_numeric_target(self):
 
         config = Luna2EvaluatorConfig(
             stage_type="local",
+            stage_name="test-stage",
             metric="input_toxicity",
             operator="gt",
             target_value=0.5,  # Numeric value
@@ -133,6 +135,18 @@ def test_central_stage_requires_stage_name(self):
                 galileo_project="my-project",
             )
 
+    def test_local_stage_requires_stage_name(self):
+        """A local stage configured without stage_name must fail validation."""
+        from agent_control_evaluator_galileo.luna2 import Luna2EvaluatorConfig
+        fields_without_stage_name = {
+            "stage_type": "local",
+            "metric": "input_toxicity",
+            "operator": "gt",
+            "target_value": "0.5",
+        }
+        with pytest.raises(ValidationError, match="stage_name.*required"):
+            Luna2EvaluatorConfig(**fields_without_stage_name)
+
     def test_timeout_ms_validation(self):
         """Test timeout_ms must be within valid range."""
         from agent_control_evaluator_galileo.luna2 import Luna2EvaluatorConfig
@@ -141,7 +155,7 @@ def test_timeout_ms_validation(self):
         with pytest.raises(ValidationError):
             Luna2EvaluatorConfig(
                 stage_type="central",
-                stage_name="test",
+                stage_name="test-stage",
                 timeout_ms=500,  # Below 1000
             )
 
@@ -149,14 +163,14 @@ def test_timeout_ms_validation(self):
         with pytest.raises(ValidationError):
             Luna2EvaluatorConfig(
                 stage_type="central",
-                stage_name="test",
+                stage_name="test-stage",
                 timeout_ms=100000,  # Above 60000
             )
 
         # Valid
         config = Luna2EvaluatorConfig(
             stage_type="central",
-            stage_name="test",
+            stage_name="test-stage",
             timeout_ms=30000,
         )
         assert config.timeout_ms == 30000
@@ -167,14 +181,14 @@ def test_on_error_validation(self):
 
         config_allow = Luna2EvaluatorConfig(
             stage_type="central",
-            stage_name="test",
+            stage_name="test-stage",
             on_error="allow",
         )
         assert config_allow.on_error == "allow"
 
         config_deny = Luna2EvaluatorConfig(
             stage_type="central",
-            stage_name="test",
+            stage_name="test-stage",
             on_error="deny",
         )
         assert config_deny.on_error == "deny"
@@ -182,7 +196,7 @@ def test_on_error_validation(self):
         with pytest.raises(ValidationError):
             Luna2EvaluatorConfig(
                 stage_type="central",
-                stage_name="test",
+                stage_name="test-stage",
                 on_error="invalid",
             )
 
@@ -193,15 +207,16 @@ def test_metric_validation(self):
         # Valid metrics
         valid_metrics = [
             "input_toxicity",
-            "output_toxicity",
+            "toxicity",
             "prompt_injection",
-            "pii_detection",
+            "pii",
             "hallucination",
             "tone",
         ]
         for metric in valid_metrics:
             config = Luna2EvaluatorConfig(
                 stage_type="local",
+                stage_name="test-stage",
                 metric=metric,
                 operator="gt",
                 target_value="0.5",
@@ -212,6 +227,7 @@ def test_metric_validation(self):
         with pytest.raises(ValidationError):
             Luna2EvaluatorConfig(
                 stage_type="local",
+                stage_name="test-stage",
                 metric="invalid_metric",
                 operator="gt",
                 target_value="0.5",
@@ -225,6 +241,7 @@ def test_operator_validation(self):
         for op in valid_operators:
             config = Luna2EvaluatorConfig(
                 stage_type="local",
+                stage_name="test-stage",
                 metric="input_toxicity",
                 operator=op,
                 target_value="0.5",
@@ -234,6 +251,7 @@ def test_operator_validation(self):
         with pytest.raises(ValidationError):
             Luna2EvaluatorConfig(
                 stage_type="local",
+                stage_name="test-stage",
                 metric="input_toxicity",
                 operator="invalid_op",
                 target_value="0.5",
@@ -245,6 +263,7 @@ def test_model_dump(self):
 
         config = Luna2EvaluatorConfig(
             stage_type="local",
+            stage_name="test-stage",
             metric="input_toxicity",
             operator="gt",
             target_value="0.5",
@@ -254,11 +273,11 @@ def test_model_dump(self):
         data = config.model_dump(exclude_none=True)
 
         assert data["stage_type"] == "local"
+        assert data["stage_name"] == "test-stage"
         assert data["metric"] == "input_toxicity"
         assert data["operator"] == "gt"
         assert data["target_value"] == "0.5"
         assert data["galileo_project"] == "test-project"
-        assert "stage_name" not in data  # None excluded
 
 
 class TestLuna2EvaluatorInheritance:
@@ -322,6 +341,7 @@ def test_luna2_evaluator_init_without_api_key_raises_error(self):
 
         config = {
             "stage_type": "local",
+            "stage_name": "test-stage",
             "metric": "input_toxicity",
             "operator": "gt",
             "target_value": "0.5",
@@ -394,6 +414,7 @@ async def test_local_stage_triggered(self):
 
         config = {
             "stage_type": "local",
+            "stage_name": "test-stage",
             "metric": "input_toxicity",
             "operator": "gt",
             "target_value": 0.8,
@@ -433,6 +454,7 @@ async def test_local_stage_not_triggered(self):
 
         config = {
             "stage_type": "local",
+            "stage_name": "test-stage",
             "metric": "input_toxicity",
             "operator": "gt",
             "target_value": 0.8,
@@ -464,6 +486,7 @@ async def test_local_stage_with_timeout_ms(self):
 
         config = {
             "stage_type": "local",
+            "stage_name": "test-stage",
             "metric": "input_toxicity",
             "operator": "gt",
             "target_value": 0.8,
@@ -563,6 +586,7 @@ def test_input_metric_payload(self):
 
         config = {
             "stage_type": "local",
+            "stage_name": "test-stage",
             "metric": "input_toxicity",
             "operator": "gt",
             "target_value": 0.8,
@@ -583,7 +607,8 @@ def test_output_metric_payload(self):
 
         config = {
             "stage_type": "local",
-            "metric": "output_toxicity",
+            "stage_name": "test-stage",
+            "metric": "output_sexism",
             "operator": "gt",
             "target_value": 0.7,
         }
@@ -628,6 +653,7 @@ async def test_error_with_fail_open(self):
 
         config = {
             "stage_type": "local",
+            "stage_name": "test-stage",
             "metric": "input_toxicity",
             "operator": "gt",
             "target_value": 0.8,
@@ -658,6 +684,7 @@ async def test_error_with_fail_closed(self):
 
         config = {
             "stage_type": "local",
+            "stage_name": "test-stage",
             "metric": "input_toxicity",
             "operator": "gt",
             "target_value": 0.8,
@@ -688,6 +715,7 @@ async def test_empty_response_handling(self):
 
         config = {
             "stage_type": "local",
+            "stage_name": "test-stage",
             "metric": "input_toxicity",
             "operator": "gt",
             "target_value": 0.8,
@@ -718,6 +746,7 @@ def test_get_timeout_from_config(self):
 
         config = {
             "stage_type": "local",
+            "stage_name": "test-stage",
             "metric": "input_toxicity",
             "operator": "gt",
             "target_value": "0.5",
@@ -735,6 +764,7 @@ def test_get_timeout_from_default(self):
 
         config = {
             "stage_type": "local",
+            "stage_name": "test-stage",
             "metric": "input_toxicity",
             "operator": "gt",
             "target_value": "0.5",
@@ -756,6 +786,7 @@ def test_numeric_target_value_float(self):
 
         config = {
             "stage_type": "local",
+            "stage_name": "test-stage",
             "metric": "input_toxicity",
             "operator": "gt",
             "target_value": 0.5,
@@ -772,6 +803,7 @@ def test_numeric_target_value_int(self):
 
         config = {
             "stage_type": "local",
+            "stage_name": "test-stage",
             "metric": "input_toxicity",
             "operator": "gt",
             "target_value": 1,
@@ -788,6 +820,7 @@ def test_string_target_value_converts_to_float(self):
 
         config = {
             "stage_type": "local",
+            "stage_name": "test-stage",
             "metric": "input_toxicity",
             "operator": "gt",
             "target_value": "0.75",
@@ -797,6 +830,76 @@ def test_string_target_value_converts_to_float(self):
         assert evaluator._get_numeric_target_value() == 0.75
 
 
+class TestLuna2BuildMessage:
+    """Tests for _build_message helper."""
+
+    @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"})
+    @patch("agent_control_evaluator_galileo.luna2.evaluator.LUNA2_AVAILABLE", True)
+    def test_build_message_list_value_uses_metric_name(self):
+        """Test that list-valued metrics use the metric name, not a hardcoded label."""
+        from agent_control_evaluator_galileo.luna2 import Luna2Evaluator, Luna2EvaluatorConfig
+
+        for metric in ("pii", "input_pii", "tone"):
+            # not_empty needs no target_value, so none is passed here.
+            cfg = Luna2EvaluatorConfig(
+                stage_type="local",
+                stage_name="test-stage",
+                metric=metric,
+                operator="not_empty",
+            )
+            evaluator = Luna2Evaluator(cfg)
+            metric_results = {metric: {"value": ["category_a", "category_b"]}}
+            msg = evaluator._build_message(
+                triggered=True, status="triggered", metric_results=metric_results
+            )
+            # Underscores in category names are rendered as spaces.
+            assert metric in msg
+            assert "category a" in msg
+            assert "category b" in msg
+            assert "PII detected" not in msg
+
+    @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"})
+    @patch("agent_control_evaluator_galileo.luna2.evaluator.LUNA2_AVAILABLE", True)
+    def test_build_message_not_triggered(self):
+        """Test message when check passes."""
+        from agent_control_evaluator_galileo.luna2 import Luna2Evaluator, Luna2EvaluatorConfig
+
+        cfg = Luna2EvaluatorConfig(
+            stage_type="local",
+            stage_name="test-stage",
+            metric="input_toxicity",
+            operator="gt",
+            target_value=0.8,
+        )
+        evaluator = Luna2Evaluator(cfg)
+        msg = evaluator._build_message(
+            triggered=False, status="not_triggered", metric_results={}
+        )
+        assert "passed" in msg
+        assert "input_toxicity" in msg
+
+    @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"})
+    @patch("agent_control_evaluator_galileo.luna2.evaluator.LUNA2_AVAILABLE", True)
+    def test_build_message_numeric_value(self):
+        """Test message for numeric metric scores."""
+        from agent_control_evaluator_galileo.luna2 import Luna2Evaluator, Luna2EvaluatorConfig
+
+        cfg = Luna2EvaluatorConfig(
+            stage_type="local",
+            stage_name="test-stage",
+            metric="input_toxicity",
+            operator="gt",
+            target_value=0.8,
+        )
+        evaluator = Luna2Evaluator(cfg)
+        metric_results = {"input_toxicity": {"value": 0.95}}
+        msg = evaluator._build_message(
+            triggered=True, status="triggered", metric_results=metric_results
+        )
+        assert "0.95" in msg
+        assert "input_toxicity" in msg
+
+
 class TestGalileoProtectClient:
     """Tests for the GalileoProtectClient HTTP client."""