diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/config.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/config.py index aced94ab..a3a5bb76 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/config.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/config.py @@ -8,17 +8,17 @@ # Supported Luna-2 metrics Luna2Metric = Literal[ "input_toxicity", - "output_toxicity", + "toxicity", # output toxicity "input_sexism", "output_sexism", "prompt_injection", - "pii_detection", + "pii", # output PII + "input_pii", "hallucination", "tone", ] -# Supported operators -Luna2Operator = Literal["gt", "lt", "gte", "lte", "eq", "contains", "any"] +Luna2Operator = Literal["gt", "lt", "gte", "lte", "eq", "contains", "any", "not_empty"] class Luna2EvaluatorConfig(EvaluatorConfig): @@ -74,7 +74,7 @@ class Luna2EvaluatorConfig(EvaluatorConfig): # Central stage fields stage_name: str | None = Field( default=None, - description="Stage name in Galileo (required for central stage)", + description="Stage name in Galileo (required for both local and central stages)", ) stage_version: int | None = Field( default=None, @@ -113,9 +113,10 @@ def validate_stage_config(self) -> "Luna2EvaluatorConfig": raise ValueError("'metric' is required for local stage") if not self.operator: raise ValueError("'operator' is required for local stage") - if self.target_value is None: + # not_empty / not_null operators don't need a comparison value + if self.target_value is None and self.operator not in ("not_empty", "not_null"): raise ValueError("'target_value' is required for local stage") - elif self.stage_type == "central": - if not self.stage_name: - raise ValueError("'stage_name' is required for central stage") + + if not self.stage_name: + raise ValueError("'stage_name' is required for both central and local stages") return self diff --git 
a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/evaluator.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/evaluator.py index a6bb146c..5169a5a5 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/evaluator.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/evaluator.py @@ -172,6 +172,20 @@ def _get_numeric_target_value(self) -> float | int | str | None: async def _evaluate_local_stage(self, data: Any) -> EvaluatorResult: """Evaluate using a local stage (runtime rulesets). + We use PASSTHROUGH action so Protect computes the metric and returns + metric_results without making a block decision itself — agent-control + owns that decision via the control's action.decision field. + + Numeric operators (gt, lt, gte, lte, eq): Protect evaluates the rule + server-side and returns status="triggered" when the condition is met, + so _parse_response picks it up directly. + + Categorical operators (not_empty, any): the Protect local-stage rule + engine does not support these operators and always returns + status="not_triggered", even when the metric value is non-empty. + _parse_response falls back to _evaluate_metric_results which evaluates + the condition client-side from the raw metric_results dict. + Args: data: The data to evaluate. 
@@ -187,7 +201,6 @@ async def _evaluate_local_stage(self, data: Any) -> EvaluatorResult: target_value=self._get_numeric_target_value() or 0, ) - # Create proper Ruleset with PassthroughAction ruleset = Ruleset( rules=[rule], action=PassthroughAction(type="PASSTHROUGH"), @@ -204,6 +217,7 @@ async def _evaluate_local_stage(self, data: Any) -> EvaluatorResult: payload=payload, prioritized_rulesets=[ruleset], project_name=self.config.galileo_project, + stage_name=self.config.stage_name, timeout=self.get_timeout_seconds(), metadata=self.config.metadata or {}, ) @@ -279,10 +293,20 @@ def _prepare_payload(self, data: Any) -> Payload: is_output_metric = "output" in metric if is_output_metric: - return Payload(input="", output=data_str) + payload = Payload(input="", output=data_str) else: # Default to input for central stages or input metrics - return Payload(input=data_str, output="") + payload = Payload(input=data_str, output="") + + logger.debug( + "[Luna2] _prepare_payload: metric=%r payload_field_config=%r " + "→ input=%d chars, output=%d chars", + self.config.metric, + self.config.payload_field, + len(payload.input), + len(payload.output), + ) + return payload def _parse_response(self, response: ProtectResponse | None) -> EvaluatorResult: """Parse Galileo Protect response into EvaluatorResult. @@ -304,16 +328,34 @@ def _parse_response(self, response: ProtectResponse | None) -> EvaluatorResult: status = response.status.lower() if response.status else "unknown" triggered = status == "triggered" + # Numeric operators (gt/lt/etc.) are evaluated server-side by Protect and + # return status="triggered" correctly even with PASSTHROUGH. + # Categorical operators (not_empty, any) are NOT supported by Protect's + # local-stage rule engine — it always returns status="not_triggered" for + # them regardless of the metric value. Fall back to client-side evaluation + # from metric_results for those cases. 
+ if not triggered and response.metric_results: + triggered = self._evaluate_metric_results(response.metric_results) + + logger.info( + "[Luna2] response: status=%r triggered=%s metric_results=%s", + status, + triggered, + response.metric_results, + ) + # Extract trace metadata trace_id = response.trace_metadata.id if response.trace_metadata else None execution_time = response.trace_metadata.execution_time if response.trace_metadata else None received_at = response.trace_metadata.received_at if response.trace_metadata else None response_at = response.trace_metadata.response_at if response.trace_metadata else None + message = self._build_message(triggered, status, response.metric_results) + return EvaluatorResult( matched=triggered, confidence=1.0 if triggered else 0.0, - message=response.text or f"Luna-2 check: {status}", + message=message, metadata={ "status": status, "metric": self.config.metric or "unknown", @@ -324,6 +366,74 @@ def _parse_response(self, response: ProtectResponse | None) -> EvaluatorResult: }, ) + def _build_message(self, triggered: bool, status: str, metric_results: dict) -> str: + """Build a human-readable message from the evaluation result.""" + metric = self.config.metric or "unknown" + + if not triggered: + return f"Luna-2 {metric} check passed" + + result = (metric_results or {}).get(metric, {}) + value = result.get("value") + + if isinstance(value, list) and value: + categories = ", ".join(str(v).replace("_", " ") for v in value) + return f"{metric} detected: {categories}" + if isinstance(value, (int, float)): + return f"{metric} score {value:.2f} exceeded threshold" + + return f"Luna-2 {metric} check triggered" + + def _evaluate_metric_results(self, metric_results: dict) -> bool: + """Evaluate the configured operator/target against raw metric_results. + + Used when the Protect API returns PASSTHROUGH (no server-side trigger) + but we still need to decide whether the rule condition is met. 
+ + Args: + metric_results: The metric_results dict from the Protect API response. + + Returns: + True if the rule condition is satisfied. + """ + metric = self.config.metric + if not metric or metric not in metric_results: + return False + + result = metric_results[metric] + if result.get("status") != "SUCCESS": + return False + + value = result.get("value") + operator = self.config.operator + target = self.config.target_value + + if operator in ("not_empty", "not_null"): + return bool(value) + if operator in ("empty", "is_null"): + return not bool(value) + if operator == "any" and isinstance(value, list): + return target in value if target is not None else bool(value) + if operator == "contains": + return target in value if (value and target is not None) else False + if isinstance(value, (int, float)) and target is not None: + try: + t = float(target) + if operator == "gt": + return value > t + if operator == "gte": + return value >= t + if operator == "lt": + return value < t + if operator == "lte": + return value <= t + if operator == "eq": + return value == t + except (TypeError, ValueError): + pass + + return False + def _handle_error(self, error: Exception) -> EvaluatorResult: """Handle errors from Luna-2 evaluation. 
def test_local_stage_requires_stage_name(self):
    """A local-stage config that omits ``stage_name`` must fail validation."""
    from agent_control_evaluator_galileo.luna2 import Luna2EvaluatorConfig

    # Every field a local stage needs except stage_name.
    fields_without_stage_name = {
        "stage_type": "local",
        "metric": "input_toxicity",
        "operator": "gt",
        "target_value": "0.5",
    }

    with pytest.raises(ValidationError, match="stage_name.*required"):
        Luna2EvaluatorConfig(**fields_without_stage_name)
test_on_error_validation(self): config_allow = Luna2EvaluatorConfig( stage_type="central", - stage_name="test", + stage_name="test-stage", on_error="allow", ) assert config_allow.on_error == "allow" config_deny = Luna2EvaluatorConfig( stage_type="central", - stage_name="test", + stage_name="test-stage", on_error="deny", ) assert config_deny.on_error == "deny" @@ -182,7 +196,7 @@ def test_on_error_validation(self): with pytest.raises(ValidationError): Luna2EvaluatorConfig( stage_type="central", - stage_name="test", + stage_name="test-stage", on_error="invalid", ) @@ -193,15 +207,16 @@ def test_metric_validation(self): # Valid metrics valid_metrics = [ "input_toxicity", - "output_toxicity", + "toxicity", "prompt_injection", - "pii_detection", + "pii", "hallucination", "tone", ] for metric in valid_metrics: config = Luna2EvaluatorConfig( stage_type="local", + stage_name="test-stage", metric=metric, operator="gt", target_value="0.5", @@ -212,6 +227,7 @@ def test_metric_validation(self): with pytest.raises(ValidationError): Luna2EvaluatorConfig( stage_type="local", + stage_name="test-stage", metric="invalid_metric", operator="gt", target_value="0.5", @@ -225,6 +241,7 @@ def test_operator_validation(self): for op in valid_operators: config = Luna2EvaluatorConfig( stage_type="local", + stage_name="test-stage", metric="input_toxicity", operator=op, target_value="0.5", @@ -234,6 +251,7 @@ def test_operator_validation(self): with pytest.raises(ValidationError): Luna2EvaluatorConfig( stage_type="local", + stage_name="test-stage", metric="input_toxicity", operator="invalid_op", target_value="0.5", @@ -245,6 +263,7 @@ def test_model_dump(self): config = Luna2EvaluatorConfig( stage_type="local", + stage_name="test-stage", metric="input_toxicity", operator="gt", target_value="0.5", @@ -254,11 +273,11 @@ def test_model_dump(self): data = config.model_dump(exclude_none=True) assert data["stage_type"] == "local" + assert data["stage_name"] == "test-stage" assert data["metric"] == 
"input_toxicity" assert data["operator"] == "gt" assert data["target_value"] == "0.5" assert data["galileo_project"] == "test-project" - assert "stage_name" not in data # None excluded class TestLuna2EvaluatorInheritance: @@ -322,6 +341,7 @@ def test_luna2_evaluator_init_without_api_key_raises_error(self): config = { "stage_type": "local", + "stage_name": "test-stage", "metric": "input_toxicity", "operator": "gt", "target_value": "0.5", @@ -394,6 +414,7 @@ async def test_local_stage_triggered(self): config = { "stage_type": "local", + "stage_name": "test-stage", "metric": "input_toxicity", "operator": "gt", "target_value": 0.8, @@ -433,6 +454,7 @@ async def test_local_stage_not_triggered(self): config = { "stage_type": "local", + "stage_name": "test-stage", "metric": "input_toxicity", "operator": "gt", "target_value": 0.8, @@ -464,6 +486,7 @@ async def test_local_stage_with_timeout_ms(self): config = { "stage_type": "local", + "stage_name": "test-stage", "metric": "input_toxicity", "operator": "gt", "target_value": 0.8, @@ -563,6 +586,7 @@ def test_input_metric_payload(self): config = { "stage_type": "local", + "stage_name": "test-stage", "metric": "input_toxicity", "operator": "gt", "target_value": 0.8, @@ -583,7 +607,8 @@ def test_output_metric_payload(self): config = { "stage_type": "local", - "metric": "output_toxicity", + "stage_name": "test-stage", + "metric": "output_sexism", "operator": "gt", "target_value": 0.7, } @@ -628,6 +653,7 @@ async def test_error_with_fail_open(self): config = { "stage_type": "local", + "stage_name": "test-stage", "metric": "input_toxicity", "operator": "gt", "target_value": 0.8, @@ -658,6 +684,7 @@ async def test_error_with_fail_closed(self): config = { "stage_type": "local", + "stage_name": "test-stage", "metric": "input_toxicity", "operator": "gt", "target_value": 0.8, @@ -688,6 +715,7 @@ async def test_empty_response_handling(self): config = { "stage_type": "local", + "stage_name": "test-stage", "metric": "input_toxicity", 
class TestLuna2BuildMessage:
    """Tests for the ``_build_message`` helper of ``Luna2Evaluator``."""

    @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"})
    @patch("agent_control_evaluator_galileo.luna2.evaluator.LUNA2_AVAILABLE", True)
    def test_build_message_list_value_uses_metric_name(self):
        """Test that list-valued metrics use the metric name, not a hardcoded label."""
        # Imported once, outside the loop; the evaluator package is only
        # importable with the patches above applied.
        from agent_control_evaluator_galileo.luna2 import (
            Luna2Evaluator,
            Luna2EvaluatorConfig,
        )

        for metric in ("pii", "input_pii", "tone"):
            # The not_empty operator needs no comparison value, so
            # target_value is simply omitted.
            cfg = Luna2EvaluatorConfig(
                stage_type="local",
                stage_name="test-stage",
                metric=metric,
                operator="not_empty",
            )
            evaluator = Luna2Evaluator(cfg)
            metric_results = {metric: {"value": ["category_a", "category_b"]}}
            msg = evaluator._build_message(
                triggered=True,
                status="triggered",
                metric_results=metric_results,
            )
            assert metric in msg
            # Underscores in category names are rendered as spaces.
            assert "category a" in msg
            assert "PII detected" not in msg

    @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"})
    @patch("agent_control_evaluator_galileo.luna2.evaluator.LUNA2_AVAILABLE", True)
    def test_build_message_not_triggered(self):
        """Test message when check passes."""
        from agent_control_evaluator_galileo.luna2 import (
            Luna2Evaluator,
            Luna2EvaluatorConfig,
        )

        cfg = Luna2EvaluatorConfig(
            stage_type="local",
            stage_name="test-stage",
            metric="input_toxicity",
            operator="gt",
            target_value=0.8,
        )
        evaluator = Luna2Evaluator(cfg)
        msg = evaluator._build_message(
            triggered=False,
            status="not_triggered",
            metric_results={},
        )
        assert "passed" in msg
        assert "input_toxicity" in msg

    @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"})
    @patch("agent_control_evaluator_galileo.luna2.evaluator.LUNA2_AVAILABLE", True)
    def test_build_message_numeric_value(self):
        """Test message for numeric metric scores."""
        from agent_control_evaluator_galileo.luna2 import (
            Luna2Evaluator,
            Luna2EvaluatorConfig,
        )

        cfg = Luna2EvaluatorConfig(
            stage_type="local",
            stage_name="test-stage",
            metric="input_toxicity",
            operator="gt",
            target_value=0.8,
        )
        evaluator = Luna2Evaluator(cfg)
        metric_results = {"input_toxicity": {"value": 0.95}}
        msg = evaluator._build_message(
            triggered=True,
            status="triggered",
            metric_results=metric_results,
        )
        # The score is formatted with two decimals.
        assert "0.95" in msg
        assert "input_toxicity" in msg