Skip to content

Commit 36c86f0

Browse files
committed
added tags and metadata to g-eval
1 parent 2b56c51 commit 36c86f0

137 files changed

Lines changed: 1160 additions & 1104 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

README.md

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -220,13 +220,13 @@ Open `test_chatbot.py` and write your first test case to run an **end-to-end** e
220220
import pytest
221221
from deepeval import assert_test
222222
from deepeval.metrics import GEval
223-
from deepeval.test_case import LLMTestCase, LLMTestCaseParams
223+
from deepeval.test_case import LLMTestCase, SingleTurnParams
224224

225225
def test_case():
226226
correctness_metric = GEval(
227227
name="Correctness",
228228
criteria="Determine if the 'actual output' is correct based on the 'expected output'.",
229-
evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT, LLMTestCaseParams.EXPECTED_OUTPUT],
229+
evaluation_params=[SingleTurnParams.ACTUAL_OUTPUT, SingleTurnParams.EXPECTED_OUTPUT],
230230
threshold=0.5
231231
)
232232
test_case = LLMTestCase(
@@ -268,14 +268,14 @@ Use the `@observe` decorator to trace components (LLM calls, retrievers, tool ca
268268

269269
```python
270270
from deepeval.tracing import observe, update_current_span
271-
from deepeval.test_case import LLMTestCase, LLMTestCaseParams
271+
from deepeval.test_case import LLMTestCase, SingleTurnParams
272272
from deepeval.dataset import EvaluationDataset, Golden
273273
from deepeval.metrics import GEval
274274

275275
correctness = GEval(
276276
name="Correctness",
277277
criteria="Determine if the 'actual output' is correct based on the 'expected output'.",
278-
evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT, LLMTestCaseParams.EXPECTED_OUTPUT],
278+
evaluation_params=[SingleTurnParams.ACTUAL_OUTPUT, SingleTurnParams.EXPECTED_OUTPUT],
279279
)
280280

281281
@observe(metrics=[correctness])

deepeval/cli/main.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -270,7 +270,6 @@ def login(
270270
settings = get_settings()
271271
save = save or settings.DEEPEVAL_DEFAULT_SAVE or "dotenv:.env.local"
272272
with settings.edit(save=save) as edit_ctx:
273-
settings.API_KEY = key
274273
settings.CONFIDENT_API_KEY = key
275274

276275
handled, path, updated = edit_ctx.result
@@ -335,7 +334,6 @@ def logout(
335334
settings = get_settings()
336335
save = save or settings.DEEPEVAL_DEFAULT_SAVE or "dotenv:.env.local"
337336
with settings.edit(save=save) as edit_ctx:
338-
settings.API_KEY = None
339337
settings.CONFIDENT_API_KEY = None
340338

341339
handled, path, updated = edit_ctx.result

deepeval/confident/api.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,7 @@ def get_base_api_url():
7979

8080
def get_confident_api_key() -> Optional[str]:
8181
s = get_settings()
82-
key: Optional[SecretStr] = s.CONFIDENT_API_KEY or s.API_KEY
82+
key: Optional[SecretStr] = s.CONFIDENT_API_KEY
8383
return key.get_secret_value() if key else None
8484

8585

@@ -98,17 +98,14 @@ def set_confident_api_key(api_key: Optional[str]) -> None:
9898
if save is None:
9999
with s.edit(persist=False):
100100
s.CONFIDENT_API_KEY = SecretStr(api_key) if api_key else None
101-
s.API_KEY = SecretStr(api_key) if api_key else None
102101
else:
103102
# Respect default save: update runtime + write to dotenv, but not JSON
104103
with s.edit(save=save, persist=None):
105104
s.CONFIDENT_API_KEY = SecretStr(api_key) if api_key else None
106-
s.API_KEY = SecretStr(api_key) if api_key else None
107105

108106

109107
def is_confident():
110-
confident_api_key = get_confident_api_key()
111-
return confident_api_key is not None
108+
return get_confident_api_key() is not None
112109

113110

114111
def log_retry_error(retry_state: RetryCallState):

deepeval/config/settings.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -377,10 +377,6 @@ def __setattr__(self, name: str, value):
377377
# Model Keys
378378
#
379379

380-
API_KEY: Optional[SecretStr] = Field(
381-
None,
382-
description="Alias for CONFIDENT_API_KEY (Confident AI API key).",
383-
)
384380
CONFIDENT_API_KEY: Optional[SecretStr] = Field(
385381
None,
386382
description="Confident AI API key (used for uploading results/telemetry to Confident).",

deepeval/dataset/dataset.py

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -350,7 +350,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
350350
raise ValueError(f"Error processing expected_tools: {e}")
351351
else:
352352
expected_tools.append([])
353-
additional_metadatas = [
353+
metadatas = [
354354
ast.literal_eval(metadata) if metadata else None
355355
for metadata in get_column_data(
356356
df, additional_metadata_col_name, default=""
@@ -365,7 +365,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
365365
retrieval_context,
366366
tools_called,
367367
expected_tools,
368-
additional_metadata,
368+
metadata,
369369
) in zip(
370370
inputs,
371371
actual_outputs,
@@ -374,7 +374,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
374374
retrieval_contexts,
375375
tools_called,
376376
expected_tools,
377-
additional_metadatas,
377+
metadatas,
378378
):
379379
self.add_test_case(
380380
LLMTestCase(
@@ -385,7 +385,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
385385
retrieval_context=retrieval_context,
386386
tools_called=tools_called,
387387
expected_tools=expected_tools,
388-
additional_metadata=additional_metadata,
388+
metadata=metadata,
389389
)
390390
)
391391

@@ -575,7 +575,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
575575
comments = get_column_data(df, comments_key_name)
576576
name = get_column_data(df, name_key_name)
577577
source_files = get_column_data(df, source_file_col_name)
578-
additional_metadatas = [
578+
metadatas = [
579579
ast.literal_eval(metadata) if metadata else None
580580
for metadata in get_column_data(
581581
df, additional_metadata_col_name, default=""
@@ -597,7 +597,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
597597
comments,
598598
name,
599599
source_file,
600-
additional_metadata,
600+
metadata,
601601
scenario,
602602
turns,
603603
expected_outcome,
@@ -613,7 +613,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
613613
comments,
614614
name,
615615
source_files,
616-
additional_metadatas,
616+
metadatas,
617617
scenarios,
618618
turns_raw,
619619
expected_outcomes,
@@ -630,7 +630,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
630630
context=context,
631631
comments=comments,
632632
name=name,
633-
additional_metadata=additional_metadata,
633+
additional_metadata=metadata,
634634
)
635635
)
636636
else:
@@ -643,7 +643,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
643643
retrieval_context=retrieval_context,
644644
tools_called=tools_called,
645645
expected_tools=expected_tools,
646-
additional_metadata=additional_metadata,
646+
additional_metadata=metadata,
647647
source_file=source_file,
648648
comments=comments,
649649
name=name,
@@ -688,7 +688,7 @@ def add_goldens_from_json_file(
688688
comments = json_obj.get(comments_key_name)
689689
name = json_obj.get(name_key_name)
690690
parsed_turns = parse_turns(turns) if turns else []
691-
additional_metadata = json_obj.get(additional_metadata_key_name)
691+
metadata = json_obj.get(additional_metadata_key_name)
692692

693693
self.add_golden(
694694
ConversationalGolden(
@@ -699,7 +699,7 @@ def add_goldens_from_json_file(
699699
context=context,
700700
comments=comments,
701701
name=name,
702-
additional_metadata=additional_metadata,
702+
additional_metadata=metadata,
703703
)
704704
)
705705
else:
@@ -713,7 +713,7 @@ def add_goldens_from_json_file(
713713
comments = json_obj.get(comments_key_name)
714714
name = json_obj.get(name_key_name)
715715
source_file = json_obj.get(source_file_key_name)
716-
additional_metadata = json_obj.get(additional_metadata_key_name)
716+
metadata = json_obj.get(additional_metadata_key_name)
717717

718718
self.add_golden(
719719
Golden(
@@ -724,7 +724,7 @@ def add_goldens_from_json_file(
724724
retrieval_context=retrieval_context,
725725
tools_called=tools_called,
726726
expected_tools=expected_tools,
727-
additional_metadata=additional_metadata,
727+
additional_metadata=metadata,
728728
comments=comments,
729729
name=name,
730730
source_file=source_file,
@@ -803,7 +803,7 @@ def parse_tools(value):
803803
comments = json_obj.get(comments_key_name)
804804
name = json_obj.get(name_key_name)
805805
parsed_turns = parse_turns(turns) if turns else []
806-
additional_metadata = json_obj.get(additional_metadata_key_name)
806+
metadata = json_obj.get(additional_metadata_key_name)
807807
custom_column_key_values = json_obj.get(
808808
custom_column_key_values_key_name
809809
)
@@ -817,7 +817,7 @@ def parse_tools(value):
817817
context=context,
818818
comments=comments,
819819
name=name,
820-
additional_metadata=additional_metadata,
820+
additional_metadata=metadata,
821821
custom_column_key_values=custom_column_key_values,
822822
)
823823
)
@@ -839,7 +839,7 @@ def parse_tools(value):
839839
comments = json_obj.get(comments_key_name)
840840
name = json_obj.get(name_key_name)
841841
source_file = json_obj.get(source_file_key_name)
842-
additional_metadata = json_obj.get(additional_metadata_key_name)
842+
metadata = json_obj.get(additional_metadata_key_name)
843843
custom_column_key_values = json_obj.get(
844844
custom_column_key_values_key_name
845845
)
@@ -853,7 +853,7 @@ def parse_tools(value):
853853
retrieval_context=retrieval_context,
854854
tools_called=tools_called,
855855
expected_tools=expected_tools,
856-
additional_metadata=additional_metadata,
856+
additional_metadata=metadata,
857857
custom_column_key_values=custom_column_key_values,
858858
comments=comments,
859859
name=name,

deepeval/dataset/utils.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ def convert_test_cases_to_goldens(
2424
"retrieval_context": test_case.retrieval_context,
2525
"tools_called": test_case.tools_called,
2626
"expected_tools": test_case.expected_tools,
27-
"additional_metadata": test_case.additional_metadata,
27+
"additional_metadata": test_case.metadata,
2828
}
2929
goldens.append(Golden(**golden))
3030
return goldens
@@ -47,7 +47,7 @@ def convert_goldens_to_test_cases(
4747
expected_tools=golden.expected_tools,
4848
name=golden.name,
4949
comments=golden.comments,
50-
additional_metadata=golden.additional_metadata,
50+
metadata=golden.additional_metadata,
5151
_dataset_alias=_alias,
5252
_dataset_id=_id,
5353
_dataset_rank=index,
@@ -71,7 +71,7 @@ def convert_convo_test_cases_to_convo_goldens(
7171
"expected_outcome": test_case.expected_outcome,
7272
"user_description": test_case.user_description,
7373
"context": test_case.context,
74-
"additional_metadata": test_case.additional_metadata,
74+
"additional_metadata": test_case.metadata,
7575
}
7676
goldens.append(ConversationalGolden(**golden))
7777
return goldens
@@ -91,7 +91,7 @@ def convert_convo_goldens_to_convo_test_cases(
9191
user_description=golden.user_description,
9292
context=golden.context,
9393
name=golden.name,
94-
additional_metadata=golden.additional_metadata,
94+
metadata=golden.additional_metadata,
9595
comments=golden.comments,
9696
_dataset_alias=_alias,
9797
_dataset_id=_id,
@@ -141,9 +141,7 @@ def _dump_list(models):
141141
"mcp_tools_called": _dump_list(turn.mcp_tools_called),
142142
"mcp_resources_called": _dump_list(turn.mcp_resources_called),
143143
"mcp_prompts_called": _dump_list(turn.mcp_prompts_called),
144-
"additional_metadata": (
145-
turn.additional_metadata if turn.additional_metadata else None
146-
),
144+
"metadata": turn.metadata if turn.metadata else None,
147145
}
148146
res.append(cur_turn)
149147
try:

deepeval/evaluate/execute/agentic.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -124,7 +124,7 @@ async def _a_execute_agentic_test_case(
124124
retrieval_context=current_trace.retrieval_context,
125125
tools_called=current_trace.tools_called,
126126
expected_tools=current_trace.expected_tools,
127-
additional_metadata=golden.additional_metadata,
127+
metadata=golden.additional_metadata,
128128
comments=golden.comments,
129129
name=golden.name,
130130
_dataset_alias=golden._dataset_alias,
@@ -243,7 +243,7 @@ async def dfs(trace: Trace, span: BaseSpan):
243243
expected_output=None,
244244
context=None,
245245
retrieval_context=None,
246-
additional_metadata=golden.additional_metadata,
246+
metadata=golden.additional_metadata,
247247
tools_called=None,
248248
expected_tools=None,
249249
comments=golden.comments,

deepeval/evaluate/execute/loop.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -217,7 +217,7 @@ def evaluate_test_cases(
217217
expected_output=current_trace.expected_output,
218218
context=current_trace.context,
219219
retrieval_context=current_trace.retrieval_context,
220-
additional_metadata=golden.additional_metadata,
220+
metadata=golden.additional_metadata,
221221
tools_called=current_trace.tools_called,
222222
expected_tools=current_trace.expected_tools,
223223
comments=golden.comments,

deepeval/evaluate/execute/trace_scope.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -142,7 +142,7 @@ def _assert_test_from_current_trace(
142142
expected_output=current_trace.expected_output,
143143
context=current_trace.context,
144144
retrieval_context=current_trace.retrieval_context,
145-
additional_metadata=golden.additional_metadata,
145+
metadata=golden.additional_metadata,
146146
tools_called=current_trace.tools_called,
147147
expected_tools=current_trace.expected_tools,
148148
comments=golden.comments,

deepeval/evaluate/types.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ class TestResult:
2323
context: Optional[List[str]] = None
2424
retrieval_context: Optional[List[str]] = None
2525
turns: Optional[List[TurnApi]] = None
26-
additional_metadata: Optional[Dict] = None
26+
metadata: Optional[Dict] = None
2727

2828

2929
class EvaluationResult(BaseModel):

0 commit comments

Comments
 (0)