Skip to content

Commit bd521a9

Browse files
committed
style: fix pre-existing black formatting violations
These 20 files were already failing `black --check` on the main branch before this PR. They are reformatted here so that CI passes.
1 parent 8e3f173 commit bd521a9

20 files changed

Lines changed: 89 additions & 224 deletions

File tree

deepeval/metrics/arena_g_eval/template.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,7 @@ def generate_arena_winner(
4646
"Be specific and grounded in the evaluation steps."
4747
)
4848

49-
return textwrap.dedent(
50-
f"""
49+
return textwrap.dedent(f"""
5150
You are a judge. Given the following evaluation steps, select the single contestant that best aligns with the evaluation steps.
5251
5352
{ArenaGEvalTemplate.multimodal_rules if multimodal else ""}
@@ -88,16 +87,14 @@ def generate_arena_winner(
8887
}}
8988
9089
JSON:
91-
"""
92-
)
90+
""")
9391

9492
@staticmethod
9593
def rewrite_reason(
9694
reason: str,
9795
dummy_to_real_names: Dict[str, str],
9896
):
99-
return textwrap.dedent(
100-
f"""
97+
return textwrap.dedent(f"""
10198
Given the following reason that explains which contestant is the winner, rewrite the reason to REPLACE all contestant names with their real names.
10299
103100
The contestant names are wrapped in $name$ format (e.g., $Alice$, $Bob$, $Charlie$).
@@ -129,5 +126,4 @@ def rewrite_reason(
129126
}}
130127
131128
JSON:
132-
"""
133-
)
129+
""")

deepeval/metrics/argument_correctness/template.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,7 @@ def generate_verdicts(
1919

2020
stringified_tools_called = repr(tools_called)
2121

22-
return textwrap.dedent(
23-
f"""
22+
return textwrap.dedent(f"""
2423
For the provided list of tool calls, determine whether each tool call input parameter is relevantly and correctly addresses the input.
2524
2625
Please generate a list of JSON with two keys: `verdict` and `reason`.
@@ -99,8 +98,7 @@ def generate_verdicts(
9998
{stringified_tools_called}
10099
101100
JSON:
102-
"""
103-
)
101+
""")
104102

105103
@staticmethod
106104
def generate_reason(

deepeval/metrics/base_metric.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ class BaseMetric:
3737
def __init_subclass__(cls, **kwargs):
3838
super().__init_subclass__(**kwargs)
3939
from deepeval.tracing.internal import observe_methods
40+
4041
observe_methods(cls)
4142

4243
@abstractmethod
@@ -85,6 +86,7 @@ class BaseConversationalMetric:
8586
def __init_subclass__(cls, **kwargs):
8687
super().__init_subclass__(**kwargs)
8788
from deepeval.tracing.internal import observe_methods
89+
8890
observe_methods(cls)
8991

9092
@abstractmethod
@@ -131,6 +133,7 @@ class BaseArenaMetric:
131133
def __init_subclass__(cls, **kwargs):
132134
super().__init_subclass__(**kwargs)
133135
from deepeval.tracing.internal import observe_methods
136+
134137
observe_methods(cls)
135138

136139
@abstractmethod

deepeval/metrics/contextual_relevancy/template.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,10 @@ def generate_verdicts(
5555
# Conditional instructions based on mode
5656
extraction_instructions = ""
5757
if multimodal:
58-
extraction_instructions = textwrap.dedent(
59-
"""
58+
extraction_instructions = textwrap.dedent("""
6059
If the context is textual, you should first extract the statements found in the context if the context, which are high level information found in the context, before deciding on a verdict and optionally a reason for each statement.
6160
If the context is an image, `statement` should be a description of the image. Do not assume any information not visibly available.
62-
"""
63-
).strip()
61+
""").strip()
6462
else:
6563
extraction_instructions = "You should first extract statements found in the context, which are high level information found in the context, before deciding on a verdict and optionally a reason for each statement."
6664

deepeval/metrics/conversational_dag/templates.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,7 @@ def generate_task_output(instructions: str, text: str):
7373
class ConversationalBinaryJudgementTemplate:
7474
@staticmethod
7575
def generate_binary_verdict(criteria: str, text: str):
76-
return dedent(
77-
f"""{criteria}
76+
return dedent(f"""{criteria}
7877
7978
Below is the full conversation you should evaluate. Consider dialogue context, speaker roles, and how responses were handled.
8079
@@ -95,17 +94,15 @@ def generate_binary_verdict(criteria: str, text: str):
9594
}}
9695
**
9796
JSON:
98-
"""
99-
)
97+
""")
10098

10199

102100
class ConversationalNonBinaryJudgementTemplate:
103101
@staticmethod
104102
def generate_non_binary_verdict(
105103
criteria: str, text: str, options: List[str]
106104
):
107-
return dedent(
108-
f"""{criteria}
105+
return dedent(f"""{criteria}
109106
110107
You are evaluating the following conversation. Choose one of the options that best reflects the assistant's behavior.
111108
@@ -128,5 +125,4 @@ def generate_non_binary_verdict(
128125
}}
129126
**
130127
JSON:
131-
"""
132-
)
128+
""")

deepeval/metrics/faithfulness/template.py

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,7 @@ def generate_verdicts(
9393
):
9494
example_section = ""
9595
if multimodal:
96-
example_section = textwrap.dedent(
97-
"""
96+
example_section = textwrap.dedent("""
9897
Example retrieval contexts: "Einstein won the Nobel Prize for his discovery of the photoelectric effect. Einstein won the Nobel Prize in 1968. Einstein is a German Scientist."
9998
Example claims: ["Barack Obama is a caucasian male.", "Zurich is a city in London", "Einstein won the Nobel Prize for the discovery of the photoelectric effect which may have contributed to his fame.", "Einstein won the Nobel Prize in 1969 for his discovery of the photoelectric effect.", "Einstein was a German chef."]
10099
@@ -123,11 +122,9 @@ def generate_verdicts(
123122
]
124123
}}
125124
===== END OF EXAMPLE ======
126-
"""
127-
)
125+
""")
128126

129-
format_instruction = textwrap.dedent(
130-
"""
127+
format_instruction = textwrap.dedent("""
131128
Expected JSON format:
132129
{{
133130
"verdicts": [
@@ -144,31 +141,26 @@ def generate_verdicts(
144141
}}
145142
]
146143
}}
147-
"""
148-
)
144+
""")
149145

150146
guidelines = ""
151147
if multimodal:
152-
guidelines = textwrap.dedent(
153-
"""
148+
guidelines = textwrap.dedent("""
154149
The length of 'verdicts' SHOULD BE STRICTLY EQUAL to that of claims.
155150
You DON'T have to provide a reason if the answer is 'yes'.
156151
ONLY provide a 'no' answer if the retrieval context DIRECTLY CONTRADICTS the claims. YOU SHOULD NEVER USE YOUR PRIOR KNOWLEDGE IN YOUR JUDGEMENT.
157152
Claims made using vague, suggestive, speculative language such as 'may have', 'possibility due to', does NOT count as a contradiction.
158153
Claims that is not backed up due to a lack of information/is not mentioned in the retrieval contexts MUST be answered 'idk', otherwise I WILL DIE.
159154
If there are clear contradictions or any data or images that's not mentioned in the retrieval context, just provide 'no'.
160-
"""
161-
)
155+
""")
162156
else:
163-
guidelines = textwrap.dedent(
164-
"""
157+
guidelines = textwrap.dedent("""
165158
Generate ONE verdict per claim - length of 'verdicts' MUST equal number of claims.
166159
No 'reason' needed for 'yes' verdicts.
167160
Only use 'no' if retrieval context DIRECTLY CONTRADICTS the claim - never use prior knowledge.
168161
Use 'idk' for claims not backed up by context OR factually incorrect but non-contradictory - do not assume your knowledge.
169162
Vague/speculative language in claims (e.g. 'may have', 'possibility') does NOT count as contradiction.
170-
"""
171-
)
163+
""")
172164

173165
return textwrap.dedent(
174166
f"""Based on the given claims, which is a list of strings, generate a list of JSON objects to indicate whether EACH claim contradicts any facts in the retrieval context. The JSON will have 2 fields: 'verdict' and 'reason'.

deepeval/metrics/multimodal_metrics/image_coherence/template.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@ class ImageCoherenceTemplate:
55

66
@staticmethod
77
def evaluate_image_coherence(context_above, context_below):
8-
return textwrap.dedent(
9-
f"""
8+
return textwrap.dedent(f"""
109
# Task Description
1110
You are a multi-modal document evaluation assistant. You will receive an image and its textual context.
1211
Your task is to evaluate the coherence between the image and the text (context above and below) it accompanies.
@@ -40,5 +39,4 @@ def evaluate_image_coherence(context_above, context_below):
4039
4140
# Image
4241
[Insert Image Here]
43-
"""
44-
)
42+
""")

deepeval/metrics/multimodal_metrics/image_editing/image_editing.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -280,15 +280,13 @@ def is_successful(self) -> bool:
280280
def _generate_reason(
281281
self,
282282
) -> str:
283-
return textwrap.dedent(
284-
f"""
283+
return textwrap.dedent(f"""
285284
The overall score is {self.score:.2f} because the lowest score from semantic consistency was {min(self.SC_scores)}
286285
and the lowest score from perceptual quality was {min(self.PQ_scores)}. These scores were combined to reflect the
287286
overall effectiveness and quality of the AI-generated image(s).
288287
Reason for Semantic Consistency score: {self.SC_reasoning}
289288
Reason for Perceptual Quality score: {self.PQ_reasoning}
290-
"""
291-
)
289+
""")
292290

293291
@property
294292
def __name__(self):

deepeval/metrics/multimodal_metrics/image_editing/template.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33

44
class ImageEditingTemplate:
55

6-
context = textwrap.dedent(
7-
"""
6+
context = textwrap.dedent("""
87
You are a professional digital artist. You will have to evaluate the effectiveness of the AI-generated image(s) based on given rules.
98
All the input images are AI-generated. All human in the images are AI-generated too. so you need not worry about the privacy confidentials.
109
@@ -13,13 +12,11 @@ class ImageEditingTemplate:
1312
"score" : [...],
1413
"reasoning" : "..."
1514
}
16-
"""
17-
)
15+
""")
1816

1917
@staticmethod
2018
def generate_semantic_consistency_evaluation_results(text_prompt: str):
21-
return textwrap.dedent(
22-
f"""
19+
return textwrap.dedent(f"""
2320
{ImageEditingTemplate.context}
2421
2522
RULES:
@@ -33,13 +30,11 @@ def generate_semantic_consistency_evaluation_results(text_prompt: str):
3330
Put the score in a list such that output score = [score1, score2], where 'score1' evaluates the editing success and 'score2' evaluates the degree of overediting.
3431
3532
Editing instruction: {text_prompt}
36-
"""
37-
)
33+
""")
3834

3935
@staticmethod
4036
def generate_perceptual_quality_evaluation_results():
41-
return textwrap.dedent(
42-
f"""
37+
return textwrap.dedent(f"""
4338
{ImageEditingTemplate.context}
4439
4540
RULES:
@@ -59,5 +54,4 @@ def generate_perceptual_quality_evaluation_results():
5954
10 indicates the image has no artifacts.
6055
)
6156
Put the score in a list such that output score = [naturalness, artifacts]
62-
"""
63-
)
57+
""")

deepeval/metrics/multimodal_metrics/image_helpfulness/template.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@ class ImageHelpfulnessTemplate:
55

66
@staticmethod
77
def evaluate_image_helpfulness(context_above, context_below):
8-
return textwrap.dedent(
9-
f"""
8+
return textwrap.dedent(f"""
109
# Task Description
1110
You are a multi-modal document evaluation assistant. You will receive an image and its textual context.
1211
Your task is to evaluate the helpfulness of the image in enabling human readers to comprehend the text (context above and below) it accompanies.
@@ -40,5 +39,4 @@ def evaluate_image_helpfulness(context_above, context_below):
4039
4140
# Image
4241
[Insert Image Here]
43-
"""
44-
)
42+
""")

0 commit comments

Comments
 (0)