Add document summaries to yourbench_task.py
yourbench_space/lighteval_task/yourbench_task.py CHANGED
@@ -181,7 +181,7 @@ class JudgeLLMYourBench(JudgeLLM):
             process_judge_response=process_judge_response_yourbench,
             judge_backend="inference-providers",
             short_judge_name="yourbench_judge",
-            hf_provider="
+            hf_provider="nebius",
             max_tokens=2048,
         )

@@ -192,7 +192,7 @@ class JudgeLLMYourBench(JudgeLLM):
         predictions = [response[0].result[0] for response in responses]
         options = [None] * len(questions)
         chunks = [formatted_doc.specific["chunks"][0] for formatted_doc in formatted_docs]
-        documents = [formatted_doc.specific["
+        documents = [formatted_doc.specific["document_summary"] for formatted_doc in formatted_docs]

         score, _, _ = self.judge.evaluate_answer_batch(
             questions, predictions, options, golds, chunks=chunks, documents=documents
@@ -235,6 +235,7 @@ def yourbench_prompt(line, task_name: str = ""):
             "chunks": line["chunks"],
             "question": line["question"],
             "document": line["document"],
+            "document_summary": line["document_summary"],
         },
     )
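Taken together, the three hunks thread a per-document summary from the dataset row through to the judge call: `yourbench_prompt` stores `line["document_summary"]` in the formatted doc's `specific` dict, and the metric later reads it back and forwards it to `evaluate_answer_batch` as `documents`. The sketch below illustrates that flow with stand-in types; the minimal `Doc` dataclass and the `compute` function here are simplifications for illustration, not lighteval's actual classes.

```python
from dataclasses import dataclass, field


@dataclass
class Doc:
    """Stand-in for lighteval's Doc: only the fields this flow touches."""
    query: str
    specific: dict = field(default_factory=dict)


def yourbench_prompt(line: dict) -> Doc:
    # Mirror of the patched prompt function: the summary rides along
    # in `specific`, next to the chunks and the full document.
    return Doc(
        query=line["question"],
        specific={
            "chunks": line["chunks"],
            "question": line["question"],
            "document": line["document"],
            "document_summary": line["document_summary"],  # new field
        },
    )


def compute(formatted_docs: list[Doc]) -> None:
    # Mirror of the patched metric: pull the summaries back out and
    # hand them to the judge as its `documents` argument.
    chunks = [doc.specific["chunks"][0] for doc in formatted_docs]
    documents = [doc.specific["document_summary"] for doc in formatted_docs]
    print("judge would receive:", list(zip(chunks, documents)))


if __name__ == "__main__":
    row = {
        "question": "What changed?",
        "chunks": ["chunk 0"],
        "document": "full document text",
        "document_summary": "one-line summary",
    }
    compute([yourbench_prompt(row)])
```

The net effect of the change, presumably, is that the judge now grades answers against a condensed summary rather than the raw `document` field, which keeps its context shorter.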