Add my new, shiny module.
llm_harness_mistral_arc.py CHANGED (+6 -12)
```diff
@@ -23,17 +23,11 @@ class llm_harness_mistral_arc(evaluate.Metric):
         return evaluate.MetricInfo(
             # This is the description that will appear on the modules page.
             module_type="metric",
-            description=
-            citation=
-            inputs_description=
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_KWARGS_DESCRIPTION,
             # This defines the format of each prediction and reference
-            features=
-            datasets.Features(
-            {
-                "pretrained": datasets.Value("string", id="sequence"),
-                "tasks": datasets.Sequence(datasets.Value("string", id="sequence"), id="tasks"),
-            }
-            )],
+            features={},
             # Homepage of the module for documentation
             homepage="http://module.homepage",
             # Additional links to the codebase or references
@@ -41,7 +35,7 @@ class llm_harness_mistral_arc(evaluate.Metric):
             reference_urls=["http://path.to.reference.url/new_module"]
         )
 
-    def _compute(self, pretrained, tasks):
+    def _compute(self, pretrained=None, tasks=[]):
         outputs = lm_eval.simple_evaluate(
             model="hf",
             model_args={"pretrained":pretrained},
@@ -50,6 +44,6 @@ class llm_harness_mistral_arc(evaluate.Metric):
         )
         results = {}
         for task in outputs['results']:
-
+            results[task] = {'acc':outputs['results'][task]['acc,none'],
                 'acc_norm':outputs['results'][task]['acc_norm,none']}
         return results
```
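With `features={}`, the inputs no longer pass through `evaluate`'s usual predictions/references pipeline: keyword arguments that are not feature names are forwarded by `compute()` straight to `_compute()`, which hands them to `lm_eval.simple_evaluate`. A minimal usage sketch of the module after this commit, assuming the Space is published and `evaluate` plus `lm-eval` are installed; the Space id and model name below are illustrative, not taken from this repo:

```python
import evaluate

# Load the metric module from its Space (id is illustrative).
harness = evaluate.load("user/llm_harness_mistral_arc", module_type="metric")

# Because features={}, these kwargs bypass add_batch() and go directly to
# _compute(): "pretrained" is a Hub model id, "tasks" a list of
# lm-evaluation-harness task names.
results = harness.compute(
    pretrained="mistralai/Mistral-7B-v0.1",
    tasks=["arc_easy", "arc_challenge"],
)

# _compute() returns {task: {"acc": ..., "acc_norm": ...}}, built from the
# "acc,none" / "acc_norm,none" keys of simple_evaluate's results dict.
print(results)
```

One side note on the new signature: `tasks=[]` is a mutable default argument, a classic Python pitfall; `tasks=None` with a fallback inside the function is the safer idiom.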