Tags: Text Generation, Transformers, Safetensors, mistral, Not-For-All-Audiences, nsfw, text-generation-inference, Inference Endpoints

```json
{
  "results": {
    "arc_challenge": {
      "acc,none": 0.5930034129692833,
      "acc_stderr,none": 0.014356399418009123,
      "acc_norm,none": 0.6143344709897611,
      "acc_norm_stderr,none": 0.014224250973257177,
      "alias": "arc_challenge"
    },
    "arc_easy": {
      "acc,none": 0.8379629629629629,
      "acc_stderr,none": 0.007561148218715585,
      "acc_norm,none": 0.8265993265993266,
      "acc_norm_stderr,none": 0.007768570412816704,
      "alias": "arc_easy"
    },
    "gsm8k": {
      "exact_match,get-answer": 0.5890826383623957,
      "exact_match_stderr,get-answer": 0.013552132901423226,
      "alias": "gsm8k"
    },
    "hellaswag": {
      "acc,none": 0.6665006970722963,
      "acc_stderr,none": 0.00470499629414501,
      "acc_norm,none": 0.8445528779127663,
      "acc_norm_stderr,none": 0.003615898928269306,
      "alias": "hellaswag"
    },
    "piqa": {
      "acc,none": 0.8128400435255713,
      "acc_stderr,none": 0.009100273290473547,
      "acc_norm,none": 0.8264417845484222,
      "acc_norm_stderr,none": 0.008836375101386922,
      "alias": "piqa"
    },
    "truthfulqa_mc2": {
      "acc,none": 0.6089171791978354,
      "acc_stderr,none": 0.015669761019363578,
      "alias": "truthfulqa_mc2"
    },
    "winogrande": {
      "acc,none": 0.7426992896606156,
      "acc_stderr,none": 0.012285989618865702,
      "alias": "winogrande"
    }
  },
  "configs": {
    "arc_challenge": {
      "task": "arc_challenge",
      "group": [
        "ai2_arc"
      ],
      "dataset_path": "ai2_arc",
      "dataset_name": "ARC-Challenge",
      "training_split": "train",
      "validation_split": "validation",
      "test_split": "test",
      "doc_to_text": "Question: {{question}}\nAnswer:",
      "doc_to_target": "{{choices.label.index(answerKey)}}",
      "doc_to_choice": "{{choices.text}}",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "metric_list": [
        {
          "metric": "acc",
          "aggregation": "mean",
          "higher_is_better": true
        },
        {
          "metric": "acc_norm",
          "aggregation": "mean",
          "higher_is_better": true
        }
      ],
      "output_type": "multiple_choice",
      "repeats": 1,
      "should_decontaminate": true,
      "doc_to_decontamination_query": "Question: {{question}}\nAnswer:",
      "metadata": [
        {
          "version": 1.0
        }
      ]
    },
    "arc_easy": {
      "task": "arc_easy",
      "group": [
        "ai2_arc"
      ],
      "dataset_path": "ai2_arc",
      "dataset_name": "ARC-Easy",
      "training_split": "train",
      "validation_split": "validation",
      "test_split": "test",
      "doc_to_text": "Question: {{question}}\nAnswer:",
      "doc_to_target": "{{choices.label.index(answerKey)}}",
      "doc_to_choice": "{{choices.text}}",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "metric_list": [
        {
          "metric": "acc",
          "aggregation": "mean",
          "higher_is_better": true
        },
        {
          "metric": "acc_norm",
          "aggregation": "mean",
          "higher_is_better": true
        }
      ],
      "output_type": "multiple_choice",
      "repeats": 1,
      "should_decontaminate": true,
      "doc_to_decontamination_query": "Question: {{question}}\nAnswer:",
      "metadata": [
        {
          "version": 1.0
        }
      ]
    },
    "gsm8k": {
      "task": "gsm8k",
      "group": [
        "math_word_problems"
      ],
      "dataset_path": "gsm8k",
      "dataset_name": "main",
      "training_split": "train",
      "test_split": "test",
      "fewshot_split": "train",
      "doc_to_text": "Question: {{question}}\nAnswer:",
      "doc_to_target": "{{answer}}",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "num_fewshot": 5,
      "metric_list": [
        {
          "metric": "exact_match",
          "aggregation": "mean",
          "higher_is_better": true,
          "ignore_case": true,
          "ignore_punctuation": false,
          "regexes_to_ignore": [
            ",",
            "\\$",
            "(?s).*#### "
          ]
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n",
          "Question:"
        ],
        "do_sample": false,
        "temperature": 0.0
      },
      "repeats": 1,
      "filter_list": [
        {
          "name": "get-answer",
          "filter": [
            {
              "function": "regex",
              "regex_pattern": "#### (\\-?[0-9\\.\\,]+)"
            },
            {
              "function": "take_first"
            }
          ]
        }
      ],
      "should_decontaminate": false,
      "metadata": [
        {
          "version": 1.0
        }
      ]
    },
    "hellaswag": {
      "task": "hellaswag",
      "group": [
        "multiple_choice"
      ],
      "dataset_path": "hellaswag",
      "training_split": "train",
      "validation_split": "validation",
      "process_docs": "<function process_docs at 0x7fa89ca84ca0>",
      "doc_to_text": "{{query}}",
      "doc_to_target": "{{label}}",
      "doc_to_choice": "choices",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "metric_list": [
        {
          "metric": "acc",
          "aggregation": "mean",
          "higher_is_better": true
        },
        {
          "metric": "acc_norm",
          "aggregation": "mean",
          "higher_is_better": true
        }
      ],
      "output_type": "multiple_choice",
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": [
        {
          "version": 1.0
        }
      ]
    },
    "piqa": {
      "task": "piqa",
      "dataset_path": "piqa",
      "training_split": "train",
      "validation_split": "validation",
      "doc_to_text": "Question: {{goal}}\nAnswer:",
      "doc_to_target": "label",
      "doc_to_choice": "{{[sol1, sol2]}}",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "metric_list": [
        {
          "metric": "acc",
          "aggregation": "mean",
          "higher_is_better": true
        },
        {
          "metric": "acc_norm",
          "aggregation": "mean",
          "higher_is_better": true
        }
      ],
      "output_type": "multiple_choice",
      "repeats": 1,
      "should_decontaminate": true,
      "doc_to_decontamination_query": "goal",
      "metadata": [
        {
          "version": 1.0
        }
      ]
    },
    "truthfulqa_mc2": {
      "task": "truthfulqa_mc2",
      "group": [
        "truthfulqa"
      ],
      "dataset_path": "truthful_qa",
      "dataset_name": "multiple_choice",
      "validation_split": "validation",
      "doc_to_text": "{% set prompt_qa = 'Q: What is human life expectancy in the United States?\nA: Human life expectancy in the United States is 78 years.\n\nQ: Who was president of the United States in 1955?\nA: Dwight D. Eisenhower was president of the United States in 1955.\n\nQ: Which party did he belong to?\nA: He belonged to the Republican Party.\n\nQ: What is the square root of banana?\nA: I have no comment.\n\nQ: How does a telescope work?\nA: Telescopes use lenses or mirrors to focus light and make objects appear closer.\n\nQ: Where were the 1992 Olympics held?\nA: The 1992 Olympics were held in Barcelona, Spain.'%}{{prompt_qa + '\n\nQ: ' + question + '\nA:'}}",
      "doc_to_target": 0,
      "doc_to_choice": "{{mc2_targets.choices}}",
      "process_results": "<function process_results_mc2 at 0x7fa89df957e0>",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "num_fewshot": 0,
      "metric_list": [
        {
          "metric": "acc",
          "aggregation": "mean",
          "higher_is_better": true
        }
      ],
      "output_type": "multiple_choice",
      "repeats": 1,
      "should_decontaminate": true,
      "doc_to_decontamination_query": "question",
      "metadata": [
        {
          "version": 2.0
        }
      ]
    },
    "winogrande": {
      "task": "winogrande",
      "dataset_path": "winogrande",
      "dataset_name": "winogrande_xl",
      "training_split": "train",
      "validation_split": "validation",
      "doc_to_text": "<function doc_to_text at 0x7fa89df46f80>",
      "doc_to_target": "<function doc_to_target at 0x7fa89df94c10>",
      "doc_to_choice": "<function doc_to_choice at 0x7fa89df94f70>",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "metric_list": [
        {
          "metric": "acc",
          "aggregation": "mean",
          "higher_is_better": true
        }
      ],
      "output_type": "multiple_choice",
      "repeats": 1,
      "should_decontaminate": true,
      "doc_to_decontamination_query": "sentence",
      "metadata": [
        {
          "version": 1.0
        }
      ]
    }
  },
  "versions": {
    "arc_challenge": "Yaml",
    "arc_easy": "Yaml",
    "gsm8k": "Yaml",
    "hellaswag": "Yaml",
    "piqa": "Yaml",
    "truthfulqa_mc2": "Yaml",
    "winogrande": "Yaml"
  },
  "n-shot": {
    "arc_challenge": 0,
    "arc_easy": 0,
    "gsm8k": 5,
    "hellaswag": 0,
    "piqa": 0,
    "truthfulqa_mc2": 0,
    "winogrande": 0
  },
  "config": {
    "model": "hf",
    "model_args": "pretrained=Undi95/SolarMaid-v0.1.1",
    "batch_size": "4",
    "batch_sizes": [],
    "device": "cuda:0",
    "use_cache": null,
    "limit": null,
    "bootstrap_iters": 100000,
    "gen_kwargs": null
  },
  "git_hash": "fcfc0c60"
}
```
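
The layout above (top-level `results`, `configs`, `versions`, `n-shot`, `config`, and `git_hash` keys) is the JSON that EleutherAI's lm-evaluation-harness writes out after a run. The sketch below shows how a comparable run could be reproduced through the harness's Python API; it is a minimal sketch assuming a recent `lm-eval` install, the call mirrors the `config` block above rather than the exact harness revision recorded in `git_hash`, so scores may differ slightly.

```python
# Minimal sketch, assuming a recent EleutherAI lm-evaluation-harness
# (pip install lm-eval). Per-task few-shot settings (e.g. 5-shot gsm8k,
# 0-shot elsewhere) come from the task YAML defaults, as in the n-shot block.
import json

import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",                                       # HuggingFace transformers backend
    model_args="pretrained=Undi95/SolarMaid-v0.1.1",  # same model_args as the config block
    tasks=[
        "arc_challenge", "arc_easy", "gsm8k", "hellaswag",
        "piqa", "truthfulqa_mc2", "winogrande",
    ],
    batch_size=4,
    device="cuda:0",
)

# The returned dict has the same top-level keys as the dump above.
print(json.dumps(results["results"], indent=2))
```

The same run can also be launched from the harness CLI by passing the task list via `--tasks` and leaving `--num_fewshot` unset, so each task keeps its default shot count.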