Update README.md
Browse files
README.md
CHANGED
@@ -47,6 +47,8 @@ license: other
|
|
47 |
publisher = {Hugging Face},
|
48 |
howpublished = "\url{https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard}"
|
49 |
}
|
|
|
|
|
50 |
@software{eval-harness,
|
51 |
author = {Gao, Leo and
|
52 |
Tow, Jonathan and
|
@@ -73,6 +75,8 @@ license: other
|
|
73 |
doi = {10.5281/zenodo.5371628},
|
74 |
url = {https://doi.org/10.5281/zenodo.5371628}
|
75 |
}
|
|
|
|
|
76 |
@misc{clark2018think,
|
77 |
title={Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge},
|
78 |
author={Peter Clark and Isaac Cowhey and Oren Etzioni and Tushar Khot and Ashish Sabharwal and Carissa Schoenick and Oyvind Tafjord},
|
@@ -81,6 +85,8 @@ license: other
|
|
81 |
archivePrefix={arXiv},
|
82 |
primaryClass={cs.AI}
|
83 |
}
|
|
|
|
|
84 |
@misc{zellers2019hellaswag,
|
85 |
title={HellaSwag: Can a Machine Really Finish Your Sentence?},
|
86 |
author={Rowan Zellers and Ari Holtzman and Yonatan Bisk and Ali Farhadi and Yejin Choi},
|
@@ -89,6 +95,8 @@ license: other
|
|
89 |
archivePrefix={arXiv},
|
90 |
primaryClass={cs.CL}
|
91 |
}
|
|
|
|
|
92 |
@misc{hendrycks2021measuring,
|
93 |
title={Measuring Massive Multitask Language Understanding},
|
94 |
author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
|
@@ -97,7 +105,8 @@ license: other
|
|
97 |
archivePrefix={arXiv},
|
98 |
primaryClass={cs.CY}
|
99 |
}
|
100 |
-
|
|
|
101 |
@misc{lin2022truthfulqa,
|
102 |
title={TruthfulQA: Measuring How Models Mimic Human Falsehoods},
|
103 |
author={Stephanie Lin and Jacob Hilton and Owain Evans},
|
|
|
47 |
publisher = {Hugging Face},
|
48 |
howpublished = "\url{https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard}"
|
49 |
}
|
50 |
+
```
|
51 |
+
```
|
52 |
@software{eval-harness,
|
53 |
author = {Gao, Leo and
|
54 |
Tow, Jonathan and
|
|
|
75 |
doi = {10.5281/zenodo.5371628},
|
76 |
url = {https://doi.org/10.5281/zenodo.5371628}
|
77 |
}
|
78 |
+
```
|
79 |
+
```
|
80 |
@misc{clark2018think,
|
81 |
title={Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge},
|
82 |
author={Peter Clark and Isaac Cowhey and Oren Etzioni and Tushar Khot and Ashish Sabharwal and Carissa Schoenick and Oyvind Tafjord},
|
|
|
85 |
archivePrefix={arXiv},
|
86 |
primaryClass={cs.AI}
|
87 |
}
|
88 |
+
```
|
89 |
+
```
|
90 |
@misc{zellers2019hellaswag,
|
91 |
title={HellaSwag: Can a Machine Really Finish Your Sentence?},
|
92 |
author={Rowan Zellers and Ari Holtzman and Yonatan Bisk and Ali Farhadi and Yejin Choi},
|
|
|
95 |
archivePrefix={arXiv},
|
96 |
primaryClass={cs.CL}
|
97 |
}
|
98 |
+
```
|
99 |
+
```
|
100 |
@misc{hendrycks2021measuring,
|
101 |
title={Measuring Massive Multitask Language Understanding},
|
102 |
author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
|
|
|
105 |
archivePrefix={arXiv},
|
106 |
primaryClass={cs.CY}
|
107 |
}
|
108 |
+
```
|
109 |
+
```
|
110 |
@misc{lin2022truthfulqa,
|
111 |
title={TruthfulQA: Measuring How Models Mimic Human Falsehoods},
|
112 |
author={Stephanie Lin and Jacob Hilton and Owain Evans},
|