ibivibiv committed on
Commit
9a4c0f3
1 Parent(s): 1cf4f31

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +95 -0
README.md CHANGED
@@ -47,3 +47,98 @@ outputs = model.generate(**inputs, max_length=200)
47
  text = tokenizer.batch_decode(outputs)[0]
48
  print(text)
49
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  text = tokenizer.batch_decode(outputs)[0]
48
  print(text)
49
  ```
50
+
51
+
52
+ @misc{open-llm-leaderboard,
53
+ author = {Edward Beeching and Clémentine Fourrier and Nathan Habib and Sheon Han and Nathan Lambert and Nazneen Rajani and Omar Sanseviero and Lewis Tunstall and Thomas Wolf},
54
+ title = {Open LLM Leaderboard},
55
+ year = {2023},
56
+ publisher = {Hugging Face},
57
+ howpublished = "\url{https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard}"
58
+ }
59
+ @software{eval-harness,
60
+ author = {Gao, Leo and
61
+ Tow, Jonathan and
62
+ Biderman, Stella and
63
+ Black, Sid and
64
+ DiPofi, Anthony and
65
+ Foster, Charles and
66
+ Golding, Laurence and
67
+ Hsu, Jeffrey and
68
+ McDonell, Kyle and
69
+ Muennighoff, Niklas and
70
+ Phang, Jason and
71
+ Reynolds, Laria and
72
+ Tang, Eric and
73
+ Thite, Anish and
74
+ Wang, Ben and
75
+ Wang, Kevin and
76
+ Zou, Andy},
77
+ title = {A framework for few-shot language model evaluation},
78
+ month = sep,
79
+ year = 2021,
80
+ publisher = {Zenodo},
81
+ version = {v0.0.1},
82
+ doi = {10.5281/zenodo.5371628},
83
+ url = {https://doi.org/10.5281/zenodo.5371628}
84
+ }
85
+ @misc{clark2018think,
86
+ title={Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge},
87
+ author={Peter Clark and Isaac Cowhey and Oren Etzioni and Tushar Khot and Ashish Sabharwal and Carissa Schoenick and Oyvind Tafjord},
88
+ year={2018},
89
+ eprint={1803.05457},
90
+ archivePrefix={arXiv},
91
+ primaryClass={cs.AI}
92
+ }
93
+ @misc{zellers2019hellaswag,
94
+ title={HellaSwag: Can a Machine Really Finish Your Sentence?},
95
+ author={Rowan Zellers and Ari Holtzman and Yonatan Bisk and Ali Farhadi and Yejin Choi},
96
+ year={2019},
97
+ eprint={1905.07830},
98
+ archivePrefix={arXiv},
99
+ primaryClass={cs.CL}
100
+ }
101
+ @misc{hendrycks2021measuring,
102
+ title={Measuring Massive Multitask Language Understanding},
103
+ author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
104
+ year={2021},
105
+ eprint={2009.03300},
106
+ archivePrefix={arXiv},
107
+ primaryClass={cs.CY}
108
+ }
109
+ @misc{lin2022truthfulqa,
110
+ title={TruthfulQA: Measuring How Models Mimic Human Falsehoods},
111
+ author={Stephanie Lin and Jacob Hilton and Owain Evans},
112
+ year={2022},
113
+ eprint={2109.07958},
114
+ archivePrefix={arXiv},
115
+ primaryClass={cs.CL}
116
+ }
117
+ @misc{DBLP:journals/corr/abs-1907-10641,
118
+ title={{WINOGRANDE:} An Adversarial Winograd Schema Challenge at Scale},
119
+ author={Keisuke Sakaguchi and Ronan Le Bras and Chandra Bhagavatula and Yejin Choi},
120
+ year={2019},
121
+ eprint={1907.10641},
122
+ archivePrefix={arXiv},
123
+ primaryClass={cs.CL}
124
+ }
125
+ @misc{DBLP:journals/corr/abs-2110-14168,
126
+ title={Training Verifiers to Solve Math Word Problems},
127
+ author={Karl Cobbe and
128
+ Vineet Kosaraju and
129
+ Mohammad Bavarian and
130
+ Mark Chen and
131
+ Heewoo Jun and
132
+ Lukasz Kaiser and
133
+ Matthias Plappert and
134
+ Jerry Tworek and
135
+ Jacob Hilton and
136
+ Reiichiro Nakano and
137
+ Christopher Hesse and
138
+ John Schulman},
139
+ year={2021},
140
+ eprint={2110.14168},
141
+ archivePrefix={arXiv},
142
+ primaryClass={cs.CL}
143
+ }
144
+