Update README.md
Browse files
README.md
CHANGED
@@ -49,8 +49,73 @@ print(text)
|
|
49 |
|
50 |
# Benchmark Scores
|
51 |
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
|
56 |
## Citations
|
|
|
49 |
|
50 |
# Benchmark Scores
|
51 |
|
52 |
+
I ran the benchmark harness out of curiosity, but this model is geared entirely towards summarization.
|
53 |
+
|
54 |
+
| Test Name | Accuracy |
|
55 |
+
|------------------------------------------------------|----------------------|
|
56 |
+
| all | 0.579149139810157 |
|
57 |
+
| arc:challenge | 0.5631399317406144 |
|
58 |
+
| hellaswag | 0.6317466640111532 |
|
59 |
+
| hendrycksTest-abstract_algebra | 0.32 |
|
60 |
+
| hendrycksTest-anatomy | 0.5481481481481482 |
|
61 |
+
| hendrycksTest-astronomy | 0.5657894736842105 |
|
62 |
+
| hendrycksTest-business_ethics | 0.55 |
|
63 |
+
| hendrycksTest-clinical_knowledge | 0.6 |
|
64 |
+
| hendrycksTest-college_biology | 0.6388888888888888 |
|
65 |
+
| hendrycksTest-college_chemistry | 0.38 |
|
66 |
+
| hendrycksTest-college_computer_science | 0.43 |
|
67 |
+
| hendrycksTest-college_mathematics | 0.34 |
|
68 |
+
| hendrycksTest-college_medicine | 0.5260115606936416 |
|
69 |
+
| hendrycksTest-college_physics | 0.3431372549019608 |
|
70 |
+
| hendrycksTest-computer_security | 0.71 |
|
71 |
+
| hendrycksTest-conceptual_physics | 0.49361702127659574 |
|
72 |
+
| hendrycksTest-econometrics | 0.35964912280701755 |
|
73 |
+
| hendrycksTest-electrical_engineering | 0.5586206896551724 |
|
74 |
+
| hendrycksTest-elementary_mathematics | 0.3439153439153439 |
|
75 |
+
| hendrycksTest-formal_logic | 0.3333333333333333 |
|
76 |
+
| hendrycksTest-global_facts | 0.42 |
|
77 |
+
| hendrycksTest-high_school_biology | 0.6903225806451613 |
|
78 |
+
| hendrycksTest-high_school_chemistry | 0.45320197044334976 |
|
79 |
+
| hendrycksTest-high_school_computer_science | 0.58 |
|
80 |
+
| hendrycksTest-high_school_european_history | 0.6787878787878788 |
|
81 |
+
| hendrycksTest-high_school_geography | 0.7424242424242424 |
|
82 |
+
| hendrycksTest-high_school_government_and_politics | 0.8341968911917098 |
|
83 |
+
| hendrycksTest-high_school_macroeconomics | 0.558974358974359 |
|
84 |
+
| hendrycksTest-high_school_mathematics | 0.3 |
|
85 |
+
| hendrycksTest-high_school_microeconomics | 0.5672268907563025 |
|
86 |
+
| hendrycksTest-high_school_physics | 0.33112582781456956 |
|
87 |
+
| hendrycksTest-high_school_psychology | 0.7577981651376147 |
|
88 |
+
| hendrycksTest-high_school_statistics | 0.4212962962962963 |
|
89 |
+
| hendrycksTest-high_school_us_history | 0.8186274509803921 |
|
90 |
+
| hendrycksTest-high_school_world_history | 0.759493670886076 |
|
91 |
+
| hendrycksTest-human_aging | 0.6547085201793722 |
|
92 |
+
| hendrycksTest-human_sexuality | 0.6412213740458015 |
|
93 |
+
| hendrycksTest-international_law | 0.6776859504132231 |
|
94 |
+
| hendrycksTest-jurisprudence | 0.75 |
|
95 |
+
| hendrycksTest-logical_fallacies | 0.6993865030674846 |
|
96 |
+
| hendrycksTest-machine_learning | 0.41964285714285715 |
|
97 |
+
| hendrycksTest-management | 0.7281553398058253 |
|
98 |
+
| hendrycksTest-marketing | 0.8504273504273504 |
|
99 |
+
| hendrycksTest-medical_genetics | 0.6 |
|
100 |
+
| hendrycksTest-miscellaneous | 0.7624521072796935 |
|
101 |
+
| hendrycksTest-moral_disputes | 0.6560693641618497 |
|
102 |
+
| hendrycksTest-moral_scenarios | 0.4346368715083799 |
|
103 |
+
| hendrycksTest-nutrition | 0.673202614379085 |
|
104 |
+
| hendrycksTest-philosophy | 0.7009646302250804 |
|
105 |
+
| hendrycksTest-prehistory | 0.7067901234567902 |
|
106 |
+
| hendrycksTest-professional_accounting | 0.4645390070921986 |
|
107 |
+
| hendrycksTest-professional_law | 0.45697522816166886 |
|
108 |
+
| hendrycksTest-professional_medicine | 0.5514705882352942 |
|
109 |
+
| hendrycksTest-professional_psychology | 0.6013071895424836 |
|
110 |
+
| hendrycksTest-public_relations | 0.6636363636363637 |
|
111 |
+
| hendrycksTest-security_studies | 0.6448979591836734 |
|
112 |
+
| hendrycksTest-sociology | 0.7611940298507462 |
|
113 |
+
| hendrycksTest-us_foreign_policy | 0.84 |
|
114 |
+
| hendrycksTest-virology | 0.4819277108433735 |
|
115 |
+
| hendrycksTest-world_religions | 0.7894736842105263 |
|
116 |
+
| truthfulqa:mc | 0.4762440289139372 |
|
117 |
+
| winogrande | 0.7616416732438832 |
|
118 |
+
| gsm8k | 0.20621683093252463 |
|
119 |
|
120 |
|
121 |
## Citations
|