Update README.md: rewrite the TC-Eval section as a Markdown results table
Browse files
README.md
CHANGED
@@ -190,74 +190,72 @@ Taiwan LLM v2 is conducted in collaboration with [Ubitus K.K.](http://ubitus.net
|
|
190 |
|
191 |
|
192 |
## TC-Eval
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
community
|
197 |
-
|
198 |
-
community
|
199 |
-
community
|
200 |
-
community
|
201 |
-
community
|
202 |
-
community
|
203 |
-
community
|
204 |
-
community
|
205 |
-
community
|
206 |
-
community
|
207 |
-
community
|
208 |
-
community
|
209 |
-
community
|
210 |
-
community
|
211 |
-
community
|
212 |
-
community
|
213 |
-
community
|
214 |
-
community
|
215 |
-
community
|
216 |
-
community
|
217 |
-
community
|
218 |
-
community
|
219 |
-
community
|
220 |
-
community
|
221 |
-
community
|
222 |
-
community
|
223 |
-
community
|
224 |
-
community
|
225 |
-
community
|
226 |
-
community
|
227 |
-
community
|
228 |
-
community
|
229 |
-
community
|
230 |
-
community
|
231 |
-
community
|
232 |
-
community
|
233 |
-
community
|
234 |
-
community
|
235 |
-
community
|
236 |
-
community
|
237 |
-
community
|
238 |
-
community
|
239 |
-
community
|
240 |
-
community
|
241 |
-
community
|
242 |
-
community
|
243 |
-
community
|
244 |
-
community
|
245 |
-
community
|
246 |
-
community
|
247 |
-
community
|
248 |
-
community
|
249 |
-
community
|
250 |
-
community
|
251 |
-
community
|
252 |
-
community
|
253 |
-
community
|
254 |
-
community
|
255 |
-
community
|
256 |
-
community
|
257 |
-
community
|
258 |
-
community
|
259 |
-
community
|
260 |
-
community
|
261 |
-
community
|
262 |
-
community|tc-eval-v2:tmmluplus-veterinary_pathology|5|0
|
263 |
-
community|tc-eval-v2:tmmluplus-veterinary_pharmacology|5|0
|
|
|
190 |
|
191 |
|
192 |
## TC-Eval
|
193 |
+
| Task |Version|Metric|Value | |Stderr|
|
194 |
+
|---------------------------------------------------------------------------------|------:|------|-----:|---|-----:|
|
195 |
+
|community:tc-eval-v2:_average:5 | |acc |0.3508|± |0.0318|
|
196 |
+
|community:tc-eval-v2:tmmluplus-accounting:5 | 0|acc |0.2565|± |0.0317|
|
197 |
+
|community:tc-eval-v2:tmmluplus-administrative_law:5 | 0|acc |0.2833|± |0.0220|
|
198 |
+
|community:tc-eval-v2:tmmluplus-advance_chemistry:5 | 0|acc |0.3333|± |0.0427|
|
199 |
+
|community:tc-eval-v2:tmmluplus-agriculture:5 | 0|acc |0.1987|± |0.0326|
|
200 |
+
|community:tc-eval-v2:tmmluplus-anti_money_laundering:5 | 0|acc |0.5597|± |0.0430|
|
201 |
+
|community:tc-eval-v2:tmmluplus-auditing:5 | 0|acc |0.2836|± |0.0192|
|
202 |
+
|community:tc-eval-v2:tmmluplus-basic_medical_science:5 | 0|acc |0.2841|± |0.0146|
|
203 |
+
|community:tc-eval-v2:tmmluplus-business_management:5 | 0|acc |0.4245|± |0.0421|
|
204 |
+
|community:tc-eval-v2:tmmluplus-chinese_language_and_literature:5 | 0|acc |0.2714|± |0.0316|
|
205 |
+
|community:tc-eval-v2:tmmluplus-clinical_psychology:5 | 0|acc |0.3840|± |0.0437|
|
206 |
+
|community:tc-eval-v2:tmmluplus-computer_science:5 | 0|acc |0.4195|± |0.0375|
|
207 |
+
|community:tc-eval-v2:tmmluplus-culinary_skills:5 | 0|acc |0.4589|± |0.0292|
|
208 |
+
|community:tc-eval-v2:tmmluplus-dentistry:5 | 0|acc |0.3885|± |0.0244|
|
209 |
+
|community:tc-eval-v2:tmmluplus-economics:5 | 0|acc |0.3053|± |0.0233|
|
210 |
+
|community:tc-eval-v2:tmmluplus-education:5 | 0|acc |0.4355|± |0.0447|
|
211 |
+
|community:tc-eval-v2:tmmluplus-education_(profession_level):5 | 0|acc |0.2819|± |0.0204|
|
212 |
+
|community:tc-eval-v2:tmmluplus-educational_psychology:5 | 0|acc |0.4489|± |0.0376|
|
213 |
+
|community:tc-eval-v2:tmmluplus-engineering_math:5 | 0|acc |0.2718|± |0.0441|
|
214 |
+
|community:tc-eval-v2:tmmluplus-finance_banking:5 | 0|acc |0.3037|± |0.0397|
|
215 |
+
|community:tc-eval-v2:tmmluplus-financial_analysis:5 | 0|acc |0.2801|± |0.0230|
|
216 |
+
|community:tc-eval-v2:tmmluplus-fire_science:5 | 0|acc |0.2500|± |0.0390|
|
217 |
+
|community:tc-eval-v2:tmmluplus-general_principles_of_law:5 | 0|acc |0.3113|± |0.0452|
|
218 |
+
|community:tc-eval-v2:tmmluplus-geography_of_taiwan:5 | 0|acc |0.4492|± |0.0180|
|
219 |
+
|community:tc-eval-v2:tmmluplus-human_behavior:5 | 0|acc |0.3883|± |0.0278|
|
220 |
+
|community:tc-eval-v2:tmmluplus-insurance_studies:5 | 0|acc |0.3487|± |0.0173|
|
221 |
+
|community:tc-eval-v2:tmmluplus-introduction_to_law:5 | 0|acc |0.3165|± |0.0303|
|
222 |
+
|community:tc-eval-v2:tmmluplus-jce_humanities:5 | 0|acc |0.3444|± |0.0504|
|
223 |
+
|community:tc-eval-v2:tmmluplus-junior_chemistry:5 | 0|acc |0.3158|± |0.0322|
|
224 |
+
|community:tc-eval-v2:tmmluplus-junior_chinese_exam:5 | 0|acc |0.4171|± |0.0374|
|
225 |
+
|community:tc-eval-v2:tmmluplus-junior_math_exam:5 | 0|acc |0.2286|± |0.0318|
|
226 |
+
|community:tc-eval-v2:tmmluplus-junior_science_exam:5 | 0|acc |0.3427|± |0.0326|
|
227 |
+
|community:tc-eval-v2:tmmluplus-junior_social_studies:5 | 0|acc |0.4683|± |0.0446|
|
228 |
+
|community:tc-eval-v2:tmmluplus-logic_reasoning:5 | 0|acc |0.2734|± |0.0379|
|
229 |
+
|community:tc-eval-v2:tmmluplus-macroeconomics:5 | 0|acc |0.3187|± |0.0230|
|
230 |
+
|community:tc-eval-v2:tmmluplus-management_accounting:5 | 0|acc |0.2977|± |0.0313|
|
231 |
+
|community:tc-eval-v2:tmmluplus-marketing_management:5 | 0|acc |0.4624|± |0.0520|
|
232 |
+
|community:tc-eval-v2:tmmluplus-mechanical:5 | 0|acc |0.4831|± |0.0462|
|
233 |
+
|community:tc-eval-v2:tmmluplus-music:5 | 0|acc |0.3993|± |0.0294|
|
234 |
+
|community:tc-eval-v2:tmmluplus-national_protection:5 | 0|acc |0.4929|± |0.0345|
|
235 |
+
|community:tc-eval-v2:tmmluplus-nautical_science:5 | 0|acc |0.2777|± |0.0191|
|
236 |
+
|community:tc-eval-v2:tmmluplus-occupational_therapy_for_psychological_disorders:5| 0|acc |0.4438|± |0.0213|
|
237 |
+
|community:tc-eval-v2:tmmluplus-official_document_management:5 | 0|acc |0.3559|± |0.0322|
|
238 |
+
|community:tc-eval-v2:tmmluplus-optometry:5 | 0|acc |0.2804|± |0.0148|
|
239 |
+
|community:tc-eval-v2:tmmluplus-organic_chemistry:5 | 0|acc |0.3486|± |0.0459|
|
240 |
+
|community:tc-eval-v2:tmmluplus-pharmacology:5 | 0|acc |0.3397|± |0.0197|
|
241 |
+
|community:tc-eval-v2:tmmluplus-pharmacy:5 | 0|acc |0.2174|± |0.0209|
|
242 |
+
|community:tc-eval-v2:tmmluplus-physical_education:5 | 0|acc |0.3966|± |0.0367|
|
243 |
+
|community:tc-eval-v2:tmmluplus-physics:5 | 0|acc |0.2371|± |0.0434|
|
244 |
+
|community:tc-eval-v2:tmmluplus-politic_science:5 | 0|acc |0.3407|± |0.0150|
|
245 |
+
|community:tc-eval-v2:tmmluplus-real_estate:5 | 0|acc |0.3804|± |0.0509|
|
246 |
+
|community:tc-eval-v2:tmmluplus-secondary_physics:5 | 0|acc |0.3393|± |0.0449|
|
247 |
+
|community:tc-eval-v2:tmmluplus-statistics_and_machine_learning:5 | 0|acc |0.3438|± |0.0318|
|
248 |
+
|community:tc-eval-v2:tmmluplus-taiwanese_hokkien:5 | 0|acc |0.2636|± |0.0389|
|
249 |
+
|community:tc-eval-v2:tmmluplus-taxation:5 | 0|acc |0.2507|± |0.0224|
|
250 |
+
|community:tc-eval-v2:tmmluplus-technical:5 | 0|acc |0.4204|± |0.0247|
|
251 |
+
|community:tc-eval-v2:tmmluplus-three_principles_of_people:5 | 0|acc |0.5396|± |0.0424|
|
252 |
+
|community:tc-eval-v2:tmmluplus-trade:5 | 0|acc |0.2251|± |0.0187|
|
253 |
+
|community:tc-eval-v2:tmmluplus-traditional_chinese_medicine_clinical_medicine:5 | 0|acc |0.3094|± |0.0278|
|
254 |
+
|community:tc-eval-v2:tmmluplus-trust_practice:5 | 0|acc |0.3292|± |0.0235|
|
255 |
+
|community:tc-eval-v2:tmmluplus-ttqav2:5 | 0|acc |0.6726|± |0.0443|
|
256 |
+
|community:tc-eval-v2:tmmluplus-tve_chinese_language:5 | 0|acc |0.4161|± |0.0225|
|
257 |
+
|community:tc-eval-v2:tmmluplus-tve_design:5 | 0|acc |0.4542|± |0.0227|
|
258 |
+
|community:tc-eval-v2:tmmluplus-tve_mathematics:5 | 0|acc |0.2733|± |0.0365|
|
259 |
+
|community:tc-eval-v2:tmmluplus-tve_natural_sciences:5 | 0|acc |0.3349|± |0.0229|
|
260 |
+
|community:tc-eval-v2:tmmluplus-veterinary_pathology:5 | 0|acc |0.2544|± |0.0259|
|
261 |
+
|community:tc-eval-v2:tmmluplus-veterinary_pharmacology:5 | 0|acc |0.3259|± |0.0202|
|
|
|
|