説明の修正 (Fix the description)

src/about.py CHANGED (+23 -39)
```diff
@@ -51,16 +51,16 @@ class Tasks(Enum):
     alt_j_to_e_comet_wmt22 = Task("scores", "alt-j-to-e_comet_wmt22", "ALT J to E COMET WMT22 ⭐", TaskType.MT)
     chabsa_set_f1 = Task("scores", "chabsa_set_f1", "ChABSA ⭐", TaskType.EL)
     commonsensemoralja_exact_match = Task(
-        "scores", "commonsensemoralja_exact_match", "CommonSenseMoralJA", TaskType.MC
+        "scores", "commonsensemoralja_exact_match", "CommonSenseMoralJA ⭐", TaskType.MC
     )
-    jamp_exact_match = Task("scores", "jamp_exact_match", "JAMP", TaskType.NLI)
-    janli_exact_match = Task("scores", "janli_exact_match", "JANLI", TaskType.NLI)
-    jcommonsenseqa_exact_match = Task("scores", "jcommonsenseqa_exact_match", "JCommonSenseQA", TaskType.MC)
-    jemhopqa_char_f1 = Task("scores", "jemhopqa_char_f1", "JEMHopQA", TaskType.QA)
-    jmmlu_exact_match = Task("scores", "jmmlu_exact_match", "JMMLU", TaskType.HE)
-    jnli_exact_match = Task("scores", "jnli_exact_match", "JNLI", TaskType.NLI)
-    jsem_exact_match = Task("scores", "jsem_exact_match", "JSEM", TaskType.NLI)
-    jsick_exact_match = Task("scores", "jsick_exact_match", "JSICK", TaskType.NLI)
+    jamp_exact_match = Task("scores", "jamp_exact_match", "JAMP ⭐", TaskType.NLI)
+    janli_exact_match = Task("scores", "janli_exact_match", "JANLI ⭐", TaskType.NLI)
+    jcommonsenseqa_exact_match = Task("scores", "jcommonsenseqa_exact_match", "JCommonSenseQA ⭐", TaskType.MC)
+    jemhopqa_char_f1 = Task("scores", "jemhopqa_char_f1", "JEMHopQA ⭐", TaskType.QA)
+    jmmlu_exact_match = Task("scores", "jmmlu_exact_match", "JMMLU ⭐", TaskType.HE)
+    jnli_exact_match = Task("scores", "jnli_exact_match", "JNLI ⭐", TaskType.NLI)
+    jsem_exact_match = Task("scores", "jsem_exact_match", "JSEM ⭐", TaskType.NLI)
+    jsick_exact_match = Task("scores", "jsick_exact_match", "JSICK ⭐", TaskType.NLI)
     jsquad_char_f1 = Task("scores", "jsquad_char_f1", "JSquad ⭐", TaskType.RC)
     jsts_pearson = Task(
         "scores", "jsts_pearson", "JSTS (Pearson)", TaskType.STS
@@ -68,18 +68,18 @@ class Tasks(Enum):
     jsts_spearman = Task(
         "scores", "jsts_spearman", "JSTS (Spearman)", TaskType.STS
     ) # Semantic Textual Similarity - 意味的類似度
-    kuci_exact_match = Task("scores", "kuci_exact_match", "KUCI", TaskType.MC)
+    kuci_exact_match = Task("scores", "kuci_exact_match", "KUCI ⭐", TaskType.MC)
     mawps_exact_match = Task("scores", "mawps_exact_match", "MAWPS ⭐", TaskType.MR)
     mbpp_code_exec = Task("scores", "mbpp_code_exec", "MBPP (exec) ⭐", TaskType.CG)
     mbpp_pylint_check = Task("scores", "mbpp_pylint_check", "MBPP (pylint)", TaskType.CG)
-    mmlu_en_exact_match = Task("scores", "mmlu_en_exact_match", "MMLU", TaskType.HE)
-    niilc_char_f1 = Task("scores", "niilc_char_f1", "NIILC", TaskType.QA)
-    aio_char_f1 = Task("scores", "aio_char_f1", "JAQKET", TaskType.QA)
-    wiki_coreference_set_f1 = Task("scores", "wiki_coreference_set_f1", "Wiki Coreference", TaskType.FA)
-    wiki_dependency_set_f1 = Task("scores", "wiki_dependency_set_f1", "Wiki Dependency", TaskType.FA)
-    wiki_ner_set_f1 = Task("scores", "wiki_ner_set_f1", "Wiki NER", TaskType.FA)
-    wiki_pas_set_f1 = Task("scores", "wiki_pas_set_f1", "Wiki PAS", TaskType.FA)
-    wiki_reading_char_f1 = Task("scores", "wiki_reading_char_f1", "Wiki Reading", TaskType.FA)
+    mmlu_en_exact_match = Task("scores", "mmlu_en_exact_match", "MMLU ⭐", TaskType.HE)
+    niilc_char_f1 = Task("scores", "niilc_char_f1", "NIILC ⭐", TaskType.QA)
+    aio_char_f1 = Task("scores", "aio_char_f1", "JAQKET ⭐", TaskType.QA)
+    wiki_coreference_set_f1 = Task("scores", "wiki_coreference_set_f1", "Wiki Coreference ⭐", TaskType.FA)
+    wiki_dependency_set_f1 = Task("scores", "wiki_dependency_set_f1", "Wiki Dependency ⭐", TaskType.FA)
+    wiki_ner_set_f1 = Task("scores", "wiki_ner_set_f1", "Wiki NER ⭐", TaskType.FA)
+    wiki_pas_set_f1 = Task("scores", "wiki_pas_set_f1", "Wiki PAS ⭐", TaskType.FA)
+    wiki_reading_char_f1 = Task("scores", "wiki_reading_char_f1", "Wiki Reading ⭐", TaskType.FA)
     wikicorpus_e_to_j_bert_score_ja_f1 = Task(
         "scores", "wikicorpus-e-to-j_bert_score_ja_f1", "WikiCorpus E to J BERT Score", TaskType.MT
     )
@@ -221,21 +221,13 @@ This task is supported by llm-jp-eval, but it is not included in the evaluation
 To reproduce our results, please follow the instructions of the evaluation tool, **llm-jp-eval**, available in [Japanese](https://github.com/llm-jp/llm-jp-eval/blob/main/README.md) and in [English](https://github.com/llm-jp/llm-jp-eval/blob/main/README_en.md).
 
 ## Average Score Calculation
-
-
-Tasks included in average calculation:
-- RC: JSQuAD ⭐
-- EL: ChABSA ⭐
-- MR: MAWPS ⭐
-- MT: ALT E to J COMET WMT22 ⭐, ALT J to E COMET WMT22 ⭐, WikiCorpus E to J COMET WMT22 ⭐, WikiCorpus J to E COMET WMT22 ⭐
-- CG: MBPP (exec) ⭐
-- SUM: XL-Sum ROUGE2 ⭐
+The calculation of the average score (AVG) includes only the scores marked with a ⭐.
 
 """
 
 LLM_BENCHMARKS_TEXT_JA = """
 ## 仕組み
-📈
+📈 評価ツール [llm-jp-eval](https://github.com/llm-jp/llm-jp-eval) を活用し、16種類のタスクで日本語の大規模言語モデルを評価します。このツールは、様々な評価タスクで日本語LLMを評価するための統一的なフレームワークです。
 
 **NLI(自然言語推論)**
 
@@ -293,7 +285,7 @@ LLM_BENCHMARKS_TEXT_JA = """
 
 **STS(意味的テキスト類似度)**
 
-このタスクはllm-jp-eval
+このタスクはllm-jp-evalでサポートされていますが、平均スコア (AVG) の計算には含まれていません。
 
 * `JSTS`、STS(Semantic Textual Similarity)の日本語版(JGLUEの一部)[ソース](https://github.com/yahoojapan/JGLUE)(ライセンス CC BY-SA 4.0)
 
@@ -312,18 +304,10 @@ LLM_BENCHMARKS_TEXT_JA = """
 * `XL-Sum`、44言語の大規模多言語抽象型要約データセットの日本語部分 [ソース](https://github.com/csebuetnlp/xl-sum)(ライセンス CC BY-NC-SA 4.0、非商用ライセンスのため、このデータセットは使用しません。ライセンスと利用規約に明確に同意した場合を除きます)
 
 ## 再現性
-
+結果を再現するには、評価ツール **llm-jp-eval** の指示に従ってください。詳細は [日本語](https://github.com/llm-jp/llm-jp-eval/blob/main/README.md) と [英語](https://github.com/llm-jp/llm-jp-eval/blob/main/README_en.md) でご覧いただけます。
 
 ## 平均スコアの計算について
-
-
-平均値計算に含まれるタスク:
-- RC:JSQuAD ⭐
-- EL:ChABSA ⭐
-- MR:MAWPS ⭐
-- MT:ALT E to J COMET WMT22 ⭐、ALT J to E COMET WMT22 ⭐、WikiCorpus E to J COMET WMT22 ⭐、WikiCorpus J to E COMET WMT22 ⭐
-- CG:MBPP (exec) ⭐
-- SUM:XL-Sum ROUGE2 ⭐
+平均スコア (AVG) の計算には、⭐マークのついたスコアのみが含まれます
 """
 
 
```
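To make the ⭐ convention concrete, here is a minimal, runnable sketch of how a leaderboard could derive the AVG column from it. This is an illustration, not the Space's actual code: the `Task` field names (`group`, `metric`, `col_name`, `task_type`), the spelled-out `TaskType` labels, and the `starred_average` helper are assumptions; only the four-argument `Task(...)` call shape, the task categories, and the rule that the average covers exactly the ⭐-marked scores come from the diff above.

```python
# Minimal sketch (assumed names, not the Space's real implementation) of
# computing the AVG column from the ⭐ markers introduced in the diff above.
from dataclasses import dataclass
from enum import Enum


class TaskType(Enum):
    # Category labels are assumptions based on the llm-jp-eval task groupings.
    NLI = "Natural Language Inference"
    MC = "Multiple Choice QA"
    QA = "Question Answering"
    RC = "Reading Comprehension"
    EL = "Entity Linking"
    FA = "Fundamental Analysis"
    MR = "Mathematical Reasoning"
    MT = "Machine Translation"
    HE = "Human Examination"
    CG = "Code Generation"
    SUM = "Summarization"
    STS = "Semantic Textual Similarity"


@dataclass(frozen=True)
class Task:
    group: str        # section of the results file; "scores" throughout the diff
    metric: str       # metric key in the results file
    col_name: str     # leaderboard column name; a trailing ⭐ opts it into AVG
    task_type: TaskType


class Tasks(Enum):
    # Two members copied from the new side of the diff: one starred, one not.
    jamp_exact_match = Task("scores", "jamp_exact_match", "JAMP ⭐", TaskType.NLI)
    jsts_pearson = Task("scores", "jsts_pearson", "JSTS (Pearson)", TaskType.STS)


def starred_average(scores: dict[str, float]) -> float:
    """Average only the metrics whose column name carries the ⭐ marker."""
    starred = [t.value.metric for t in Tasks if "⭐" in t.value.col_name]
    return sum(scores[m] for m in starred) / len(starred)


if __name__ == "__main__":
    # JSTS (Pearson) is unstarred, so only JAMP contributes: prints 0.8.
    print(starred_average({"jamp_exact_match": 0.8, "jsts_pearson": 0.9}))
```

Keeping the marker in the display string makes the opt-in visible to users and lets the AVG logic stay a one-line filter, which is presumably why the commit edits only the column names rather than introducing a separate flag.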