iknow-lab
/

ko-flan-zero-v0-0731

@@ -60,4 +60,186 @@ inference(
 25일 시에 따르면 연수구 옥련동 104 일대 29만1천725㎡(8만8천평)에 추진 중인 2만8천62가구 규모의 송도역세권구역 도시개발사업과 연계, KTX 송도역 복합환승센터와 상업시설·업무시설 등의 조성을 추진 중이다.  [SEP] 글을 읽고 시장에 미칠 영향을 판단해보세요
 {'긍정': -61.86758804321289, '부정': 23.72732925415039, '중립': -70.4837417602539}
-```

 25일 시에 따르면 연수구 옥련동 104 일대 29만1천725㎡(8만8천평)에 추진 중인 2만8천62가구 규모의 송도역세권구역 도시개발사업과 연계, KTX 송도역 복합환승센터와 상업시설·업무시설 등의 조성을 추진 중이다.  [SEP] 글을 읽고 시장에 미칠 영향을 판단해보세요
 {'긍정': -61.86758804321289, '부정': 23.72732925415039, '중립': -70.4837417602539}
+```
+## 평가(test set, 단 klue-ynat은 validation set)
+| task | accuracy |
+| --- | --- |
+| [nsmc](https://huggingface.co/datasets/nsmc) | 85.92 |
+| [jason9693/APEACH](https://huggingface.co/datasets/jason9693/APEACH) | 32.12 |
+| [klue-ynat](https://huggingface.co/datasets/klue) | 77.59 |
+| [kobest-boolq](https://huggingface.co/datasets/skt/kobest_v1) | 76.99 |
+| [kobest-copa](https://huggingface.co/datasets/skt/kobest_v1) | 61.2 |
+| [kobest-hellaswag](https://huggingface.co/datasets/skt/kobest_v1) | 77.6 |
+| [kobest-sentineg](https://huggingface.co/datasets/skt/kobest_v1) | 55.92 |
+| [kobest-wic](https://huggingface.co/datasets/skt/kobest_v1) | 58.49 |
+### 평가 방식
+- 모델에 `[CLS] {input} [SEP] {instruction} [SEP] {label} [SEP]` 형식으로 입력하고, 각 라벨에 대해 나온 점수를 positive와 negative끼리 비교함.
+- positive는 정답 라벨을 사용하고, negative는 정답 라벨이 아닌 모든 라벨을 사용함.
+- 정답 라벨의 점수가 모든 negative의 점수보다 높을 경우 맞춘 것으로 간주하고, 이 기준으로 accuracy를 측정함.
+
+테스트에 사용한 매핑 코드:
+```python
+klue_ynat_labelToTextDict = {
+    0: "IT과학",
+    1: "경제",
+    2: "사회",
+    3: "생활문화",
+    4: "세계",
+    5: "스포츠",
+    6: "정치",
+}
+klue_ynat_labels = set(klue_ynat_labelToTextDict.values())
+def klue_ynat_mapper(item):
+    positives = [klue_ynat_labelToTextDict[item["label"]]]
+    return {
+        "instruction": "문장을 읽고 주제를 분류하세요",
+        "input": item["title"],
+        "positives": positives,
+        "negatives": klue_ynat_labels - set(positives)
+    }
+kobest_wic_labels = ["아니오", "예"]
+def kobest_wic_mapper(item):
+    return {
+        "instruction": "주어진 두 문장에서 단어 {word}은(는) 동일한 의미로 사용되었나요?".format(word=item["word"]),
+        "input": "문장1: {context_1}\n문장2: {context_2}".format(**item),
+        "positives": [kobest_wic_labels[item['label']]],
+        "negatives": [kobest_wic_labels[1 - item['label']]]
+    }
+copa_question = {
+    "결과": "이후에 이어질 결과는?",
+    "원인": "이러한 일이 일어난 원인은?"
+}
+def kobest_copa_mapper(item):
+    answers = [item["alternative_1"], item["alternative_2"]]
+    return {
+        "instruction": copa_question[item["question"]],
+        "input": item["premise"],
+        "positives": [answers[item['label']]],
+        "negatives": [answers[1 - item['label']]]
+    }
+def kobest_hellaswag_mapper(item):
+    answers = [item[f"ending_{i}"] for i in range(1, 5)]
+    label = answers[item['label']]
+    answers.remove(label)
+    return {
+        "instruction": "이후에 이어질 내용으로 가장 적절한 것은?",
+        "input": item["context"],
+        "positives": [label],
+        "negatives": answers
+    }
+kobest_boolq_labels = ["아니오", "예"]
+def kobest_boolq_mapper(item):
+    return {
+        "instruction": item["question"],
+        "input": item["paragraph"],
+        "positives": [kobest_boolq_labels[item['label']]],
+        "negatives": [kobest_boolq_labels[1 - item['label']]]
+    }
+kobest_sentineg_labels = ["부정", "긍정"]
+def kobest_sentineg_mapper(item):
+    return {
+        "instruction": "주어진 문장의 감정을 분류하세요",
+        "input": item["sentence"],
+        "positives": [kobest_boolq_labels[item['label']]],
+        "negatives": [kobest_boolq_labels[1 - item['label']]]
+    }
+nsmc_labels = ["부정", "긍정"]
+def nsmc_mapper(item):
+    return {
+        "instruction": "주어진 문장의 감정을 분류하세요",
+        "input": item["document"],
+        "positives": [nsmc_labels[item['label']]],
+        "negatives": [nsmc_labels[1 - item['label']]]
+    }
+apeach_labels = ["혐오 표현이 아닙니다", "혐오표현"]
+def apeach_mapper(item):
+    return {
+        "instruction": "혐오성을 분류해보세요.",
+        "input": item["text"],
+        "positives": [nsmc_labels[item['class']]],
+        "negatives": [nsmc_labels[1 - item['class']]]
+    }
+EVAL_LIST = {
+    "klue-ynat": dict(
+        load_args=dict(
+            path="klue",
+            name="ynat",
+            split="validation"
+        ),
+        mapper=klue_ynat_mapper
+    ),
+    "nsmc": dict(
+        load_args=dict(
+            path="nsmc",
+            split="test"
+        ),
+        mapper=nsmc_mapper
+    ),
+    "apeach": dict(
+        load_args=dict(
+            path="jason9693/APEACH",
+            split="test"
+        ),
+        mapper=apeach_mapper
+    ),
+    "kobest-wic": dict(
+        load_args=dict(
+            path="skt/kobest_v1",
+            name="wic",
+            split="test"
+        ),
+        mapper=kobest_wic_mapper
+    ),
+    "kobest-copa": dict(
+        load_args=dict(
+            path="skt/kobest_v1",
+            name="copa",
+            split="test"
+        ),
+        mapper=kobest_copa_mapper
+    ),
+    "kobest-hellaswag": dict(
+        load_args=dict(
+            path="skt/kobest_v1",
+            name="hellaswag",
+            split="test"
+        ),
+        mapper=kobest_hellaswag_mapper
+    ),
+    "kobest-boolq": dict(
+        load_args=dict(
+            path="skt/kobest_v1",
+            name="boolq",
+            split="test"
+        ),
+        mapper=kobest_boolq_mapper
+    ),
+    "kobest-sentineg": dict(
+        load_args=dict(
+            path="skt/kobest_v1",
+            name="sentineg",
+            split="test"
+        ),
+        mapper=kobest_sentineg_mapper
+    )
+}
+```