Spaces:
Sleeping
Sleeping
JiaenLiu
committed on
Commit
·
4abcb2a
1
Parent(s):
11ff02b
bleu fix
Browse filesFormer-commit-id: 787a3bc38b29041f667fb2dbad0faca7089b83ba
evaluation/scores/__init__.py
ADDED
File without changes
|
evaluation/scores/multi_scores.py
CHANGED
@@ -10,11 +10,23 @@ class multi_scores:
|
|
10 |
self.LLM_model = LLM_eval.init_evaluator(source_lang=source_lang, target_lang=target_lang, domain=domain)
|
11 |
# self.score = {}
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
# The function to get the scores
|
14 |
# src: original sentence
|
15 |
# mt: machine translation
|
16 |
# ref: reference translation
|
17 |
def calculate_comet_llm(self, src:str, mt:str, ref:str) -> dict:
|
|
|
|
|
18 |
comet_score = self.comet_model.predict([{"src":src, "mt":mt, "ref":ref}], batch_size=8, gpus=0).scores[0]
|
19 |
# bleu_score = self.bleu_model.corpus_score([mt], [ref]).score
|
20 |
llm_acc, llm_completeness = LLM_eval.evaluate_prediction(src, ref, mt, self.LLM_model)
|
@@ -25,22 +37,27 @@ class multi_scores:
|
|
25 |
# self.score['llm_explanation'] = llm_explanation
|
26 |
|
27 |
def calculate_bleu(self, mts:list, refs:list) -> dict:
|
|
|
|
|
|
|
|
|
|
|
28 |
# mt and ref are list of sentences
|
29 |
bleu_score = self.bleu_model.corpus_score(mts, refs).score
|
30 |
return {'bleu_score':bleu_score}
|
31 |
|
32 |
def get_scores(self, src:str, mt:str, ref:str) -> dict:
|
33 |
comet_score = self.comet_model.predict([{"src":src, "mt":mt, "ref":ref}], batch_size=8, gpus=0).scores[0]
|
34 |
-
bleu_score = self.bleu_model.corpus_score([mt], [ref]).score
|
35 |
llm_acc, llm_completeness = LLM_eval.evaluate_prediction(src, ref, mt, self.LLM_model)
|
36 |
return {'bleu_score':bleu_score ,'comet_score':comet_score, 'llm_score':llm_acc[0], 'llm_explanation': llm_acc[1]}
|
37 |
|
38 |
|
39 |
if __name__ == "__main__":
|
40 |
-
src = "
|
41 |
-
mt = "
|
42 |
-
ref = "
|
43 |
-
print(multi_scores().get_scores(src, mt, ref))
|
44 |
# print(multi_scores().calculate_comet_llm(src, mt, ref))
|
45 |
-
|
46 |
|
|
|
10 |
self.LLM_model = LLM_eval.init_evaluator(source_lang=source_lang, target_lang=target_lang, domain=domain)
|
11 |
# self.score = {}
|
12 |
|
13 |
+
def __preprocess(self, src:str, mt:str, ref:str) -> dict:
|
14 |
+
# remove the space in the beginning and end of the sentence\
|
15 |
+
src = src.strip()
|
16 |
+
mt = mt.strip()
|
17 |
+
ref = ref.strip()
|
18 |
+
print(src, mt, ref)
|
19 |
+
return {'src':src, 'mt':mt, 'ref':ref}
|
20 |
+
|
21 |
+
|
22 |
+
|
23 |
# The function to get the scores
|
24 |
# src: original sentence
|
25 |
# mt: machine translation
|
26 |
# ref: reference translation
|
27 |
def calculate_comet_llm(self, src:str, mt:str, ref:str) -> dict:
|
28 |
+
# preprocess the input
|
29 |
+
src, mt, ref = self.__preprocess(src, mt, ref)
|
30 |
comet_score = self.comet_model.predict([{"src":src, "mt":mt, "ref":ref}], batch_size=8, gpus=0).scores[0]
|
31 |
# bleu_score = self.bleu_model.corpus_score([mt], [ref]).score
|
32 |
llm_acc, llm_completeness = LLM_eval.evaluate_prediction(src, ref, mt, self.LLM_model)
|
|
|
37 |
# self.score['llm_explanation'] = llm_explanation
|
38 |
|
39 |
def calculate_bleu(self, mts:list, refs:list) -> dict:
|
40 |
+
# src, mt, ref = self.__preprocess(src, mt, ref)
|
41 |
+
# remove the space in the beginning and end of the sentence for each sentence
|
42 |
+
# mts = [mt.strip() for mt in mts]
|
43 |
+
# refs = [ref.strip() for ref in refs]
|
44 |
+
# print(mts, refs)
|
45 |
# mt and ref are list of sentences
|
46 |
bleu_score = self.bleu_model.corpus_score(mts, refs).score
|
47 |
return {'bleu_score':bleu_score}
|
48 |
|
49 |
def get_scores(self, src:str, mt:str, ref:str) -> dict:
|
50 |
comet_score = self.comet_model.predict([{"src":src, "mt":mt, "ref":ref}], batch_size=8, gpus=0).scores[0]
|
51 |
+
bleu_score = self.bleu_model.corpus_score([mt], [[ref]]).score
|
52 |
llm_acc, llm_completeness = LLM_eval.evaluate_prediction(src, ref, mt, self.LLM_model)
|
53 |
return {'bleu_score':bleu_score ,'comet_score':comet_score, 'llm_score':llm_acc[0], 'llm_explanation': llm_acc[1]}
|
54 |
|
55 |
|
56 |
# Manual smoke test: scores a single hard-coded sentence triple.
if __name__ == "__main__":
    src = "South Korea playing with the Blue Proto's Probes"
    mt = "位于对角线的另一个角落 使用蓝色的Proto's Probes"
    # NOTE(review): ref keeps a leading space — presumably intentional test
    # data for the stripping logic; confirm before "fixing" it.
    ref = " 在对角落里使用蓝色神族探机 他的名字是..."
    # print(multi_scores().get_scores(src, mt, ref))
    # print(multi_scores().calculate_comet_llm(src, mt, ref))
    # calculate_bleu takes a hypothesis list and a list of reference streams.
    print(multi_scores().calculate_bleu([mt], [[ref]]))
|
63 |
|