# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Matching metric for sentiment quadruples produced by generative models.

A prediction or reference is a string of quadruples
``target | opinion | aspect | polarity`` joined by ``' & '``.  The module
reports an exact-match F1 and a soft score obtained from an optimal
(Hungarian) assignment between predicted and reference quadruples.
"""

import copy
import re
from typing import Any, Callable, Dict, List, Tuple, Union

import numpy as np
import datasets
import evaluate
from rouge_chinese import Rouge
from scipy.optimize import linear_sum_assignment

# TODO: Add BibTeX citation
_CITATION = """\
@InProceedings{huggingface:module,
title = {A great new module},
authors={huggingface, Inc.},
year={2020}
}
"""

# TODO: Add description of the module here
_DESCRIPTION = """\
evaluate sentiment quadruples.
评估生成模型的情感四元组
"""

# TODO: Add description of the arguments of the module here
_KWARGS_DESCRIPTION = """
Calculates how good are predictions given some references, using certain scores
Args:
    predictions: list of predictions to score. Each predictions
        should be a string with tokens separated by spaces.
    references: list of reference for each prediction. Each
        reference should be a string with tokens separated by spaces.
Returns:
    score: sentiment quadruple match score
Examples:
    Examples should be written in doctest format, and should illustrate how
    to use the function.

    >>> my_new_module = evaluate.load("my_new_module")
    >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
    >>> print(results)
    {'accuracy': 1.0}
"""

# Field names of one quadruple, in positional order (index 0..3).
_QUAD_FIELDS = ('target_text', 'opinion_text', 'aspect', 'polarity')
# Placeholder stored for a field that a quadruple does not provide.
_MISSING_UNIT = 'None'
# Matches a single CJK unified ideograph in the range U+4E00..U+9FA5.
_ZH_CHAR = re.compile('[\u4e00-\u9fa5]')

_POLARITY_EN2ZH = {
    'positive': '积极', 'negative': '消极', 'neutral': '中性',
    'pos': '积极', 'neg': '消极', 'neu': '中性',
    '积极': '积极', '消极': '消极', '中性': '中性',
}
_POLARITY_ZH2EN = {
    '积极': 'pos', '消极': 'neg', '中性': 'neu',
    'pos': 'pos', 'neg': 'neg', 'neu': 'neu',
    'positive': 'pos', 'negative': 'neg', 'neutral': 'neu',
}


def compute_quadruple_f1(y_pred: List[str], y_true: Union[List[str], List[List[str]]],
                         return_rp=False, **kwargs) -> Union[float, Dict[str, float]]:
    """Compute the exact-match F1 between predicted and reference quadruples.

    Args:
        y_pred: one quadruple string per sample.
        y_true: one reference per sample; either a single string or a
            sequence of alternative reference strings, in which case the
            alternative sharing the most quadruples with the prediction is used.
        return_rp: when true, return precision and recall as well.
        **kwargs: forwarded to ``CommentUnitsSim.from_str``.

    Returns:
        The F1 value rounded to 4 decimals, or a dict with ``precision``,
        ``recall`` and ``f1`` when ``return_rp`` is true.
    """
    assert len(y_pred) == len(y_true)

    correct, pred_num, true_num = 0, 0, 0
    for pred_str, true_item in zip(y_pred, y_true):
        pred = CommentUnitsSim.from_str(pred_str, **kwargs)
        if isinstance(true_item, str):
            candidates = [CommentUnitsSim.from_str(true_item, **kwargs)]
        else:
            candidates = [CommentUnitsSim.from_str(t, **kwargs) for t in true_item]

        # With several alternatives keep the (first) one with the most hits.
        hits = [pred.compare_same(candidate) for candidate in candidates]
        best = hits.index(max(hits))
        correct += hits[best]
        pred_num += pred.num
        true_num += candidates[best].num

    # Guard the ratios so that empty input yields 0 instead of raising.
    # The small epsilon keeps the F1 denominator non-zero.
    precision = round(correct / pred_num if pred_num else 0.0, 4) + 1e-8
    recall = round(correct / true_num if true_num else 0.0, 4) + 1e-8
    f1 = round(2 * precision * recall / (precision + recall), 4)

    if return_rp:
        return {"precision": precision, "recall": recall, "f1": f1}
    return f1


def get_rougel_f1(text_pred_list: List[str], text_true_list: List[str]) -> float:
    """Return the average ROUGE-L F1 over aligned prediction/reference texts.

    A pair in which either text is blank scores 0 and is not passed to Rouge.
    If any text contains Chinese characters, all texts are split into
    space-separated characters before scoring.
    """
    assert len(text_pred_list) == len(text_true_list), "文本数量不一致"

    scorable = [(pred, true) for pred, true in zip(text_pred_list, text_true_list)
                if pred.strip() and true.strip()]
    if not scorable:
        return 0

    if any(_ZH_CHAR.search(pred) or _ZH_CHAR.search(true) for pred, true in scorable):
        scorable = [(' '.join(pred), ' '.join(true)) for pred, true in scorable]

    preds = [pred for pred, _ in scorable]
    trues = [true for _, true in scorable]
    rouge_l_f1 = Rouge().get_scores(preds, trues, avg=True)['rouge-l']['f']
    # Blank pairs contribute 0 to the average over all input pairs.
    return rouge_l_f1 * len(scorable) / len(text_pred_list)


class CommentUnitsSim:
    """Container for the sentiment quadruples extracted from one comment.

    ``data`` is a list of dicts with the keys ``target_text``,
    ``opinion_text``, ``aspect`` and ``polarity``.
    """

    def __init__(self, data: List[Dict[str, str]], data_source: Any = None,
                 abnormal=False, language=None):
        """Store a deep copy of ``data``.

        The keys ``target`` and ``opinion`` are renamed to ``target_text``
        and ``opinion_text``.  When ``language`` is None it is set to
        ``'zh'`` if any target/opinion contains Chinese, else ``'en'``.
        """
        self.data_source = data_source
        self.abnormal = abnormal
        data = copy.deepcopy(data)
        for quad_dict in data:
            if 'target' in quad_dict:
                quad_dict['target_text'] = quad_dict.pop('target')
            if 'opinion' in quad_dict:
                quad_dict['opinion_text'] = quad_dict.pop('opinion')
        self.data = data
        # Per-instance copies so that callers may customise the mappings.
        self.polarity_en2zh = dict(_POLARITY_EN2ZH)
        self.polarity_zh2en = dict(_POLARITY_ZH2EN)
        self.language = language if language is not None else 'zh' if self.check_zh() else 'en'
        self.none_sign = 'null'

    @property
    def num(self):
        """Number of quadruples held."""
        return len(self.data)

    def _column(self, field: str) -> list:
        """Return the values of ``field`` for every quadruple, in order."""
        return [quad_dict[field] for quad_dict in self.data]

    def check_zh(self):
        """Return True if any target or opinion text contains Chinese."""
        return any(
            _ZH_CHAR.search(quad_dict.get(field) or '')
            for quad_dict in self.data
            for field in ('target_text', 'opinion_text')
        )

    def check_polarity(self):
        """Return True if every polarity is a known label.

        On the first unknown label the instance is flagged ``abnormal``
        and False is returned.
        """
        for quad_dict in self.data:
            if quad_dict['polarity'] not in _POLARITY_EN2ZH:
                self.abnormal = True
                return False
        return True

    def convert_polarity_en2zh(self):
        """Map polarity labels to Chinese in place; unknown labels are kept."""
        for quad_dict in self.data:
            label = quad_dict['polarity']
            quad_dict['polarity'] = self.polarity_en2zh.get(label, label)
        return self

    def convert_polarity_zh2en(self):
        """Map polarity labels to pos/neg/neu in place; unknown labels are kept."""
        for quad_dict in self.data:
            label = quad_dict['polarity']
            quad_dict['polarity'] = self.polarity_zh2en.get(label, label)
        return self

    def del_duplicate(self):
        """Drop repeated quadruples, keeping first occurrences in order."""
        unique = []
        for quad_dict in self.data:
            if quad_dict not in unique:
                unique.append(quad_dict)
        self.data = unique
        return self

    def check_target_opinion_null(self):
        """Return True if some quadruple has both target and opinion null."""
        return any(
            quad_dict['target_text'] == self.none_sign and quad_dict['opinion_text'] == self.none_sign
            for quad_dict in self.data
        )

    def check_any_null(self):
        """Return True if some quadruple has a null target or a null opinion."""
        return any(
            quad_dict['target_text'] == self.none_sign or quad_dict['opinion_text'] == self.none_sign
            for quad_dict in self.data
        )

    @staticmethod
    def _pad_separators(text: str, sep_token1: str, sep_token2: str) -> str:
        """Insert a space after every separator directly followed by a non-space."""
        seps = {sep.strip() for sep in (sep_token1, sep_token2)} - {''}
        if not seps:
            return text
        alternatives = '|'.join(re.escape(sep) for sep in sorted(seps, key=len, reverse=True))
        return re.sub('(' + alternatives + ')(?=[^ ])', lambda m: m.group(1) + ' ', text)

    @classmethod
    def from_str(cls, quadruple_str: str, tuple_len: Union[int, list, str] = 4, format_code=0,
                 sep_token1=' & ', sep_token2=' | '):
        """Parse a string of quadruples into a ``CommentUnitsSim``.

        Args:
            quadruple_str: quadruples joined by ``sep_token1``; the units of
                one quadruple are joined by ``sep_token2``.
            tuple_len: which of the fields (0 target, 1 opinion, 2 aspect,
                3 polarity) the string provides.  An int ``n`` means the
                first ``n`` fields; a list or digit string such as ``'013'``
                names them explicitly; ``''`` means all four.
            format_code: only ``0`` is supported.
            sep_token1: separator between quadruples.
            sep_token2: separator between the units of one quadruple.

        Returns:
            A new instance whose ``abnormal`` flag is set when some
            quadruple had too many or too few units.
        """
        # NOTE(review): in the reviewed copy of this file the text between
        # the "too few elements" test and the header of ``compare_same`` was
        # lost.  The padding with 'None', the message printed for short
        # quadruples and the handling of other format codes are a
        # reconstruction -- confirm against the upstream implementation.
        abnormal = False
        quadruple_str = cls._pad_separators(quadruple_str, sep_token1, sep_token2)

        # Build the list of field indices present in each tuple.
        if isinstance(tuple_len, int):
            tuple_index = list(range(tuple_len))
        elif isinstance(tuple_len, list):
            tuple_index = tuple_len
        elif isinstance(tuple_len, str):
            # e.g. '012' -> [0, 1, 2]; '' is documented as the full quadruple.
            tuple_index = [int(i) for i in tuple_len] or list(range(len(_QUAD_FIELDS)))
        else:
            raise Exception('tuple_len参数错误')
        if any(index not in range(len(_QUAD_FIELDS)) for index in tuple_index):
            raise Exception('tuple_len参数错误')

        if format_code != 0:
            raise ValueError('unsupported format_code: {!r}'.format(format_code))

        data = []
        for quadruple in quadruple_str.split(sep_token1):
            units = [unit.strip() for unit in quadruple.split(sep_token2)]
            if len(units) > len(tuple_index):
                print('quadruple格式错误,过多元素', quadruple_str)
                abnormal = True
                units = units[:len(tuple_index)]  # truncate surplus units
            elif len(units) < len(tuple_index):
                print('quadruple格式错误,过少元素', quadruple_str)
                abnormal = True
                units = units + [_MISSING_UNIT] * (len(tuple_index) - len(units))

            # Fields that the tuple does not carry stay at the placeholder.
            quad_dict = dict.fromkeys(_QUAD_FIELDS, _MISSING_UNIT)
            for index, unit in zip(tuple_index, units):
                quad_dict[_QUAD_FIELDS[index]] = unit
            data.append(quad_dict)

        return cls(data, data_source=quadruple_str, abnormal=abnormal)

    def compare_same(self, other: 'CommentUnitsSim') -> int:
        """Count the quadruples of ``self`` that also occur in ``other``."""
        return sum(1 for quad_dict in self.data if quad_dict in other.data)

    def check_target_repeat(self):
        """Return True if two quadruples share the same target."""
        targets = self._column('target_text')
        return len(targets) != len(set(targets))

    def check_opinion_repeat(self):
        """Return True if two quadruples share the same opinion."""
        opinions = self._column('opinion_text')
        return len(opinions) != len(set(opinions))

    def check_aspect_repeat(self):
        """Return True if two quadruples share the same aspect."""
        aspects = self._column('aspect')
        return len(aspects) != len(set(aspects))

    def get_aspect_list(self):
        """Return the aspect of every quadruple."""
        return self._column('aspect')

    def get_target_list(self):
        """Return the target of every quadruple."""
        return self._column('target_text')

    def get_opinion_list(self):
        """Return the opinion of every quadruple."""
        return self._column('opinion_text')

    def get_polarity_list(self):
        """Return the polarity of every quadruple."""
        return self._column('polarity')

    def merge_polarity(self):
        """Summarise all polarities as one label in the instance language.

        Both positive and negative present, or neither present, gives
        neutral; otherwise the one that is present is returned.
        """
        polarity_list = self.get_polarity_list()
        if self.language == 'en':
            positive, negative, neutral = 'pos', 'neg', 'neu'
        else:
            positive, negative, neutral = '积极', '消极', '中性'

        has_positive = positive in polarity_list
        has_negative = negative in polarity_list
        if has_positive and not has_negative:
            return positive
        if has_negative and not has_positive:
            return negative
        return neutral

    def check_opinion_in_comment(self, comment_text):
        """Return True if every opinion other than ``'*'`` occurs in the comment."""
        return all(
            quad_dict['opinion_text'] == '*' or quad_dict['opinion_text'] in comment_text
            for quad_dict in self.data
        )

    def check_target_in_comment(self, comment_text):
        """Return True if every target other than ``'*'`` occurs in the comment."""
        return all(
            quad_dict['target_text'] == '*' or quad_dict['target_text'] in comment_text
            for quad_dict in self.data
        )

    @staticmethod
    def get_similarity(units1, units2: 'CommentUnitsSim'):
        """Placeholder for a similarity between two unit sets (not implemented)."""
        pass

    def apply(self, func: Callable, field: str):
        """Replace ``field`` of every quadruple by ``func(value)``, in place."""
        for quad_dict in self.data:
            quad_dict[field] = func(quad_dict[field])
        return self


class CommentUnitsMatch:
    """Weighted soft matcher between two sets of quadruples."""

    def __init__(self, target_weight=0.5, opinion_weight=0.5, aspect_weight=0.5, polarity_weight=0.5):
        """Store the four feature weights, normalised to sum to 1.

        Raises:
            ValueError: if the weights sum to zero.
        """
        weight_sum = target_weight + opinion_weight + aspect_weight + polarity_weight
        if weight_sum == 0:
            raise ValueError('quadruple weights must not sum to zero')
        self.target_weight = target_weight / weight_sum
        self.opinion_weight = opinion_weight / weight_sum
        self.aspect_weight = aspect_weight / weight_sum
        self.polarity_weight = polarity_weight / weight_sum

    def set_zero(self, feature: str = 'polarity'):
        """Set the weight of ``feature`` to zero (without renormalising)."""
        if feature == 'polarity':
            self.polarity_weight = 0
        elif feature == 'aspect':
            self.aspect_weight = 0
        elif 'opinion' in feature:
            self.opinion_weight = 0
        elif 'target' in feature:
            self.target_weight = 0
        else:
            raise Exception('feature参数错误')

    def re_normalize(self):
        """Rescale the four weights so that they sum to 1 again.

        Raises:
            ValueError: if all weights are zero.
        """
        weight_sum = self.target_weight + self.opinion_weight + self.aspect_weight + self.polarity_weight
        if weight_sum == 0:
            raise ValueError('quadruple weights must not sum to zero')
        self.target_weight = self.target_weight / weight_sum
        self.opinion_weight = self.opinion_weight / weight_sum
        self.aspect_weight = self.aspect_weight / weight_sum
        self.polarity_weight = self.polarity_weight / weight_sum

    @staticmethod
    def _feature_missing(units1: 'CommentUnitsSim', units2: 'CommentUnitsSim', feature: str) -> bool:
        """Return True if either side is empty or its first quadruple lacks ``feature``."""
        if not units1.data or not units2.data:
            return True
        return any(units.data[0].get(feature) in (None, _MISSING_UNIT) for units in (units1, units2))

    def get_cost_matrix(self, units1: 'CommentUnitsSim', units2: 'CommentUnitsSim', feature: str = 'polarity'):
        """Return the exact-match cost matrix for ``feature``.

        Entry ``[i][j]`` is 0 when quadruple ``i`` of ``units1`` and
        quadruple ``j`` of ``units2`` agree on ``feature`` and 1 otherwise.
        An all-zero matrix is returned when the feature is missing.  The
        matcher's weights are not modified; ``match_units`` excludes
        missing features for the current pair only.
        """
        if self._feature_missing(units1, units2, feature):
            return np.zeros((len(units1.data), len(units2.data)))
        return np.array([
            [0 if quad_dict1[feature] == quad_dict2[feature] else 1 for quad_dict2 in units2.data]
            for quad_dict1 in units1.data
        ])

    def get_cost_matrix_rouge(self, units1: 'CommentUnitsSim', units2: 'CommentUnitsSim',
                              feature: str = 'target_text'):
        """Return the ROUGE-L based cost matrix for ``feature``.

        Entry ``[i][j]`` is 0 for identical texts and ``1 - rouge_l_f1``
        otherwise.  An all-zero matrix is returned when the feature is
        missing.  The matcher's weights are not modified.
        """
        if self._feature_missing(units1, units2, feature):
            return np.zeros((len(units1.data), len(units2.data)))
        return np.array([
            [
                0 if quad_dict1[feature] == quad_dict2[feature]
                else 1 - get_rougel_f1([quad_dict1[feature]], [quad_dict2[feature]])
                for quad_dict2 in units2.data
            ]
            for quad_dict1 in units1.data
        ])

    def match_units(self, units1: 'CommentUnitsSim', units2: 'CommentUnitsSim', one_match=True) -> tuple:
        """Match predicted quadruples (``units1``) to reference ones (``units2``).

        Args:
            units1: predicted quadruples (matrix rows).
            units2: reference quadruples (matrix columns).
            one_match: when true use a one-to-one optimal assignment;
                otherwise every reference takes its cheapest prediction.

        Returns:
            ``(cost_per_quadruple, TP, FP, FN)`` where the cost lies in
            [0, 1], ``TP`` is the summed match score of the assignment and
            ``FP``/``FN`` are the prediction/reference counts minus ``TP``.
        """
        n_pred, n_true = units1.num, units2.num
        max_units_num = max(n_pred, n_true)
        if min(n_pred, n_true) == 0:
            # Nothing can be paired: every existing quadruple is unmatched.
            return (1.0 if max_units_num else 0.0), 0.0, float(n_pred), float(n_true)

        builders = (
            ('target_text', self.target_weight, self.get_cost_matrix_rouge),
            ('opinion_text', self.opinion_weight, self.get_cost_matrix_rouge),
            ('aspect', self.aspect_weight, self.get_cost_matrix),
            ('polarity', self.polarity_weight, self.get_cost_matrix),
        )
        # Features missing from this pair are left out and the remaining
        # weights are rescaled locally, so one sample never changes the
        # weighting applied to another.
        active = [(feature, weight, build) for feature, weight, build in builders
                  if not self._feature_missing(units1, units2, feature)]
        weight_sum = sum(weight for _, weight, _ in active)

        if weight_sum > 0:
            cost_matrix = np.zeros((n_pred, n_true))
            for feature, weight, build in active:
                if weight:
                    cost_matrix = cost_matrix + (weight / weight_sum) * build(units1, units2, feature)
        else:
            # No comparable feature carries weight: treat as full mismatch.
            cost_matrix = np.ones((n_pred, n_true))
        score_matrix = 1 - cost_matrix

        if one_match:
            # Hungarian algorithm; rows are predictions, columns references.
            row_ind, col_ind = linear_sum_assignment(cost_matrix)
        else:
            # One-to-many: each reference picks its cheapest prediction.
            row_ind = np.argmin(cost_matrix, axis=0)
            col_ind = np.arange(n_true)

        cost = float(cost_matrix[row_ind, col_ind].sum())
        TP = float(score_matrix[row_ind, col_ind].sum())
        FP = n_pred - TP
        FN = n_true - TP

        # Quadruples left without a partner cost 1 each.
        cost += max_units_num - len(row_ind)
        cost_per_quadruple = cost / max_units_num
        if cost_per_quadruple > 1 or cost_per_quadruple < 0:
            print('cost错误', cost_per_quadruple, 'pred:', units1.data, 'true:', units2.data)
            print(self.target_weight, self.opinion_weight, self.aspect_weight, self.polarity_weight)

        return cost_per_quadruple, TP, FP, FN


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class QuadMatch(evaluate.Metric):
    """Metric scoring generated sentiment quadruples against references."""

    def _info(self):
        # TODO: Specifies the evaluate.EvaluationModuleInfo object
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference
            features=[
                datasets.Features(
                    {
                        "predictions": datasets.Value("string", id="sequence"),
                        "references": datasets.Sequence(datasets.Value("string", id="sequence")),
                    }
                ),
                datasets.Features(
                    {
                        "predictions": datasets.Value("string", id="sequence"),
                        "references": datasets.Value("string", id="sequence"),
                    }
                ),
            ],
            # Homepage of the module for documentation
            homepage="http://module.homepage",
            # Additional links to the codebase or references
            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
            reference_urls=["http://path.to.reference.url/new_module"]
        )

    def _download_and_prepare(self, dl_manager):
        """Optional: download external resources useful to compute the scores"""
        # TODO: Download external resources if needed
        pass

    def _compute(self, predictions: List[str], references: Union[List[str], List[List[str]]],
                 quad_weights: tuple = (1, 1, 1, 1), **kwargs) -> dict:
        """Score predicted quadruple strings against their references.

        Args:
            predictions: predicted quadruple strings (a bare string is
                treated as a single sample).
            references: one reference per prediction; a string, or a
                list/tuple of alternative strings of which the one with the
                lowest matching cost is used.
            quad_weights: weights of target, opinion, aspect and polarity
                used when computing the matching cost.
            **kwargs: forwarded to ``CommentUnitsSim.from_str``:
                ``tuple_len`` (format of each tuple, see the mapping below),
                ``sep_token1`` (separator between tuples) and ``sep_token2``
                (separator between the units of one tuple).

        Returns:
            A dict with the average match score (``1 - cost``), the F1 of
            the optimal soft match, and the exact-match F1.

        Raises:
            TypeError: if a reference is neither a string nor a list/tuple.
            ValueError: if a reference list is empty.

        ``tuple_len`` mapping::

            '0123' or '': quadruples (target | opinion | aspect | polarity)
            '01':  pairs (target | opinion)
            '012': triples (target | opinion | aspect)
            '013': triples (target | opinion | polarity)
            '023': triples (target | aspect | polarity)
            '23':  pairs (aspect | polarity)
            '03':  pairs (target | polarity)
            '13':  pairs (opinion | polarity)
            '3':   single (polarity)
        """
        # Wrap a single sample before validating the lengths.
        if isinstance(predictions, str):
            predictions = [predictions]
            references = [references]
        assert len(predictions) == len(references)

        score_key = 'ave match score of weight ' + str(quad_weights)
        f1_match_key = 'f1 score of optimal match of weight ' + str(quad_weights)
        f1_key = 'f1 score of exact match'
        if len(predictions) == 0:
            return {score_key: 0.0, f1_match_key: 0.0, f1_key: 0.0}

        cost = 0.0
        TP, FP, FN = 0.0, 0.0, 0.0
        matcher = CommentUnitsMatch(*quad_weights)

        for pred_str, true_item in zip(predictions, references):
            pred = CommentUnitsSim.from_str(pred_str, **kwargs)
            if isinstance(true_item, str):
                candidates = [CommentUnitsSim.from_str(true_item, **kwargs)]
            elif isinstance(true_item, (list, tuple)):
                candidates = [CommentUnitsSim.from_str(t, **kwargs) for t in true_item]
            else:
                raise TypeError('true的类型不对: {!r}'.format(true_item))
            if not candidates:
                raise ValueError('reference list must not be empty')

            # Keep the alternative with the LOWEST cost, i.e. the best match.
            results = [matcher.match_units(pred, candidate, one_match=True) for candidate in candidates]
            cost_, TP_, FP_, FN_ = min(results, key=lambda result: result[0])
            cost += cost_
            TP += TP_
            FP += FP_
            FN += FN_

        cost = cost / len(predictions)

        # F1 of the optimal match; guarded so that zero overlap gives 0.
        precision_match = TP / (TP + FP) if (TP + FP) else 0.0
        recall_match = TP / (TP + FN) if (TP + FN) else 0.0
        if precision_match + recall_match:
            f1_match = 2 * precision_match * recall_match / (precision_match + recall_match)
        else:
            f1_match = 0.0

        f1 = compute_quadruple_f1(y_pred=predictions, y_true=references, **kwargs)

        # The reported score is 1 - cost.
        return {score_key: 1 - cost, f1_match_key: f1_match, f1_key: f1}