hHoai committed on
Commit
68c0ef8
·
verified ·
1 Parent(s): 939d153

Update bartpho/utils.py

Browse files
Files changed (1) hide show
  1. bartpho/utils.py +272 -270
bartpho/utils.py CHANGED
@@ -1,270 +1,272 @@
1
- import torch
2
- import numpy as np
3
- from bartpho.preprocess import tokenize, normalize
4
-
5
-
6
# Aspect categories: English dataset label -> word-segmented Vietnamese phrase
# that is spliced into the target sentences scored by the model.
tag_dict = {
    "RESTAURANT#GENERAL": "chung về nhà_hàng",
    "RESTAURANT#PRICES": "giá của nhà_hàng",
    "RESTAURANT#MISCELLANEOUS": "tổng_quát về nhà_hàng",
    "FOOD#PRICES": "giá đồ ăn",
    "FOOD#QUALITY": "chất_lượng đồ ăn",
    "FOOD#STYLE&OPTIONS": "phong_cách và lựa_chọn đồ ăn",
    "DRINKS#PRICES": "giá đồ uống",
    "DRINKS#QUALITY": "chất_lượng đồ uống",
    "DRINKS#STYLE&OPTIONS": "phong_cách và lựa_chọn đồ uống",
    "AMBIENCE#GENERAL": "bầu không_khí",
    "SERVICE#GENERAL": "dịch_vụ",
    "LOCATION#GENERAL": "vị_trí",
}

# Polarity labels: English name -> Vietnamese phrase. "không có" must stay
# first: the prediction code treats polarity index 0 as "aspect not present".
polarity_dict = {
    "không có": "không có",
    "positive": "tích_cực",
    "neutral": "trung_lập",
    "negative": "tiêu_cực",
}

# Index-aligned views of the two dicts. They are derived (dicts keep insertion
# order) instead of being typed out a second time, so the English and
# Vietnamese lists can never drift apart.
polarity_list = list(polarity_dict.values())
tags = list(tag_dict.values())
eng_tags = list(tag_dict)
eng_polarity = list(polarity_dict)

# Candidates for the binary "is this aspect mentioned?" step:
# index 0 = 'không' (no), index 1 = 'có' (yes).
detect_labels = ['không', 'có']
no_polarity = len(polarity_list)
no_tag = len(tags)
39
-
40
def predict(model, text, tokenizer, model_tokenize=None, device='cuda', processed=True, printout=False):
    """Predict a polarity index for every aspect tag of one review.

    For each entry of ``tags`` one candidate sentence per polarity is built
    ("Nhận_xét <tag> <polarity> .") and given to the model as decoder input.
    The candidate whose tokens receive the largest product of probabilities
    is selected.

    Side effects: moves ``model`` to ``device``, switches it to eval mode and
    sets ``model.config.use_cache = False``.

    Args:
        model: Called as ``model(input_ids=..., decoder_input_ids=...)``;
            item ``[0]`` of the result is softmaxed over its last axis and
            indexed as ``[candidate, position, token_id]``. Presumably a
            Hugging Face BART-style seq2seq model -- confirm with the caller.
        text: Review text, used as-is unless ``processed`` is false.
        tokenizer: Callable returning a mapping with an ``'input_ids'``
            tensor when invoked with ``return_tensors='pt'``.
        model_tokenize: Forwarded to ``tokenize`` when ``processed`` is false.
        device: Torch device the model and inputs are moved to.
        processed: When false, ``text`` is first passed through
            ``normalize`` and ``tokenize``.
        printout: When true, print ``{english tag: english polarity}`` for
            every tag whose prediction is not 0.

    Returns:
        A list of ``no_tag`` ints, each an index into ``polarity_list``.
    """
    model.to(device)
    model.eval()
    model.config.use_cache = False

    if not processed:
        text = normalize(text)
        text = tokenize(text, model_tokenize)

    # The encoder input does not depend on the tag, so build it only once.
    input_ids = tokenizer([text] * no_polarity, return_tensors='pt')['input_ids'].to(device)

    predicts = []
    for tag in tags:
        target_list = ["Nhận_xét " + tag.lower() + " " + polarity.lower() + " ." for polarity in polarity_list]
        output_ids = tokenizer(target_list, return_tensors='pt', padding=True, truncation=True)['input_ids']

        with torch.no_grad():
            output = model(input_ids=input_ids, decoder_input_ids=output_ids.to(device))[0]
            probs = output.softmax(dim=-1).to('cpu').numpy()

        target_ids = output_ids.tolist()
        score_list = []
        for m in range(no_polarity):
            # Position n is scored against the token at n + 1; the last two
            # positions are skipped.
            # NOTE(review): targets are padded to a common length, so shorter
            # candidates also multiply in probabilities at padding positions
            # -- confirm this is intended.
            score = 1
            for n in range(probs[m].shape[0] - 2):
                score *= probs[m][n][target_ids[m][n + 1]]
            score_list.append(score)
        predicts.append(int(np.argmax(score_list)))

    if printout:
        result = {eng_tags[i]: eng_polarity[p] for i, p in enumerate(predicts) if p != 0}
        print(result)
    return predicts
75
-
76
def predict_df(model, df, tokenizer=None, model_tokenize=None, tokenizer_name='vinai/bartpho-word-base',
               device='cuda', processed=True, printout=True):
    """Run ``predict`` on every row of ``df`` and compute evaluation metrics.

    "Detect" metrics only look at whether a tag is present (value != 0);
    "Absa" metrics additionally require the polarity index to match.
    Accuracies are taken over all ``len(df) * no_tag`` tag slots. Precision,
    recall and F1 are computed per row and averaged over rows; a row with no
    overlap between labels and predictions contributes 0 to all three.

    Args:
        model: Model forwarded to ``predict``; moved to ``device`` and put in
            eval mode here as well.
        df: Table indexed as ``df[column][row]`` for ``row`` in
            ``range(len(df))``. Needs a ``'text'`` column and one column per
            entry of ``eng_tags`` holding the gold polarity index (0 = tag
            absent). Presumably a pandas DataFrame with a default integer
            index -- confirm with the caller.
        tokenizer: Tokenizer forwarded to ``predict``. When falsy, one is
            loaded with ``AutoTokenizer.from_pretrained(tokenizer_name)``.
        model_tokenize: Forwarded to ``predict``.
        tokenizer_name: Checkpoint name used when ``tokenizer`` is not given.
        device: Torch device for the model and inputs.
        processed: Forwarded to ``predict``.
        printout: When true, print the eight metrics as percentages.

    Returns:
        ``(acc_detect, pre_detect, rec_detect, f1_detect, acc, pre_absa,
        rec_absa, f1_absa)`` as fractions between 0 and 1. All zeros when
        ``df`` is empty.
    """
    model.eval()
    model.to(device)
    model.config.use_cache = False
    if not tokenizer:
        # The name was used without ever being imported; import it lazily so
        # callers that pass their own tokenizer do not need it at all.
        from transformers import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

    def _prf(n_common, n_predicted, n_gold):
        """Return (precision, recall, f1); all 0.0 when nothing overlaps."""
        if n_common == 0:
            return 0.0, 0.0, 0.0
        precision = n_common / n_predicted
        recall = n_common / n_gold
        return precision, recall, 2 * precision * recall / (precision + recall)

    n_rows = len(df)
    count_acc = count_detect = 0
    pre_detect = rec_detect = f1_detect = 0.0
    pre_absa = rec_absa = f1_absa = 0.0

    for row in range(n_rows):
        text = df['text'][row]
        labels = [df[x][row] for x in eng_tags]
        predicts = predict(model, text, tokenizer, model_tokenize, device=device, processed=processed)

        gold = [j for j in range(no_tag) if labels[j] != 0]
        predicted = [j for j in range(no_tag) if predicts[j] != 0]
        detected = [j for j in gold if predicts[j] != 0]
        correct = [j for j in detected if labels[j] == predicts[j]]

        p, r, f = _prf(len(detected), len(predicted), len(gold))
        pre_detect += p
        rec_detect += r
        f1_detect += f

        p, r, f = _prf(len(correct), len(predicted), len(gold))
        pre_absa += p
        rec_absa += r
        f1_absa += f

        for j in range(no_tag):
            if labels[j] == predicts[j]:
                count_acc += 1
            # Detection is right when both sides agree on presence/absence.
            if (labels[j] != 0) == (predicts[j] != 0):
                count_detect += 1

    # Avoid dividing by zero on an empty table; every metric is then 0.
    denom_rows = max(n_rows, 1)
    total = denom_rows * no_tag

    acc_detect = count_detect / total
    pre_detect = pre_detect / denom_rows
    rec_detect = rec_detect / denom_rows
    f1_detect = f1_detect / denom_rows

    acc = count_acc / total
    pre_absa = pre_absa / denom_rows
    rec_absa = rec_absa / denom_rows
    f1_absa = f1_absa / denom_rows

    if printout:
        # The values are fractions, so let the format spec scale them to
        # percent instead of appending a bare "%" to a 0..1 number.
        print(f"Detect acc: {acc_detect:.2%}")
        print(f"Detect precision: {pre_detect:.2%}")
        print(f"Detect recall: {rec_detect:.2%}")
        print(f"Detect f1: {f1_detect:.2%}")
        print()
        print(f"Absa acc: {acc:.2%}")
        print(f"Absa precision: {pre_absa:.2%}")
        print(f"Absa recall: {rec_absa:.2%}")
        print(f"Absa f1: {f1_absa:.2%}")

    return acc_detect, pre_detect, rec_detect, f1_detect, acc, pre_absa, rec_absa, f1_absa
145
-
146
def predict_detect(model, text, tokenizer, model_tokenize=None, device='cuda', processed=True, printout=False):
    """Two-step prediction: detect each aspect tag, then classify its polarity.

    Step 1 scores "<tag> không được nhận_xét ." against
    "<tag> có được nhận_xét ." for every tag. Tags for which the first
    ("không") candidate wins get prediction 0. Step 2 scores the remaining
    tags against every polarity except "không có" and stores the winning
    index into ``polarity_list``.

    Side effects: moves ``model`` to ``device``, switches it to eval mode and
    sets ``model.config.use_cache = False``.

    Args:
        model: Called as ``model(input_ids=..., decoder_input_ids=...)``;
            item ``[0]`` of the result is softmaxed over its last axis and
            indexed as ``[candidate, position, token_id]``. Presumably a
            Hugging Face BART-style seq2seq model -- confirm with the caller.
        text: Review text, used as-is unless ``processed`` is false.
        tokenizer: Callable returning a mapping with an ``'input_ids'``
            tensor when invoked with ``return_tensors='pt'``.
        model_tokenize: Forwarded to ``tokenize`` when ``processed`` is false.
        device: Torch device the model and inputs are moved to.
        processed: When false, ``text`` is first passed through
            ``normalize`` and ``tokenize``.
        printout: When true, print ``{english tag: english polarity}`` for
            every tag whose prediction is not 0.

    Returns:
        A list of ``no_tag`` ints, each an index into ``polarity_list``
        (0 for tags that were not detected).
    """
    model.to(device)
    model.eval()
    model.config.use_cache = False

    if not processed:
        text = normalize(text)
        text = tokenize(text, model_tokenize)

    def _best_candidate(input_ids, target_list):
        """Return the index of the target sentence the model scores highest."""
        output_ids = tokenizer(target_list, return_tensors='pt', padding=True, truncation=True)['input_ids']
        with torch.no_grad():
            output = model(input_ids=input_ids, decoder_input_ids=output_ids.to(device))[0]
            probs = output.softmax(dim=-1).to('cpu').numpy()

        target_ids = output_ids.tolist()
        scores = []
        for m in range(len(target_list)):
            # Position n is scored against the token at n + 1; the last two
            # positions are skipped.
            # NOTE(review): targets are padded to a common length, so shorter
            # candidates also multiply in probabilities at padding positions
            # -- confirm this is intended.
            score = 1
            for n in range(probs[m].shape[0] - 2):
                score *= probs[m][n][target_ids[m][n + 1]]
            scores.append(score)
        return int(np.argmax(scores))

    # Index 0 of polarity_list ("không có") is decided by the detection step,
    # so the polarity step only ranks the remaining entries.
    polarities = [polarity for polarity in polarity_list if polarity != "không có"]

    # Encoder inputs do not depend on the tag; build each batch once.
    detect_inputs = tokenizer([text] * len(detect_labels), return_tensors='pt')['input_ids'].to(device)
    polarity_inputs = tokenizer([text] * len(polarities), return_tensors='pt')['input_ids'].to(device)

    predicts = []
    for tag in tags:
        detect_targets = [tag.lower() + " " + detect_label.lower() + " được nhận_xét ." for detect_label in detect_labels]
        if _best_candidate(detect_inputs, detect_targets) == 0:
            predicts.append(0)
            continue
        polarity_targets = ["Nhận_xét " + tag.lower() + " " + polarity.lower() + " ." for polarity in polarities]
        # +1 maps the position within `polarities` back into polarity_list.
        predicts.append(_best_candidate(polarity_inputs, polarity_targets) + 1)

    if printout:
        result = {eng_tags[i]: eng_polarity[p] for i, p in enumerate(predicts) if p != 0}
        print(result)
    return predicts
203
-
204
def predict_df_detect(model, df, tokenizer=None, model_tokenize=None, tokenizer_name='vinai/bartpho-word-base',
                      device='cuda', printout=True, processed=True):
    """Run ``predict_detect`` on every row of ``df`` and compute metrics.

    "Detect" metrics only look at whether a tag is present (value != 0);
    "Absa" metrics additionally require the polarity index to match.
    Accuracies are taken over all ``len(df) * no_tag`` tag slots. Precision,
    recall and F1 are computed per row and averaged over rows; a row with no
    overlap between labels and predictions contributes 0 to all three.

    Args:
        model: Model forwarded to ``predict_detect``; moved to ``device`` and
            put in eval mode here as well.
        df: Table indexed as ``df[column][row]`` for ``row`` in
            ``range(len(df))``. Needs a ``'text'`` column and one column per
            entry of ``eng_tags`` holding the gold polarity index (0 = tag
            absent). Presumably a pandas DataFrame with a default integer
            index -- confirm with the caller.
        tokenizer: Tokenizer forwarded to ``predict_detect``. When falsy, one
            is loaded with ``AutoTokenizer.from_pretrained(tokenizer_name)``.
        model_tokenize: Forwarded to ``predict_detect``.
        tokenizer_name: Checkpoint name used when ``tokenizer`` is not given.
        device: Torch device for the model and inputs.
        printout: When true, print the eight metrics as percentages.
        processed: Forwarded to ``predict_detect``. Added as a trailing
            keyword because the body used this name without defining it.

    Returns:
        ``(acc_detect, pre_detect, rec_detect, f1_detect, acc, pre_absa,
        rec_absa, f1_absa)`` as fractions between 0 and 1. All zeros when
        ``df`` is empty.
    """
    model.eval()
    model.to(device)
    model.config.use_cache = False
    if not tokenizer:
        # The name was used without ever being imported; import it lazily so
        # callers that pass their own tokenizer do not need it at all.
        from transformers import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

    def _prf(n_common, n_predicted, n_gold):
        """Return (precision, recall, f1); all 0.0 when nothing overlaps."""
        if n_common == 0:
            return 0.0, 0.0, 0.0
        precision = n_common / n_predicted
        recall = n_common / n_gold
        return precision, recall, 2 * precision * recall / (precision + recall)

    n_rows = len(df)
    count_acc = count_detect = 0
    pre_detect = rec_detect = f1_detect = 0.0
    pre_absa = rec_absa = f1_absa = 0.0

    for row in range(n_rows):
        text = df['text'][row]
        labels = [df[x][row] for x in eng_tags]
        # Use the two-step predictor this evaluator is named after, with
        # keyword arguments so `device` and `processed` cannot be swapped.
        predicts = predict_detect(model, text, tokenizer, model_tokenize, device=device, processed=processed)

        gold = [j for j in range(no_tag) if labels[j] != 0]
        predicted = [j for j in range(no_tag) if predicts[j] != 0]
        detected = [j for j in gold if predicts[j] != 0]
        correct = [j for j in detected if labels[j] == predicts[j]]

        p, r, f = _prf(len(detected), len(predicted), len(gold))
        pre_detect += p
        rec_detect += r
        f1_detect += f

        p, r, f = _prf(len(correct), len(predicted), len(gold))
        pre_absa += p
        rec_absa += r
        f1_absa += f

        for j in range(no_tag):
            if labels[j] == predicts[j]:
                count_acc += 1
            # Detection is right when both sides agree on presence/absence.
            if (labels[j] != 0) == (predicts[j] != 0):
                count_detect += 1

    # Avoid dividing by zero on an empty table; every metric is then 0.
    denom_rows = max(n_rows, 1)
    total = denom_rows * no_tag

    acc_detect = count_detect / total
    pre_detect = pre_detect / denom_rows
    rec_detect = rec_detect / denom_rows
    f1_detect = f1_detect / denom_rows

    acc = count_acc / total
    pre_absa = pre_absa / denom_rows
    rec_absa = rec_absa / denom_rows
    f1_absa = f1_absa / denom_rows

    if printout:
        # The values are fractions, so let the format spec scale them to
        # percent instead of appending a bare "%" to a 0..1 number.
        print(f"Detect acc: {acc_detect:.2%}")
        print(f"Detect precision: {pre_detect:.2%}")
        print(f"Detect recall: {rec_detect:.2%}")
        print(f"Detect f1: {f1_detect:.2%}")
        print()
        print(f"Absa acc: {acc:.2%}")
        print(f"Absa precision: {pre_absa:.2%}")
        print(f"Absa recall: {rec_absa:.2%}")
        print(f"Absa f1: {f1_absa:.2%}")

    return acc_detect, pre_detect, rec_detect, f1_detect, acc, pre_absa, rec_absa, f1_absa
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ from bartpho.preprocess import tokenize, normalize
4
+
5
+
6
# Aspect categories: English dataset label -> word-segmented Vietnamese phrase
# that is spliced into the target sentences scored by the model.
tag_dict = {
    "RESTAURANT#GENERAL": "chung về nhà_hàng",
    "RESTAURANT#PRICES": "giá của nhà_hàng",
    "RESTAURANT#MISCELLANEOUS": "tổng_quát về nhà_hàng",
    "FOOD#PRICES": "giá đồ ăn",
    "FOOD#QUALITY": "chất_lượng đồ ăn",
    "FOOD#STYLE&OPTIONS": "phong_cách và lựa_chọn đồ ăn",
    "DRINKS#PRICES": "giá đồ uống",
    "DRINKS#QUALITY": "chất_lượng đồ uống",
    "DRINKS#STYLE&OPTIONS": "phong_cách và lựa_chọn đồ uống",
    "AMBIENCE#GENERAL": "bầu không_khí",
    "SERVICE#GENERAL": "dịch_vụ",
    "LOCATION#GENERAL": "vị_trí",
}

# Polarity labels: English name -> Vietnamese phrase. "không có" must stay
# first: the prediction code treats polarity index 0 as "aspect not present".
polarity_dict = {
    "không có": "không có",
    "positive": "tích_cực",
    "neutral": "trung_lập",
    "negative": "tiêu_cực",
}

# Index-aligned views of the two dicts. They are derived (dicts keep insertion
# order) instead of being typed out a second time, so the English and
# Vietnamese lists can never drift apart.
polarity_list = list(polarity_dict.values())
tags = list(tag_dict.values())
eng_tags = list(tag_dict)
eng_polarity = list(polarity_dict)

# Candidates for the binary "is this aspect mentioned?" step:
# index 0 = 'không' (no), index 1 = 'có' (yes).
detect_labels = ['không', 'có']
no_polarity = len(polarity_list)
no_tag = len(tags)
39
+
40
def predict(model, text, tokenizer, model_tokenize=None, processed=True, printout=False, device='cpu'):
    """Predict a polarity index for every aspect tag of one review.

    For each entry of ``tags`` one candidate sentence per polarity is built
    ("Nhận_xét <tag> <polarity> .") and given to the model as decoder input.
    The candidate whose tokens receive the largest product of probabilities
    is selected.

    Side effects: moves ``model`` to ``device``, switches it to eval mode and
    sets ``model.config.use_cache = False``.

    Args:
        model: Called as ``model(input_ids=..., decoder_input_ids=...)``;
            item ``[0]`` of the result is softmaxed over its last axis and
            indexed as ``[candidate, position, token_id]``. Presumably a
            Hugging Face BART-style seq2seq model -- confirm with the caller.
        text: Review text, used as-is unless ``processed`` is false.
        tokenizer: Callable returning a mapping with an ``'input_ids'``
            tensor when invoked with ``return_tensors='pt'``.
        model_tokenize: Forwarded to ``tokenize`` when ``processed`` is false.
        processed: When false, ``text`` is first passed through
            ``normalize`` and ``tokenize``.
        printout: When true, print ``{english tag: english polarity}`` for
            every tag whose prediction is not 0.
        device: Torch device the model and inputs are moved to. Defaults to
            ``'cpu'``, the value that used to be hard-coded in the body.

    Returns:
        A list of ``no_tag`` ints, each an index into ``polarity_list``.
    """
    model.to(device)
    model.eval()
    model.config.use_cache = False

    if not processed:
        text = normalize(text)
        text = tokenize(text, model_tokenize)

    # The encoder input does not depend on the tag, so build it only once.
    input_ids = tokenizer([text] * no_polarity, return_tensors='pt')['input_ids'].to(device)

    predicts = []
    for tag in tags:
        target_list = ["Nhận_xét " + tag.lower() + " " + polarity.lower() + " ." for polarity in polarity_list]
        output_ids = tokenizer(target_list, return_tensors='pt', padding=True, truncation=True)['input_ids']

        with torch.no_grad():
            output = model(input_ids=input_ids, decoder_input_ids=output_ids.to(device))[0]
            probs = output.softmax(dim=-1).to('cpu').numpy()

        target_ids = output_ids.tolist()
        score_list = []
        for m in range(no_polarity):
            # Position n is scored against the token at n + 1; the last two
            # positions are skipped.
            # NOTE(review): targets are padded to a common length, so shorter
            # candidates also multiply in probabilities at padding positions
            # -- confirm this is intended.
            score = 1
            for n in range(probs[m].shape[0] - 2):
                score *= probs[m][n][target_ids[m][n + 1]]
            score_list.append(score)
        predicts.append(int(np.argmax(score_list)))

    if printout:
        result = {eng_tags[i]: eng_polarity[p] for i, p in enumerate(predicts) if p != 0}
        print(result)
    return predicts
76
+
77
def predict_df(model, df, tokenizer=None, model_tokenize=None, tokenizer_name='vinai/bartpho-word-base', processed=True, printout=True):
    """Run ``predict`` on every row of ``df`` and compute evaluation metrics.

    "Detect" metrics only look at whether a tag is present (value != 0);
    "Absa" metrics additionally require the polarity index to match.
    Accuracies are taken over all ``len(df) * no_tag`` tag slots. Precision,
    recall and F1 are computed per row and averaged over rows; a row with no
    overlap between labels and predictions contributes 0 to all three.

    Args:
        model: Model forwarded to ``predict``; moved to the CPU and put in
            eval mode here as well.
        df: Table indexed as ``df[column][row]`` for ``row`` in
            ``range(len(df))``. Needs a ``'text'`` column and one column per
            entry of ``eng_tags`` holding the gold polarity index (0 = tag
            absent). Presumably a pandas DataFrame with a default integer
            index -- confirm with the caller.
        tokenizer: Tokenizer forwarded to ``predict``. When falsy, one is
            loaded with ``AutoTokenizer.from_pretrained(tokenizer_name)``.
        model_tokenize: Forwarded to ``predict``.
        tokenizer_name: Checkpoint name used when ``tokenizer`` is not given.
        processed: Forwarded to ``predict``.
        printout: When true, print the eight metrics as percentages.

    Returns:
        ``(acc_detect, pre_detect, rec_detect, f1_detect, acc, pre_absa,
        rec_absa, f1_absa)`` as fractions between 0 and 1. All zeros when
        ``df`` is empty.
    """
    device = 'cpu'
    model.eval()
    model.to(device)
    model.config.use_cache = False
    if not tokenizer:
        # The name was used without ever being imported; import it lazily so
        # callers that pass their own tokenizer do not need it at all.
        from transformers import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

    def _prf(n_common, n_predicted, n_gold):
        """Return (precision, recall, f1); all 0.0 when nothing overlaps."""
        if n_common == 0:
            return 0.0, 0.0, 0.0
        precision = n_common / n_predicted
        recall = n_common / n_gold
        return precision, recall, 2 * precision * recall / (precision + recall)

    n_rows = len(df)
    count_acc = count_detect = 0
    pre_detect = rec_detect = f1_detect = 0.0
    pre_absa = rec_absa = f1_absa = 0.0

    for row in range(n_rows):
        text = df['text'][row]
        labels = [df[x][row] for x in eng_tags]
        # Pass `processed` by keyword: `predict` has no positional `device`
        # slot, so the old positional call put 'cpu' into `processed` and
        # `processed` into `printout`.
        predicts = predict(model, text, tokenizer, model_tokenize, processed=processed)

        gold = [j for j in range(no_tag) if labels[j] != 0]
        predicted = [j for j in range(no_tag) if predicts[j] != 0]
        detected = [j for j in gold if predicts[j] != 0]
        correct = [j for j in detected if labels[j] == predicts[j]]

        p, r, f = _prf(len(detected), len(predicted), len(gold))
        pre_detect += p
        rec_detect += r
        f1_detect += f

        p, r, f = _prf(len(correct), len(predicted), len(gold))
        pre_absa += p
        rec_absa += r
        f1_absa += f

        for j in range(no_tag):
            if labels[j] == predicts[j]:
                count_acc += 1
            # Detection is right when both sides agree on presence/absence.
            if (labels[j] != 0) == (predicts[j] != 0):
                count_detect += 1

    # Avoid dividing by zero on an empty table; every metric is then 0.
    denom_rows = max(n_rows, 1)
    total = denom_rows * no_tag

    acc_detect = count_detect / total
    pre_detect = pre_detect / denom_rows
    rec_detect = rec_detect / denom_rows
    f1_detect = f1_detect / denom_rows

    acc = count_acc / total
    pre_absa = pre_absa / denom_rows
    rec_absa = rec_absa / denom_rows
    f1_absa = f1_absa / denom_rows

    if printout:
        # The values are fractions, so let the format spec scale them to
        # percent instead of appending a bare "%" to a 0..1 number.
        print(f"Detect acc: {acc_detect:.2%}")
        print(f"Detect precision: {pre_detect:.2%}")
        print(f"Detect recall: {rec_detect:.2%}")
        print(f"Detect f1: {f1_detect:.2%}")
        print()
        print(f"Absa acc: {acc:.2%}")
        print(f"Absa precision: {pre_absa:.2%}")
        print(f"Absa recall: {rec_absa:.2%}")
        print(f"Absa f1: {f1_absa:.2%}")

    return acc_detect, pre_detect, rec_detect, f1_detect, acc, pre_absa, rec_absa, f1_absa
146
+
147
def predict_detect(model, text, tokenizer, model_tokenize=None, processed=True, printout=False, device='cpu'):
    """Two-step prediction: detect each aspect tag, then classify its polarity.

    Step 1 scores "<tag> không được nhận_xét ." against
    "<tag> có được nhận_xét ." for every tag. Tags for which the first
    ("không") candidate wins get prediction 0. Step 2 scores the remaining
    tags against every polarity except "không có" and stores the winning
    index into ``polarity_list``.

    Side effects: moves ``model`` to ``device``, switches it to eval mode and
    sets ``model.config.use_cache = False``.

    Args:
        model: Called as ``model(input_ids=..., decoder_input_ids=...)``;
            item ``[0]`` of the result is softmaxed over its last axis and
            indexed as ``[candidate, position, token_id]``. Presumably a
            Hugging Face BART-style seq2seq model -- confirm with the caller.
        text: Review text, used as-is unless ``processed`` is false.
        tokenizer: Callable returning a mapping with an ``'input_ids'``
            tensor when invoked with ``return_tensors='pt'``.
        model_tokenize: Forwarded to ``tokenize`` when ``processed`` is false.
        processed: When false, ``text`` is first passed through
            ``normalize`` and ``tokenize``.
        printout: When true, print ``{english tag: english polarity}`` for
            every tag whose prediction is not 0.
        device: Torch device the model and inputs are moved to. Defaults to
            ``'cpu'``, the value that used to be hard-coded in the body.

    Returns:
        A list of ``no_tag`` ints, each an index into ``polarity_list``
        (0 for tags that were not detected).
    """
    model.to(device)
    model.eval()
    model.config.use_cache = False

    if not processed:
        text = normalize(text)
        text = tokenize(text, model_tokenize)

    def _best_candidate(input_ids, target_list):
        """Return the index of the target sentence the model scores highest."""
        output_ids = tokenizer(target_list, return_tensors='pt', padding=True, truncation=True)['input_ids']
        with torch.no_grad():
            output = model(input_ids=input_ids, decoder_input_ids=output_ids.to(device))[0]
            probs = output.softmax(dim=-1).to('cpu').numpy()

        target_ids = output_ids.tolist()
        scores = []
        for m in range(len(target_list)):
            # Position n is scored against the token at n + 1; the last two
            # positions are skipped.
            # NOTE(review): targets are padded to a common length, so shorter
            # candidates also multiply in probabilities at padding positions
            # -- confirm this is intended.
            score = 1
            for n in range(probs[m].shape[0] - 2):
                score *= probs[m][n][target_ids[m][n + 1]]
            scores.append(score)
        return int(np.argmax(scores))

    # Index 0 of polarity_list ("không có") is decided by the detection step,
    # so the polarity step only ranks the remaining entries.
    polarities = [polarity for polarity in polarity_list if polarity != "không có"]

    # Encoder inputs do not depend on the tag; build each batch once.
    detect_inputs = tokenizer([text] * len(detect_labels), return_tensors='pt')['input_ids'].to(device)
    polarity_inputs = tokenizer([text] * len(polarities), return_tensors='pt')['input_ids'].to(device)

    predicts = []
    for tag in tags:
        detect_targets = [tag.lower() + " " + detect_label.lower() + " được nhận_xét ." for detect_label in detect_labels]
        if _best_candidate(detect_inputs, detect_targets) == 0:
            predicts.append(0)
            continue
        polarity_targets = ["Nhận_xét " + tag.lower() + " " + polarity.lower() + " ." for polarity in polarities]
        # +1 maps the position within `polarities` back into polarity_list.
        predicts.append(_best_candidate(polarity_inputs, polarity_targets) + 1)

    if printout:
        result = {eng_tags[i]: eng_polarity[p] for i, p in enumerate(predicts) if p != 0}
        print(result)
    return predicts
205
+
206
def predict_df_detect(model, df, tokenizer=None, model_tokenize=None, tokenizer_name='vinai/bartpho-word-base', printout=True, processed=True):
    """Run ``predict_detect`` on every row of ``df`` and compute metrics.

    "Detect" metrics only look at whether a tag is present (value != 0);
    "Absa" metrics additionally require the polarity index to match.
    Accuracies are taken over all ``len(df) * no_tag`` tag slots. Precision,
    recall and F1 are computed per row and averaged over rows; a row with no
    overlap between labels and predictions contributes 0 to all three.

    Args:
        model: Model forwarded to ``predict_detect``; moved to the CPU and
            put in eval mode here as well.
        df: Table indexed as ``df[column][row]`` for ``row`` in
            ``range(len(df))``. Needs a ``'text'`` column and one column per
            entry of ``eng_tags`` holding the gold polarity index (0 = tag
            absent). Presumably a pandas DataFrame with a default integer
            index -- confirm with the caller.
        tokenizer: Tokenizer forwarded to ``predict_detect``. When falsy, one
            is loaded with ``AutoTokenizer.from_pretrained(tokenizer_name)``.
        model_tokenize: Forwarded to ``predict_detect``.
        tokenizer_name: Checkpoint name used when ``tokenizer`` is not given.
        printout: When true, print the eight metrics as percentages.
        processed: Forwarded to ``predict_detect``. Added as a trailing
            keyword because the body used this name without defining it.

    Returns:
        ``(acc_detect, pre_detect, rec_detect, f1_detect, acc, pre_absa,
        rec_absa, f1_absa)`` as fractions between 0 and 1. All zeros when
        ``df`` is empty.
    """
    device = 'cpu'
    model.eval()
    model.to(device)
    model.config.use_cache = False
    if not tokenizer:
        # The name was used without ever being imported; import it lazily so
        # callers that pass their own tokenizer do not need it at all.
        from transformers import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

    def _prf(n_common, n_predicted, n_gold):
        """Return (precision, recall, f1); all 0.0 when nothing overlaps."""
        if n_common == 0:
            return 0.0, 0.0, 0.0
        precision = n_common / n_predicted
        recall = n_common / n_gold
        return precision, recall, 2 * precision * recall / (precision + recall)

    n_rows = len(df)
    count_acc = count_detect = 0
    pre_detect = rec_detect = f1_detect = 0.0
    pre_absa = rec_absa = f1_absa = 0.0

    for row in range(n_rows):
        text = df['text'][row]
        labels = [df[x][row] for x in eng_tags]
        # Use the two-step predictor this evaluator is named after. The old
        # call went to `predict` with an undefined `processed` and with
        # `device` in the wrong positional slot.
        predicts = predict_detect(model, text, tokenizer, model_tokenize, processed=processed)

        gold = [j for j in range(no_tag) if labels[j] != 0]
        predicted = [j for j in range(no_tag) if predicts[j] != 0]
        detected = [j for j in gold if predicts[j] != 0]
        correct = [j for j in detected if labels[j] == predicts[j]]

        p, r, f = _prf(len(detected), len(predicted), len(gold))
        pre_detect += p
        rec_detect += r
        f1_detect += f

        p, r, f = _prf(len(correct), len(predicted), len(gold))
        pre_absa += p
        rec_absa += r
        f1_absa += f

        for j in range(no_tag):
            if labels[j] == predicts[j]:
                count_acc += 1
            # Detection is right when both sides agree on presence/absence.
            if (labels[j] != 0) == (predicts[j] != 0):
                count_detect += 1

    # Avoid dividing by zero on an empty table; every metric is then 0.
    denom_rows = max(n_rows, 1)
    total = denom_rows * no_tag

    acc_detect = count_detect / total
    pre_detect = pre_detect / denom_rows
    rec_detect = rec_detect / denom_rows
    f1_detect = f1_detect / denom_rows

    acc = count_acc / total
    pre_absa = pre_absa / denom_rows
    rec_absa = rec_absa / denom_rows
    f1_absa = f1_absa / denom_rows

    if printout:
        # The values are fractions, so let the format spec scale them to
        # percent instead of appending a bare "%" to a 0..1 number.
        print(f"Detect acc: {acc_detect:.2%}")
        print(f"Detect precision: {pre_detect:.2%}")
        print(f"Detect recall: {rec_detect:.2%}")
        print(f"Detect f1: {f1_detect:.2%}")
        print()
        print(f"Absa acc: {acc:.2%}")
        print(f"Absa precision: {pre_absa:.2%}")
        print(f"Absa recall: {rec_absa:.2%}")
        print(f"Absa f1: {f1_absa:.2%}")

    return acc_detect, pre_detect, rec_detect, f1_detect, acc, pre_absa, rec_absa, f1_absa