LM-Explanation-Demo

Running

App Files Community

JRQi committed on Sep 28, 2023

Commit

686b10f

1 Parent(s): cf3bdbb

Update game3.py

Browse files

Files changed (1) hide show

game3.py +68 -2

game3.py CHANGED Viewed

@@ -109,6 +109,8 @@ def func3(num_selected, human_predict, num1, num2, user_important):
 def interpre3(num_selected):
     fname = 'data3_convai2_inferred.txt'
     with open(fname) as f:
         content = f.readlines()
         text = eval(content[int(num_selected*2)])
@@ -116,7 +118,38 @@ def interpre3(num_selected):
     print(interpretation)
-    res = {"original": text['text'], "interpretation": interpretation}
     # pos = []
     # neg = []
     # res = []
@@ -156,6 +189,7 @@ def func3_written(text_written, human_predict, lang_written):
     device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
     classifier = pipeline("text-classification", model="padmajabfrl/Gender-Classification", device=device)
     output = classifier([text_written])
@@ -181,8 +215,40 @@ def func3_written(text_written, human_predict, lang_written):
     shap_values = explainer([text_written])
     interpretation = list(zip(shap_values.data[0], shap_values.values[0, :, 1]))
-    res = {"original": text_written, "interpretation": interpretation}
     print(res)
     return res, ai_predict, chatbot

 def interpre3(num_selected):
     fname = 'data3_convai2_inferred.txt'
+    tokenizer = AutoTokenizer.from_pretrained("padmajabfrl/Gender-Classification")
     with open(fname) as f:
         content = f.readlines()
         text = eval(content[int(num_selected*2)])
     print(interpretation)
+    encodings = tokenizer(text['text'], return_offsets_mapping=True)
+    print(encodings['offset_mapping'])
+    is_subword = [False, False]
+    for i in range(2, len(encodings['offset_mapping'])):
+        if encodings['offset_mapping'][i][0] == encodings['offset_mapping'][i-1][1]:
+            is_subword.append(True)
+        else:
+            is_subword.append(False)
+    print(is_subword)
+    interpretation_combined = []
+    index_tmp = 0
+    while index_tmp < (len(interpretation) - 1):
+        if not is_subword[index_tmp+1]:
+            interpretation_combined.append(interpretation[index_tmp])
+            index_tmp += 1
+        else:
+            text_combined = interpretation[index_tmp][0]
+            score_combinded = interpretation[index_tmp][1]
+            length = 1
+            while is_subword[index_tmp+length]:
+                text_combined += interpretation[index_tmp+length][0]
+                score_combinded += interpretation[index_tmp+length][1]
+                length += 1
+            interpretation_combined.append((text_combined, score_combinded/length))
+            index_tmp += length
+    interpretation_combined.append(('', 0.0))
+    print(interpretation_combined)
+    res = {"original": text['text'], "interpretation": interpretation_combined}
     # pos = []
     # neg = []
     # res = []
     device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
     classifier = pipeline("text-classification", model="padmajabfrl/Gender-Classification", device=device)
+    tokenizer = AutoTokenizer.from_pretrained("padmajabfrl/Gender-Classification")
     output = classifier([text_written])
     shap_values = explainer([text_written])
     interpretation = list(zip(shap_values.data[0], shap_values.values[0, :, 1]))
+    encodings = tokenizer(text['text'], return_offsets_mapping=True)
+    print(encodings['offset_mapping'])
+    is_subword = [False, False]
+    for i in range(2, len(encodings['offset_mapping'])):
+        if encodings['offset_mapping'][i][0] == encodings['offset_mapping'][i-1][1]:
+            is_subword.append(True)
+        else:
+            is_subword.append(False)
+    print(is_subword)
+    interpretation_combined = []
+    index_tmp = 0
+    while index_tmp < (len(interpretation) - 1):
+        if not is_subword[index_tmp+1]:
+            interpretation_combined.append(interpretation[index_tmp])
+            index_tmp += 1
+        else:
+            text_combined = interpretation[index_tmp][0]
+            score_combinded = interpretation[index_tmp][1]
+            length = 1
+            while is_subword[index_tmp+length]:
+                text_combined += interpretation[index_tmp+length][0]
+                score_combinded += interpretation[index_tmp+length][1]
+                length += 1
+            interpretation_combined.append((text_combined, score_combinded/length))
+            index_tmp += length
+    interpretation_combined.append(('', 0.0))
+    print(interpretation_combined)
+    res = {"original": text_written, "interpretation": interpretation_combined}
     print(res)
     return res, ai_predict, chatbot