ruanchaves committed on
Commit
69f0559
·
1 Parent(s): c69e84b
Files changed (2) hide show
  1. app.py +37 -30
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
-
5
  from collections import Counter
6
 
7
  def most_frequent(array):
@@ -42,14 +42,14 @@ exibindo a relação entre os pares textuais.)
42
  score_descriptions = {
43
  1: "There is an entailment relation between premise and hypothesis. If the premise is true, then the hypothesis must also be true.",
44
  0: "There is no logical relation between the premise and the hypothesis.",
45
- 2: "Additionally, the premise has also been detected as being a paraphrase of the hypothesis."
46
  }
47
 
48
 
49
  score_descriptions_pt = {
50
  1: "(Existe uma relação de implicação entre premissa e hipótese. Se a premissa é verdadeira, então a hipótese também deve ser verdadeira.)",
51
  0: "(Não há relação lógica entre a premissa e a hipótese.)",
52
- 2: "(Além disso, a premissa também foi detectada como sendo uma paráfrase da hipótese.)"
53
  }
54
 
55
  score_short_keys = {
@@ -77,6 +77,11 @@ user_friendly_name = {
77
  "ruanchaves/bert-large-portuguese-cased-assin-entailment": "BERTimbau large (ASSIN)"
78
  }
79
 
 
 
 
 
 
80
  model_array = []
81
 
82
  for model_name in model_list:
@@ -87,45 +92,47 @@ for model_name in model_list:
87
  model_array.append(row)
88
 
89
 
90
- def entailment(s1, s2):
 
 
91
  scores = {}
 
92
  for row in model_array:
93
- name = user_friendly_name[row["name"]]
94
- tokenizer = row["tokenizer"]
95
- model = row["model"]
96
- model_input = tokenizer(*([s1], [s2]), padding=True, return_tensors="pt")
97
- with torch.no_grad():
98
- output = model(**model_input)
99
- score = output[0][0].argmax().item()
100
- scores[name] = score
101
- assin2_scores = {k: v for k, v in scores.items() if "ASSIN 2" in k}
102
- average_score = most_frequent(assin2_scores.values())
103
- description = score_descriptions[average_score]
104
- description_pt = score_descriptions_pt[average_score]
105
-
106
- if 2 in scores.values():
107
- description = description + "\n" + score_descriptions[2]
108
- description_pt = description_pt + "\n" + score_descriptions_pt[2]
109
- final_description = description + "\n \n" + description_pt
110
-
111
- for key, value in scores.items():
112
- scores[key] = score_short_keys[value]
113
-
114
- return final_description, scores
115
 
116
 
117
  inputs = [
118
  gr.inputs.Textbox(label="Premise"),
119
- gr.inputs.Textbox(label="Hypothesis")
 
120
  ]
121
 
122
  outputs = [
123
- gr.Textbox(label="Evaluation", value=output_textbox_component_description),
124
- gr.JSON(label="Results by model", value=output_json_component_description)
125
  ]
126
 
127
 
128
- gr.Interface(fn=entailment, inputs=inputs, outputs=outputs, title=app_title,
129
  description=app_description,
130
  examples=app_examples,
131
  article = article_string).launch()
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
+ from scipy.special import softmax
5
  from collections import Counter
6
 
7
  def most_frequent(array):
 
42
  score_descriptions = {
43
  1: "There is an entailment relation between premise and hypothesis. If the premise is true, then the hypothesis must also be true.",
44
  0: "There is no logical relation between the premise and the hypothesis.",
45
+ 2: "The premise is a paraphrase of the hypothesis."
46
  }
47
 
48
 
49
  score_descriptions_pt = {
50
  1: "(Existe uma relação de implicação entre premissa e hipótese. Se a premissa é verdadeira, então a hipótese também deve ser verdadeira.)",
51
  0: "(Não há relação lógica entre a premissa e a hipótese.)",
52
+ 2: "(A premissa é uma paráfrase da hipótese.)"
53
  }
54
 
55
  score_short_keys = {
 
77
  "ruanchaves/bert-large-portuguese-cased-assin-entailment": "BERTimbau large (ASSIN)"
78
  }
79
 
80
+ reverse_user_friendly_name = { v:k for k,v in user_friendly_name.items() }
81
+
82
+ user_friendly_name_list = list(user_friendly_name.values())
83
+
84
+
85
  model_array = []
86
 
87
  for model_name in model_list:
 
92
  model_array.append(row)
93
 
94
 
95
def predict(s1, s2, chosen_model):
    """Classify the relation between a premise and a hypothesis with one model.

    Args:
        s1: Premise text.
        s2: Hypothesis text.
        chosen_model: User-friendly model name as listed in
            ``user_friendly_name_list``. A falsy value (``None`` or ``""``)
            falls back to the first entry of that list.

    Returns:
        A dict mapping each class description (the English text, a blank
        separator line, then the Portuguese text) to the softmax probability
        the selected model assigns to that class.

    Raises:
        KeyError: If ``chosen_model`` is not a known user-friendly name, or
            if the model predicts a class index that has no description.
        LookupError: If the selected model has no entry in ``model_array``.
    """
    if not chosen_model:
        chosen_model = user_friendly_name_list[0]

    full_chosen_model_name = reverse_user_friendly_name[chosen_model]

    # Find the loaded tokenizer/model pair for the requested checkpoint.
    selected_row = next(
        (row for row in model_array if row["name"] == full_chosen_model_name),
        None,
    )
    if selected_row is None:
        # Previously this case fell through and crashed later with an
        # unbound-variable error; fail early with an explicit message instead.
        raise LookupError(
            f"Model {full_chosen_model_name!r} is not available in model_array"
        )

    tokenizer = selected_row["tokenizer"]
    model = selected_row["model"]

    # Encode the (premise, hypothesis) pair as a batch of size one.
    model_input = tokenizer([s1], [s2], padding=True, return_tensors="pt")
    with torch.no_grad():
        output = model(**model_input)

    # The first output element is treated as the per-class logits; take the
    # single batch item and normalise it into probabilities.
    logits = output[0][0].detach().numpy()
    probabilities = softmax(logits).tolist()

    return {
        score_descriptions[idx] + "\n \n" + score_descriptions_pt[idx]: probability
        for idx, probability in enumerate(probabilities)
    }
 
122
 
123
 
124
# Input widgets, in the positional order of predict's parameters
# (premise, hypothesis, model name).
inputs = [
    gr.inputs.Textbox(label="Premise"),
    gr.inputs.Textbox(label="Hypothesis"),
    # Top-level Gradio components take their initial selection via `value`;
    # `default` belongs to the legacy `gr.inputs.*` API and is not applied here.
    gr.Dropdown(
        label="Model",
        choices=user_friendly_name_list,
        value=user_friendly_name_list[0],
    ),
]

# predict returns a {description: probability} dict, shown by a Label component.
outputs = [
    gr.Label(label="Result"),
]

# NOTE(review): app_examples is defined outside this view — confirm each
# example row is still valid now that the interface has three inputs.
gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=app_title,
    description=app_description,
    examples=app_examples,
    article=article_string,
).launch()
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  torch
2
  gradio
3
- transformers
 
 
1
  torch
2
  gradio
3
+ transformers
4
+ scipy