j-hartmann committed on
Commit
98fd87f
1 Parent(s): 5f849fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -95
app.py CHANGED
@@ -1,100 +1,12 @@
1
-
2
  # imports
3
  import gradio as gr
4
  import pandas as pd
 
 
 
 
 
5
  import numpy as np
6
  from numpy import dot
7
- from numpy.linalg import norm
8
-
9
-
10
- # compute dot product of inputs
11
- # summary function - test for single gradio function interfrace
12
- def gr_cosine_similarity(sentence1, sentence2):
13
-
14
- # load tokenizer and model, create trainer
15
- model_name = "j-hartmann/emotion-english-distilroberta-base"
16
- tokenizer = AutoTokenizer.from_pretrained(model_name)
17
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
18
- trainer = Trainer(model=model)
19
-
20
-
21
- # Create class for data preparation
22
- class SimpleDataset:
23
- def __init__(self, tokenized_texts):
24
- self.tokenized_texts = tokenized_texts
25
-
26
- def __len__(self):
27
- return len(self.tokenized_texts["input_ids"])
28
-
29
- def __getitem__(self, idx):
30
- return {k: v[idx] for k, v in self.tokenized_texts.items()}
31
-
32
-
33
- # sentences in list
34
- lines_s = [sentence1, sentence2]
35
-
36
- # Tokenize texts and create prediction data set
37
- tokenized_texts = tokenizer(lines_s, truncation=True, padding=True)
38
- pred_dataset = SimpleDataset(tokenized_texts)
39
-
40
- # Run predictions -> predict whole df
41
- predictions = trainer.predict(pred_dataset)
42
-
43
- # Transform predictions to labels
44
- preds = predictions.predictions.argmax(-1)
45
- labels = pd.Series(preds).map(model.config.id2label)
46
- scores = (np.exp(predictions[0])/np.exp(predictions[0]).sum(-1,keepdims=True)).max(1)
47
- # scores raw
48
- temp = (np.exp(predictions[0])/np.exp(predictions[0]).sum(-1, keepdims=True)).tolist()
49
-
50
-
51
- # work in progress
52
- # container
53
- anger = []
54
- disgust = []
55
- fear = []
56
- joy = []
57
- neutral = []
58
- sadness = []
59
- surprise = []
60
-
61
- # extract scores (as many entries as exist in pred_texts)
62
- for i in range(len(lines_s)):
63
- anger.append(temp[i][0])
64
- disgust.append(temp[i][1])
65
- fear.append(temp[i][2])
66
- joy.append(temp[i][3])
67
- neutral.append(temp[i][4])
68
- sadness.append(temp[i][5])
69
- surprise.append(temp[i][6])
70
-
71
- # define both vectors for the dot product
72
- # each include all values for both predictions
73
- v1 = temp[0]
74
- v2 = temp[1]
75
-
76
- # compute dot product of all
77
- dot_product = dot(v1, v2)
78
-
79
- # define df
80
- df = pd.DataFrame(list(zip(lines_s,labels, anger, disgust, fear, joy, neutral, sadness, surprise)), columns=['text','label', 'anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise'])
81
-
82
- # compute cosine similarity
83
- # is dot product of vectors n / norms 1*..*n vectors
84
- cosine_similarity = dot_product / (norm(v1) * norm(v2))
85
-
86
-
87
- # return dataframe for space output
88
- return df, cosine_similarity
89
-
90
- gr.Interface(gr_cosine_similarity,
91
- [
92
- gr.inputs.Textbox(lines=1, placeholder="This movie always makes me cry..", default="", label="Text 1"),
93
- gr.inputs.Textbox(lines=1, placeholder="Her dog is sad.", default="", label="Text 2"),
94
-
95
- #gr.outputs.Textbox(type="auto", label="Cosine similarity"),
96
- ],
97
- ["dataframe","text"]
98
- ).launch(debug=True)
99
-
100
-
 
 
1
  # imports
2
  import gradio as gr
3
  import pandas as pd
4
+ import tempfile
5
+ import itertools
6
+ # import required packages
7
+ import torch
8
+ import pandas as pd
9
  import numpy as np
10
  from numpy import dot
11
+ from numpy.linalg import norm, multi_dot
12
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer