samueldomdey committed
Commit 1029ec0 · Parent(s): 0c27b04

Update app.py

Files changed (1)
  1. app.py +37 -10
app.py CHANGED
@@ -23,11 +23,31 @@ def bulk_function(filename):
         def __getitem__(self, idx):
             return {k: v[idx] for k, v in self.tokenized_texts.items()}

-    # read file lines
-    with open(filename.name, "r") as f:
-        lines = f.readlines()
-    # expects unnamed:0 or index, col name -> strip both
-    lines_s = [item.split("\n")[0].split(",")[-1] for item in lines][1:]
+    # load tokenizer and model, create trainer
+    model_name = "j-hartmann/emotion-english-distilroberta-base"
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForSequenceClassification.from_pretrained(model_name)
+    trainer = Trainer(model=model)
+    print(filename, type(filename))
+    print(filename.name)
+
+
+    # read csv
+    # even if index given, drop it
+    df_input = pd.read_csv(filename.name, index_col=False)
+    print("df_input", df_input)
+
+    # expect csv format to be in:
+    # 1: ID
+    # 2: Texts
+    # no index
+    # store ids in ordered list
+    ids = df_input[df_input.columns[0]].to_list()
+
+    # store sentences in ordered list
+    # expects sentences to be in second col
+    # of csv with two cols
+    lines_s = df_input[df_input.columns[1]].to_list()

     # Tokenize texts and create prediction data set
     tokenized_texts = tokenizer(lines_s,truncation=True,padding=True)
@@ -40,10 +60,13 @@ def bulk_function(filename):
     preds = predictions.predictions.argmax(-1)
     labels = pd.Series(preds).map(model.config.id2label)
     scores = (np.exp(predictions[0])/np.exp(predictions[0]).sum(-1,keepdims=True)).max(1)
+
+    # round scores
+    scores_rounded = [round(score, 2) for score in scores]
+
     # scores raw
     temp = (np.exp(predictions[0])/np.exp(predictions[0]).sum(-1,keepdims=True))

-    # work in progress
     # container
     anger = []
     disgust = []
@@ -64,8 +87,8 @@ def bulk_function(filename):
         surprise.append(round(temp[i][6], 2))

     # define df
-    df = pd.DataFrame(list(zip(lines_s,preds,labels,scores, anger, disgust, fear, joy, neutral, sadness, surprise)), columns=['text','pred','label','score', 'anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise'])
-
+    df = pd.DataFrame(list(zip(ids,lines_s,preds,labels,scores_rounded, anger, disgust, fear, joy, neutral, sadness, surprise)), columns=[df_input.columns[0], df_input.columns[1],'pred','label','score', 'anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise'])
+    print(df)
     # save results to csv
     YOUR_FILENAME = filename.name.split(".")[0] + "_emotion_predictions" + ".csv" # name your output file
     df.to_csv(YOUR_FILENAME)
@@ -73,5 +96,9 @@ def bulk_function(filename):
     # return dataframe for space output
     return YOUR_FILENAME

-gr.Interface(bulk_function, [gr.inputs.File(file_count="single", type="file", label="Upload file", optional=False),],["file"],examples=[['Emotion/YOUR_FILENAME.csv'],],
-    ).launch(debug=True)
+gr.Interface(bulk_function, inputs=[gr.inputs.File(file_count="single", type="file", label="Upload file", optional=False),],
+    outputs=[gr.outputs.File(label="Output file")],
+    # examples=[["YOUR_FILENAME.csv"]], # computes, doesn't export df so far
+    theme="huggingface",
+    allow_flagging=False,
+    ).launch(debug=True)
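
The new reading logic assumes a two-column CSV (IDs in the first column, texts in the second) and drops any index column. A minimal sketch of that expected input, using a hypothetical file name and column headers that are not part of the commit:

import pandas as pd

# Hypothetical example file; the commit expects column 1 = IDs, column 2 = texts, no index column.
pd.DataFrame({"ID": [1, 2], "Text": ["I love this!", "This is awful."]}).to_csv(
    "example_input.csv", index=False)

# Same read pattern as the new app.py code: ignore any index, pick columns by position.
df_input = pd.read_csv("example_input.csv", index_col=False)
ids = df_input[df_input.columns[0]].to_list()
lines_s = df_input[df_input.columns[1]].to_list()
print(ids, lines_s)  # [1, 2] ['I love this!', 'This is awful.']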
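
The score columns come from a plain softmax over the model logits, with the top-class score and the per-emotion scores rounded to two decimals. A self-contained sketch of that step, with dummy logits standing in for predictions.predictions (the values below are illustrative only):

import numpy as np

# Dummy logits standing in for predictions.predictions: 2 texts x 7 emotion classes.
logits = np.array([[2.0, 0.1, 0.1, 3.0, 0.5, 0.2, 0.1],
                   [0.2, 0.1, 0.3, 0.1, 0.4, 2.5, 0.2]])

# Softmax as in the diff: exponentiate, then normalise over the class axis.
temp = np.exp(logits) / np.exp(logits).sum(-1, keepdims=True)

preds = temp.argmax(-1)                                      # winning class index per text
scores_rounded = [round(score, 2) for score in temp.max(1)]  # rounded top-class score
per_class = [[round(p, 2) for p in row] for row in temp]     # rounded per-emotion scores
print(preds, scores_rounded, per_class)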