Winnie-Kay committed on
Commit
18ef847
·
1 Parent(s): 8c483b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +180 -67
app.py CHANGED
@@ -1,75 +1,188 @@
1
- import os
2
- os.system('pip install torch')
3
- os.system('pip install transformers')
4
- os.system('pip install scipy')
5
- os.system('pip install gradio')
6
 
 
7
 
8
- from scipy.special import softmax
9
- import gradio as gr
10
- from transformers import (
11
- AutoTokenizer,
12
- AutoConfig,
13
- AutoModelForSequenceClassification,
14
- TFAutoModelForSequenceClassification)
15
- # Define the model path where the pre-trained model is saved on the Hugging Face model hub
16
- model_path = "Winnie-Kay/Finetuned_bert_model"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- # Initialize the tokenizer for the pre-trained model
19
- tokenizer = AutoTokenizer.from_pretrained(model_path)
20
 
21
- # Load the configuration for the pre-trained model
22
- config = AutoConfig.from_pretrained(model_path)
23
 
24
- # Load the pre-trained model
25
- model = AutoModelForSequenceClassification.from_pretrained(model_path)
26
 
27
  # Define a function to preprocess the text data
 
28
  def preprocess(text):
29
- new_text = []
30
- # Replace user mentions with '@user'
31
- for t in text.split(" "):
32
- t = '@user' if t.startswith('@') and len(t) > 1 else t
33
- # Replace links with 'http'
34
- t = 'http' if t.startswith('http') else t
35
- new_text.append(t)
36
- # Join the preprocessed text
37
- return " ".join(new_text)
38
-
39
- # Define a function to perform sentiment analysis on the input text
40
- def sentiment_analysis(text):
41
- # Preprocess the input text
42
- text = preprocess(text)
43
-
44
- # Tokenize the input text using the pre-trained tokenizer
45
- encoded_input = tokenizer(text, return_tensors='pt')
46
-
47
- # Feed the tokenized input to the pre-trained model and obtain output
48
- output = model(**encoded_input)
49
-
50
- # Obtain the prediction scores for the output
51
- scores_ = output[0][0].detach().numpy()
52
-
53
- # Apply softmax activation function to obtain probability distribution over the labels
54
- scores_ = softmax(scores_)
55
-
56
- # Format the output dictionary with the predicted scores
57
- labels = ['Negative', 'Neutral', 'Positive']
58
- scores = {l:float(s) for (l,s) in zip(labels, scores_) }
59
-
60
- # Return the scores
61
- return scores
62
-
63
- # Define a Gradio interface to interact with the model
64
- demo = gr.Interface(
65
- fn=sentiment_analysis, # Function to perform sentiment analysis
66
- inputs=gr.Textbox(placeholder="Write your tweet here..."), # Text input field
67
- outputs="label", # Output type (here, we only display the label with the highest score)
68
- interpretation="default", # Interpretation mode
69
- examples=[["Have Fun with it...will be updated soon!"]],# Example input(s) to display on the interface
70
- image=gr.Image("https://www.reputationx.com/hubfs/what-is-sentiment-analysis-cover.jpg"),
71
- css= "body {background-color: black}"
72
- )
73
-
74
- # Launch the Gradio interface
75
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
 
 
 
 
2
 
3
+ import transformers
4
 
5
+ import torch
6
+
7
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
+
9
+
10
+
11
+
12
+ # Define the paths of the pre-trained models
13
+
# Hugging Face Hub repository IDs of the two fine-tuned sentiment models.
model1_path = "saisi/finetuned-Sentiment-classfication-ROBERTA-Base-model"
model2_path = "saisi/finetuned-Sentiment-classfication-DISTILBERT-model"

# Initialize the tokenizer and models for sentiment analysis.
# Each model is paired with the tokenizer loaded from the same repository.
# NOTE(review): these loads run at module level, so they execute every time
# the script is (re)run -- consider caching if startup time matters.
tokenizer1 = AutoTokenizer.from_pretrained(model1_path)
model1 = AutoModelForSequenceClassification.from_pretrained(model1_path)
tokenizer2 = AutoTokenizer.from_pretrained(model2_path)
model2 = AutoModelForSequenceClassification.from_pretrained(model2_path)
30
 
 
 
31
 
 
 
32
 
 
 
33
 
34
  # Define a function to preprocess the text data
35
+
def preprocess(text):
    """Normalize user mentions and links in ``text``.

    The text is split on single spaces. A token that starts with ``@`` and
    is longer than one character is replaced by ``@user``; a token that
    starts with ``http`` is replaced by ``http``. The tokens are then joined
    back with single spaces, so the original spacing is preserved.

    Args:
        text: The raw input string.

    Returns:
        The string with mentions and links replaced by placeholders.
    """
    new_text = []
    for t in text.split(" "):
        # Replace user mentions with '@user' (a lone '@' is left as-is)
        t = '@user' if t.startswith('@') and len(t) > 1 else t
        # Replace links with 'http'
        t = 'http' if t.startswith('http') else t
        new_text.append(t)
    # Join the preprocessed text
    return " ".join(new_text)
55
+
56
+
57
+
58
+
59
+ # Define a function to perform sentiment analysis on the input text using model 1
60
+
def sentiment_analysis_model1(text):
    """Run sentiment analysis on ``text`` using model 1.

    The text is preprocessed, tokenized with ``tokenizer1``, passed through
    ``model1``, and the resulting scores are turned into probabilities with
    softmax.

    Args:
        text: The raw input string.

    Returns:
        A dict mapping each label name to its probability as a ``float``.
    """
    # Preprocess the input text
    text = preprocess(text)

    # Tokenize the input text using the pre-trained tokenizer; truncation
    # keeps over-long inputs within the model's maximum sequence length.
    encoded_input = tokenizer1(text, return_tensors='pt', truncation=True)

    # Inference only: no gradients are needed, so skip autograd tracking.
    with torch.no_grad():
        output = model1(**encoded_input)

    # First output element holds the prediction scores; the second [0]
    # selects the only item in the batch.
    logits = output[0][0]

    # Apply softmax to obtain a probability distribution over the labels
    scores_ = torch.nn.functional.softmax(logits, dim=0)

    # NOTE(review): only two labels are listed here; zip() silently drops
    # any extra scores if model 1 actually emits more classes -- confirm
    # against the model's configuration.
    labels = ['Negative', 'Positive']
    scores = {l: float(s) for (l, s) in zip(labels, scores_)}

    # Return the scores
    return scores
105
+
106
+
107
+
108
+
109
+ # Define a function to perform sentiment analysis on the input text using model 2
110
+
def sentiment_analysis_model2(text):
    """Run sentiment analysis on ``text`` using model 2.

    The text is preprocessed, tokenized with ``tokenizer2``, passed through
    ``model2``, and the resulting scores are turned into probabilities with
    softmax.

    Args:
        text: The raw input string.

    Returns:
        A dict mapping each label name to its probability as a ``float``.
    """
    # Preprocess the input text
    text = preprocess(text)

    # Tokenize the input text using the pre-trained tokenizer; truncation
    # keeps over-long inputs within the model's maximum sequence length.
    encoded_input = tokenizer2(text, return_tensors='pt', truncation=True)

    # Inference only: no gradients are needed, so skip autograd tracking.
    with torch.no_grad():
        output = model2(**encoded_input)

    # First output element holds the prediction scores; the second [0]
    # selects the only item in the batch.
    logits = output[0][0]

    # Apply softmax to obtain a probability distribution over the labels
    scores_ = torch.nn.functional.softmax(logits, dim=0)

    # Pair each label with its score, in the order the labels are listed
    labels = ['Negative', 'Neutral', 'Positive']
    scores = {l: float(s) for (l, s) in zip(labels, scores_)}

    # Return the scores
    return scores
155
+
156
+
157
+
158
+
159
+ # Define the Streamlit app
160
+
161
+ def app():
162
+
163
+     # Define the app title
164
+
165
+     st.title("Sentiment Analysis")
166
+
167
+
168
+
169
+
170
+     # Define the input field
171
+
172
+     text_input = st.text_input("Enter text:")
173
+
174
+
175
+
176
+
177
+     # Define the model selection dropdown
178
+
179
+     model_selection = st.selectbox("Select a model:", ["Model 1", "Model 2"])
180
+
181
+
182
+
183
+
184
+     # Perform sentiment analysis when the submit button is clicked
185
+
186
+     if st.button("Submit"):
187
+
188
+         if text_input