Updated Gradio App
Browse files
app.py
CHANGED
@@ -7,27 +7,55 @@ Original file is located at
|
|
7 |
https://colab.research.google.com/drive/1SKjRNc67_9TZPKUGhtfiYMfcpZuMh6s0
|
8 |
"""
|
9 |
|
10 |
-
# Commented out IPython magic to ensure Python compatibility.
|
11 |
# %pip install gradio transformers -q
|
|
|
12 |
|
13 |
# Import the key libraries
|
14 |
import gradio as gr
|
15 |
import torch
|
16 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
17 |
from scipy.special import softmax
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
# Load the tokenizer and model from Hugging Face
|
20 |
-
model_path = "rasmodev/Covid-
|
21 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
22 |
model = AutoModelForSequenceClassification.from_pretrained(model_path)
|
23 |
|
24 |
-
# Preprocess text (username and link placeholders)
|
25 |
def preprocess(text):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
new_text = []
|
27 |
-
for t in
|
28 |
t = '@user' if t.startswith('@') and len(t) > 1 else t
|
29 |
t = 'http' if t.startswith('http') else t
|
30 |
new_text.append(t)
|
|
|
31 |
return " ".join(new_text)
|
32 |
|
33 |
# Perform sentiment analysis
|
@@ -72,9 +100,10 @@ interface = gr.Interface(
|
|
72 |
["This vaccine is terrible!"],
|
73 |
["I don't have a strong opinion about this vaccines."],
|
74 |
["The Vaccine is Good I have had no issues!"]
|
75 |
-
]
|
|
|
76 |
)
|
77 |
|
78 |
# Launch the Gradio app
|
79 |
-
|
80 |
-
|
|
|
7 |
https://colab.research.google.com/drive/1SKjRNc67_9TZPKUGhtfiYMfcpZuMh6s0
|
8 |
"""
|
9 |
|
|
|
10 |
# %pip install gradio transformers -q
|
11 |
+
# %pip install nltk
|
12 |
|
13 |
# Import the key libraries
|
14 |
import gradio as gr
|
15 |
import torch
|
16 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
17 |
from scipy.special import softmax
|
18 |
+
import nltk
|
19 |
+
import re
|
20 |
+
from nltk.corpus import stopwords
|
21 |
+
from nltk.stem import WordNetLemmatizer
|
22 |
+
|
23 |
+
# Download NLTK resources (if not already downloaded)
|
24 |
+
# Fetch the NLTK corpora used by preprocess() (stop-word list and WordNet).
for _resource in ('stopwords', 'wordnet'):
    nltk.download(_resource)

# Load the tokenizer and model from Hugging Face
model_path = "rasmodev/Covid-19_Sentiment_Analysis_RoBERTa_Model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
|
31 |
|
32 |
+
# Preprocess text (username and link placeholders, and text preprocessing)
|
33 |
def preprocess(text):
    """Normalize raw text before it is passed to the tokenizer.

    Each whitespace-separated token of the lower-cased input is handled as
    follows:

    * a token starting with ``@`` (and longer than one character) becomes
      the placeholder ``@user``;
    * otherwise every character that is not a letter a-z is removed;
    * tokens that end up empty, or that are English stop words, are dropped;
    * the remaining tokens are lemmatized with WordNet;
    * a token that still starts with ``http`` becomes the placeholder
      ``http``.

    Args:
        text: The raw input string.

    Returns:
        The processed tokens joined by single spaces (an empty string when
        nothing survives).
    """
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    new_text = []
    for raw in text.lower().split():
        # Detect mentions on the raw token: stripping non-letters first
        # would remove the '@' and the placeholder could never be applied.
        if raw.startswith('@') and len(raw) > 1:
            new_text.append('@user')
            continue

        # Remove special characters and numbers from the token.
        word = re.sub(r'[^a-z]', '', raw)
        if not word or word in stop_words:
            continue

        word = lemmatizer.lemmatize(word)
        # Collapse links (and anything else starting with "http") into the
        # link placeholder.
        new_text.append('http' if word.startswith('http') else word)

    return " ".join(new_text)
|
60 |
|
61 |
# Perform sentiment analysis
|
|
|
100 |
["This vaccine is terrible!"],
|
101 |
["I don't have a strong opinion about this vaccines."],
|
102 |
["The Vaccine is Good I have had no issues!"]
|
103 |
+
],
|
104 |
+
custom_css="""body { background-color: #f5f5f5; }"""
|
105 |
)
|
106 |
|
107 |
# Launch the Gradio app
|
108 |
+
interface.launch()
|
109 |
+
|