Spaces:

rasmodev
/

Covid19_Tweet_Sentiment_Analysis_App

Running

App Files Files Community

rasmodev committed on Oct 6, 2023

Commit

2ce2001

1 Parent(s): 2a7bdd2

Updated Gradio App

Browse files

Files changed (1) hide show

app.py +36 -7

app.py CHANGED Viewed

@@ -7,27 +7,55 @@ Original file is located at
     https://colab.research.google.com/drive/1SKjRNc67_9TZPKUGhtfiYMfcpZuMh6s0
 """
-# Commented out IPython magic to ensure Python compatibility.
 # %pip install gradio transformers -q
 # Import the key libraries
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from scipy.special import softmax
 # Load the tokenizer and model from Hugging Face
-model_path = "rasmodev/Covid-19_Sentiment_Analysis_BERT_Model"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForSequenceClassification.from_pretrained(model_path)
-# Preprocess text (username and link placeholders)
 def preprocess(text):
     """Replace user mentions and links in *text* with fixed placeholders and return the result."""
     # Tokens are produced by splitting on single spaces and re-joined the same
     # way, so the spacing of the input is preserved.
     new_text = []
-    for t in text.split(" "):
         # A token starting with '@' that is longer than one character becomes '@user'.
         t = '@user' if t.startswith('@') and len(t) > 1 else t
         # A token starting with 'http' becomes the bare placeholder 'http'.
         t = 'http' if t.startswith('http') else t
         new_text.append(t)
     return " ".join(new_text)
 # Perform sentiment analysis
@@ -72,9 +100,10 @@ interface = gr.Interface(
         ["This vaccine is terrible!"],
         ["I don't have a strong opinion about this vaccines."],
         ["The Vaccine is Good I have had no issues!"]
-    ]
 )
 # Launch the Gradio app
-if __name__ == '__main__':
-    interface.launch(server_name="0.0.0.0", server_port=7860)

     https://colab.research.google.com/drive/1SKjRNc67_9TZPKUGhtfiYMfcpZuMh6s0
 """
 # %pip install gradio transformers -q
+# %pip install nltk
 # Import the key libraries
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from scipy.special import softmax
+import nltk
+import re
+from nltk.corpus import stopwords
+from nltk.stem import WordNetLemmatizer
+# Download NLTK resources (if not already downloaded)
+nltk.download('stopwords')
+nltk.download('wordnet')
 # Load the tokenizer and model from Hugging Face
+model_path = "rasmodev/Covid-19_Sentiment_Analysis_RoBERTa_Model"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForSequenceClassification.from_pretrained(model_path)
+# Preprocess text (username and link placeholders, and text preprocessing)
 def preprocess(text):
     """Normalize a tweet before it is passed to the tokenizer.
     Mentions (tokens starting with '@' and longer than one character) are
     replaced by the placeholder '@user'. Every other token is lowercased,
     stripped of non-letter characters, dropped if it is empty or an English
     stopword, lemmatized, and collapsed to 'http' if it starts with 'http'.
     Args:
         text: Raw tweet text.
     Returns:
         The surviving tokens joined by single spaces ('' if none survive).
     """
     stop_words = set(stopwords.words('english'))
     lemmatizer = WordNetLemmatizer()
     new_text = []
     for token in text.split():
         # Decide the mention placeholder on the RAW token: the cleaning step
         # below strips '@', so checking afterwards could never match.
         if token.startswith('@') and len(token) > 1:
             new_text.append('@user')
             continue
         # Lowercase and keep letters only (drops digits and punctuation).
         word = re.sub(r'[^a-zA-Z]', '', token.lower())
         # Skip tokens that vanished entirely or carry little meaning.
         if not word or word in stop_words:
             continue
         # Reduce the word to its base form.
         word = lemmatizer.lemmatize(word)
         # A cleaned URL such as 'httpstcoabc' still starts with 'http'.
         new_text.append('http' if word.startswith('http') else word)
     return " ".join(new_text)
 # Perform sentiment analysis
         ["This vaccine is terrible!"],
         ["I don't have a strong opinion about this vaccines."],
         ["The Vaccine is Good I have had no issues!"]
+    ],
+    custom_css="""body { background-color: #f5f5f5; }"""
 )
 # Launch the Gradio app
+interface.launch()