rasmodev committed on
Commit
2ce2001
·
1 Parent(s): 2a7bdd2

Updated Gradio App

Browse files
Files changed (1) hide show
  1. app.py +36 -7
app.py CHANGED
@@ -7,27 +7,55 @@ Original file is located at
7
  https://colab.research.google.com/drive/1SKjRNc67_9TZPKUGhtfiYMfcpZuMh6s0
8
  """
9
 
10
- # Commented out IPython magic to ensure Python compatibility.
11
  # %pip install gradio transformers -q
 
12
 
13
  # Import the key libraries
14
  import gradio as gr
15
  import torch
16
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
17
  from scipy.special import softmax
 
 
 
 
 
 
 
 
18
 
19
  # Load the tokenizer and model from Hugging Face
20
- model_path = "rasmodev/Covid-19_Sentiment_Analysis_BERT_Model"
21
  tokenizer = AutoTokenizer.from_pretrained(model_path)
22
  model = AutoModelForSequenceClassification.from_pretrained(model_path)
23
 
24
- # Preprocess text (username and link placeholders)
25
  def preprocess(text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  new_text = []
27
- for t in text.split(" "):
28
  t = '@user' if t.startswith('@') and len(t) > 1 else t
29
  t = 'http' if t.startswith('http') else t
30
  new_text.append(t)
 
31
  return " ".join(new_text)
32
 
33
  # Perform sentiment analysis
@@ -72,9 +100,10 @@ interface = gr.Interface(
72
  ["This vaccine is terrible!"],
73
  ["I don't have a strong opinion about this vaccines."],
74
  ["The Vaccine is Good I have had no issues!"]
75
- ]
 
76
  )
77
 
78
  # Launch the Gradio app
79
- if __name__ == '__main__':
80
- interface.launch(server_name="0.0.0.0", server_port=7860)
 
7
  https://colab.research.google.com/drive/1SKjRNc67_9TZPKUGhtfiYMfcpZuMh6s0
8
  """
9
 
 
10
  # %pip install gradio transformers -q
11
+ # %pip install nltk
12
 
13
  # Import the key libraries
14
  import gradio as gr
15
  import torch
16
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
17
  from scipy.special import softmax
18
+ import nltk
19
+ import re
20
+ from nltk.corpus import stopwords
21
+ from nltk.stem import WordNetLemmatizer
22
+
23
+ # Download NLTK resources (if not already downloaded)
24
+ nltk.download('stopwords')
25
+ nltk.download('wordnet')
26
 
27
  # Load the tokenizer and model from Hugging Face
28
+ model_path = "rasmodev/Covid-19_Sentiment_Analysis_RoBERTa_Model"
29
  tokenizer = AutoTokenizer.from_pretrained(model_path)
30
  model = AutoModelForSequenceClassification.from_pretrained(model_path)
31
 
32
+ # Preprocess text (username and link placeholders, and text preprocessing)
33
  def preprocess(text):
34
+ # Convert text to lowercase
35
+ text = text.lower()
36
+
37
+ # Remove special characters, numbers, and extra whitespaces
38
+ text = re.sub(r'[^a-zA-Z\s]', '', text)
39
+
40
+ # Remove stopwords (common words that don't carry much meaning)
41
+ stop_words = set(stopwords.words('english'))
42
+ words = text.split() # Split text into words
43
+ words = [word for word in words if word not in stop_words]
44
+
45
+ # Lemmatize words to their base form
46
+ lemmatizer = WordNetLemmatizer()
47
+ words = [lemmatizer.lemmatize(word) for word in words]
48
+
49
+ # Rejoin the preprocessed words into a single string
50
+ processed_text = ' '.join(words)
51
+
52
+ # Process placeholders
53
  new_text = []
54
+ for t in processed_text.split(" "):
55
  t = '@user' if t.startswith('@') and len(t) > 1 else t
56
  t = 'http' if t.startswith('http') else t
57
  new_text.append(t)
58
+
59
  return " ".join(new_text)
60
 
61
  # Perform sentiment analysis
 
100
  ["This vaccine is terrible!"],
101
  ["I don't have a strong opinion about this vaccines."],
102
  ["The Vaccine is Good I have had no issues!"]
103
+ ],
104
+ custom_css="""body { background-color: #f5f5f5; }"""
105
  )
106
 
107
  # Launch the Gradio app
108
+ interface.launch()
109
+