Spaces:
Runtime error
Runtime error
File size: 2,374 Bytes
6cf89af |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
"""
@author : Sakshi Tantak
"""
# Imports
import re
from time import time
import emoji
import spacy
import spacy_transformers
from paths import SPACY_MODEL_PATH as MODEL_PATH
# Contraction suffixes expanded by clean_text, applied in this order.
# Each entry is (regex pattern, replacement).
_CONTRACTION_RULES = (
    (r"n't", 'n not'),
    (r"'nt", 'n not'),
    (r"'re", ' are'),
    (r"'s", ' is'),
    (r"'d", ' would'),
    (r"'ll", ' will'),
    (r"'ve", ' have'),
    (r"'m", ' am'),
)


def clean_text(text: str) -> str:
    """Normalise a piece of text before it is passed to the classifier.

    The steps run in a fixed order:

    1. Collapse runs of '.', '!' and '?' to a single character.
    2. Collapse whitespace runs to one space, strip, and lower-case.
    3. Remove ``@mention`` tokens.
    4. Collapse elongated "no" / "nope" variants to "no" and expand the
       contraction suffixes listed in ``_CONTRACTION_RULES``.
    5. Remove URLs, ``www.`` addresses and bare ``name.com`` domains.
    6. Pass the result through ``emoji.demojize``.

    Args:
        text: Raw input text.

    Returns:
        The cleaned, lower-cased text.
    """
    # Collapse repeated sentence punctuation.
    text = re.sub(r'\.+', '.', text)
    text = re.sub(r'!+', '!', text)
    # NOTE(review): runs of '?' are replaced with '!' rather than '?'. This
    # looks like a copy-paste slip, but it is kept as-is because the trained
    # model may have seen text cleaned this way -- confirm before changing.
    text = re.sub(r'\?+', '!', text)

    # Normalise whitespace and case.
    text = re.sub(r'\s+', ' ', text).strip().lower()

    # Drop @mentions.
    text = re.sub(r'@\w+', '', text).strip()

    # Map elongated variations of "no" (e.g. " nooo") to " no".
    text = re.sub(r'\sn+o+', ' no', text)

    # Expand contractions.
    for pattern, replacement in _CONTRACTION_RULES:
        text = re.sub(pattern, replacement, text)

    # Map variations of "nope" to "no".
    text = re.sub(r'\sn+o+p+e+', ' no', text)

    # Clean websites mentioned in the text.
    text = re.sub(r'(https|http)?://[\w./?=&%~]*\b', '', text).strip()
    # The dots below are escaped so that only a literal "www." / ".com" is
    # matched; an unescaped '.' matches any character and would also delete
    # parts of ordinary words such as "welcome" or "a comment".
    text = re.sub(r'www\.[\w./?=&%]*\b', '', text).strip()
    text = re.sub(r'\w+\.com\b', '', text).strip()

    return emoji.demojize(text)
class SentimentClassifier:
    """Binary sentiment classifier backed by the spaCy pipeline at MODEL_PATH."""

    def __init__(self):
        """Load the spaCy pipeline and print how long loading took."""
        print('Loading SpaCy sentiment classifier ...')
        load_started = time()
        self.nlp = spacy.load(MODEL_PATH)
        print(f'Time taken to load SpaCy classifier = {time() - load_started}')

    def predict(self, text):
        """Clean ``text``, run the pipeline on it and return the winning label.

        Prints the cleaned text, the pipeline's category scores and the
        inference time along the way.

        Returns:
            A ``(label, score)`` tuple. The label is ``'positive'`` only when
            the positive score is strictly greater than the negative score;
            otherwise ``'negative'`` is returned with the negative score.
        """
        cleaned = clean_text(text)
        print(f'cleaned text : {cleaned}')

        inference_started = time()
        doc = self.nlp(cleaned)
        print(doc.cats)
        print(f'Inference time = {time() - inference_started}')

        positive_score = doc.cats['positive']
        negative_score = doc.cats['negative']
        if positive_score > negative_score:
            return 'positive', positive_score
        return 'negative', negative_score
if __name__ == '__main__':
    # Hand the raw input straight to predict(): it calls clean_text() itself,
    # so cleaning here as well would run every substitution twice (the second
    # time over the emoji names already produced by the first pass).
    text = input('Input tweet : ')
    classifier = SentimentClassifier()
    prediction = classifier.predict(text)
    print(text, ' : ', prediction)