"""
    @author : Sakshi Tantak
"""

# Imports
import re
import string
import pickle
from time import time

import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

import emoji

from paths import COUNT_VECTORIZER_PATH, TFIDF_VECTORIZER_PATH, NB_MODEL_PATH as MODEL_PATH

nltk.download('punkt')
nltk.download('omw-1.4')
nltk.download('stopwords')
nltk.download('wordnet')

stops = stopwords.words('english')
negatives = ['no','nor','not','ain','aren',"aren't",'couldn',"couldn't",'didn',"didn't",'doesn',"doesn't",'hadn',"hadn't",'hasn',
  "hasn't",'haven',"haven't",'isn',"isn't",'mightn',"mightn't",'mustn',"mustn't",'needn',"needn't",'shan',"shan't",'shouldn',"shouldn't",
  'wasn',"wasn't",'weren',"weren't","won't",'wouldn',"wouldn't",'don',"don't"]
stops = set([stop for stop in stops if stop not in negatives])

lemmatizer = WordNetLemmatizer()
MODEL, COUNT_VECTORIZER, TFIDF = None, None, None

def clean_text(text):
    text = re.sub(r'[\.]+', '.', text)
    # print(text)
    text = re.sub(r'[\!]+', '!', text)
    # print(text)
    text = re.sub(r'[\?]+', '!', text)
    # print(text)
    text = re.sub(r'\s+', ' ', text).strip().lower()
    # print(text)
    text = re.sub(r'@\w+', '', text).strip().lower()
    # print(text)
    text = re.sub(r'\s[n]+[o]+', ' no', text)
    # print(text)
    text = re.sub(r'n\'t', 'n not', text)
    # print(text)
    text = re.sub(r'\'nt', 'n not', text)
    # print(text)
    text = re.sub(r'\'re', ' are', text)
    # print(text)
    text = re.sub(r'\'s', ' is', text)
    # print(text)
    text = re.sub(r'\'d', ' would', text)
    # print(text)
    text = re.sub(r'\'ll', ' will', text)
    # print(text)
    text = re.sub(r'\'ve', ' have', text)
    # print(text)
    text = re.sub(r'\'m', ' am', text)
    # print(text)
    # map variations of nope to no
    text = re.sub(r'\s[n]+[o]+[p]+[e]+', ' no', text)
    # print(text)
    # clean websites mentioned in text
    text = re.sub(r'(https|http)?:\/\/(\w|\.|\/|\?|\=|\&|\%|\~)*\b', '', text, flags=re.MULTILINE).strip()
    # print(text)
    text = re.sub(r'(www.)(\w|\.|\/|\?|\=|\&|\%)*\b', '', text, flags=re.MULTILINE).strip()
    # print(text)
    text = re.sub(r'\w+.com', '', text).strip()
    # print(text)
    text = emoji.demojize(text)
    return text

def remove_punctuation(text):
    translator = str.maketrans(string.punctuation, ' '*len(string.punctuation))
    text = text.translate(translator)
    return re.sub(r'\s+', ' ', text).strip()

def remove_numbers(text):
  return re.sub(r'[0-9]+', '', text)

def remove_stopwords_and_lemmatize(text):
    tokens = word_tokenize(text)
    tokens = [token.strip() for token in tokens if token.strip() not in stops]
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    return ' '.join(tokens)

def load_model():
    global MODEL, COUNT_VECTORIZER, TFIDF

    if MODEL is None:
        with open(MODEL_PATH, 'rb') as f:
            print('Loading classifier ...')
            start = time()
            MODEL = pickle.load(f)
            print(f'Time taken to load model = {time() - start}')
        f.close()

    if COUNT_VECTORIZER is None:
        with open(COUNT_VECTORIZER_PATH, 'rb') as f:
            print('Loading count vectorizer ...')
            start = time()
            COUNT_VECTORIZER = pickle.load(f)
            print(f'Time taken to load count vectorizer = {time() - start}')
        f.close()

    if TFIDF is None:
        with open(TFIDF_VECTORIZER_PATH, 'rb') as f:
            print('Loading tfidf vectorizer ...')
            start = time()
            TFIDF = pickle.load(f)
            print(f'Time taken to load tfidf vectorizer = {time() - start}')
        f.close()

def predict(text):
    if MODEL is None:
        load_model()

    text = clean_text(text)
    text = remove_numbers(text)
    text = remove_punctuation(text)
    text = remove_stopwords_and_lemmatize(text)

    vector = COUNT_VECTORIZER.transform([text]).toarray()
    vector = TFIDF.transform(vector).toarray()
    start = time()
    prediction = MODEL.predict(vector)
    print(prediction)
    prediction = MODEL.predict(vector).item()
    print(f'Inference time = {time() - start}')
    return ('positive', 1) if prediction == 1 else ('negative', 1)

if __name__ == '__main__':
    text = input('Enter tweet : ')
    # text = "i am so bored!!!"
    prediction = predict(text)
    print(text, ' : ', prediction)