File size: 4,782 Bytes
c24562e
 
0c396ba
 
 
 
 
c24562e
0c396ba
c24562e
0c396ba
 
 
c24562e
0c396ba
 
 
 
 
c24562e
 
 
 
 
0c396ba
 
 
c24562e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c396ba
 
 
 
 
 
c24562e
 
 
0c396ba
 
 
 
c24562e
 
 
 
0c396ba
 
 
 
c24562e
 
 
 
0c396ba
c24562e
 
0c396ba
c24562e
 
 
 
0c396ba
 
 
 
 
 
 
 
 
 
c24562e
 
0c396ba
c24562e
0c396ba
c24562e
0c396ba
c24562e
0c396ba
c24562e
0c396ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c24562e
 
 
0c396ba
 
c24562e
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import gradio as gr
import numpy as np
from keras.models import load_model
import re
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import tokenizer_from_json
import re
import joblib
import json

# Load the tokenizer configuration that was serialized to JSON at training
# time, then rebuild the Keras tokenizer from it.  The RNN model depends on
# this exact tokenizer's word index, so both must come from the same run.
with open('tok.json', 'r') as json_file:
    tokenizer_json = json.load(json_file)

# Rebuild the Keras Tokenizer object from its JSON representation.
tokenizer = tokenizer_from_json(tokenizer_json)
rnn_model = load_model("rnn_model.h5")

# Classical scikit-learn models persisted with joblib, plus a dense Keras
# network; all four consume the shared TF-IDF features loaded below.
lr_model = joblib.load("logistic_model.pkl")
svm_model = joblib.load("svm_model.pkl")
nn_model = load_model("dl_model.h5")
mnb_model = joblib.load("mnb_model.pkl")

# TF-IDF vectorizer fitted at training time — must match the models above.
tfidf_vectorizer = joblib.load("tfidf_vectorizer.pkl")

# Pre-compiled pattern matching a single HTML/XML tag (anything in <...>).
TAG_RE = re.compile(r'<[^>]+>')

def remove_tags(text):
    """Strip every HTML/XML tag (any ``<...>`` span) from *text*."""
    # re caches compiled patterns, so this is equivalent to the module-level
    # TAG_RE.sub('', text).
    return re.sub(r'<[^>]+>', '', text)

def preprocess_text(sen):
    """Normalize raw review text for the classical models.

    Pipeline: strip HTML tags, keep ASCII letters only, drop isolated
    single letters, collapse whitespace runs to single spaces.
    """
    # Strip HTML tags (inlined equivalent of remove_tags).
    cleaned = re.sub(r'<[^>]+>', '', sen)
    # Replace every non-letter (digits, punctuation) with a space.
    cleaned = re.sub(r'[^a-zA-Z]', ' ', cleaned)
    # Remove stray single letters that are surrounded by whitespace.
    cleaned = re.sub(r"\s+[a-zA-Z]\s+", ' ', cleaned)
    # Collapse any run of whitespace into one space.
    return re.sub(r'\s+', ' ', cleaned)

def preprocess_text_for_rnn(text, tokenizer, maxlen):
    """Clean *text* and turn it into a padded integer sequence for the RNN.

    Args:
        text: raw input string.
        tokenizer: fitted Keras tokenizer mapping words to integer ids.
        maxlen: target sequence length; shorter inputs are post-padded.

    Returns:
        A 2-D array of shape (1, maxlen) ready for ``rnn_model.predict``.
    """
    cleaned = preprocess_text(text)
    encoded = tokenizer.texts_to_sequences([cleaned])
    return pad_sequences(encoded, padding='post', maxlen=maxlen)

def predict_lr(text):
    """Classify *text* with the logistic-regression model.

    Returns:
        tuple: (predicted class index, per-class probability array),
        where index 0 is negative and 1 is positive sentiment.
    """
    preprocessed_text = preprocess_text(text)
    vectorized_text = tfidf_vectorizer.transform([preprocessed_text])
    # scikit-learn estimators accept the sparse TF-IDF matrix directly;
    # densifying a vocabulary-sized row (the old .toarray()) wastes memory
    # for no change in the result.
    prediction_probs = lr_model.predict_proba(vectorized_text)[0]
    prediction = int(np.argmax(prediction_probs))
    return prediction, prediction_probs

def predict_svm(text):
    """Classify *text* with the SVM model.

    Returns:
        tuple: (predicted class index, per-class probability array),
        where index 0 is negative and 1 is positive sentiment.

    Note: requires the SVM to have been fitted with ``probability=True``,
    which the original code already assumed.
    """
    preprocessed_text = preprocess_text(text)
    vectorized_text = tfidf_vectorizer.transform([preprocessed_text])
    # Pass the sparse TF-IDF row straight to scikit-learn; the previous
    # .toarray() densification was unnecessary and memory-hungry.
    prediction_probs = svm_model.predict_proba(vectorized_text)[0]
    prediction = int(np.argmax(prediction_probs))
    return prediction, prediction_probs

def predict_nn(text):
    """Classify *text* with the dense Keras network.

    Returns:
        tuple: (predicted class index, per-class probability array).
    """
    cleaned = preprocess_text(text)
    features = tfidf_vectorizer.transform([cleaned])
    # Keras models require a dense array, so expand the sparse TF-IDF row.
    probs = nn_model.predict(features.toarray())[0]
    return int(np.argmax(probs)), probs

def predict_mnb(text):
    """Classify *text* with the multinomial naive-Bayes model.

    Returns:
        tuple: (predicted class index, per-class probability array),
        where index 0 is negative and 1 is positive sentiment.
    """
    preprocessed_text = preprocess_text(text)
    vectorized_text = tfidf_vectorizer.transform([preprocessed_text])
    # MultinomialNB handles sparse input natively; the old .toarray()
    # built a dense vocabulary-sized vector for nothing.
    prediction_probs = mnb_model.predict_proba(vectorized_text)[0]
    prediction = int(np.argmax(prediction_probs))
    return prediction, prediction_probs

def predict_rnn(text):
    """Classify *text* with the recurrent Keras model.

    Returns:
        tuple: (predicted class index, per-class probability array).
    """
    # maxlen=170 matches the sequence length the RNN was trained with.
    padded = preprocess_text_for_rnn(text, tokenizer, maxlen=170)
    probs = rnn_model.predict(padded)[0]
    return int(np.argmax(probs)), probs

def sentiment_prediction(text, model):
    """Run the selected model on *text* and format the result for Gradio.

    Args:
        text: user-supplied input string.
        model: display name of the model chosen in the dropdown.

    Returns:
        tuple: (sentiment label, probability breakdown string, emoji).
    """
    # Dispatch table instead of a long if/elif chain.
    dispatch = {
        "Logistic Regression": predict_lr,
        "SVM": predict_svm,
        "Neural Network": predict_nn,
        "Multinomial Naive Bayes": predict_mnb,
        "Recurrent Neural Network": predict_rnn,
    }
    predict_fn = dispatch.get(model)
    if predict_fn is None:
        # Previously an unrecognized model name left `percentages` empty and
        # crashed with IndexError below; report it gracefully instead.
        return "Unknown model", "No prediction available", "❓"

    prediction, percentages = predict_fn(text)

    labels = ["Negative", "Positive"]
    prediction_label = labels[prediction]
    # Emoji reflects the predicted class (1 = positive).
    emoji = "πŸ˜ƒ" if prediction == 1 else "😒"

    # Fixed format string: the original prefixed only the second label with
    # the word "Percentage", producing an inconsistent message.
    breakdown = f"{labels[0]}: {percentages[0]:.2%}, {labels[1]}: {percentages[1]:.2%}"
    return prediction_label, breakdown, emoji

# Create the Gradio interface: one text box plus a model-selection dropdown
# feed sentiment_prediction; its three return values (label, probability
# breakdown, emoji) map onto the three Label outputs.  live=True re-runs the
# prediction on every keystroke.
iface = gr.Interface(
    fn=sentiment_prediction,
    inputs=[gr.Textbox(type="text", label="Enter Text"), gr.Dropdown(["Logistic Regression", "SVM", "Neural Network", "Multinomial Naive Bayes", "Recurrent Neural Network"], label="Select Model")],
    outputs=[gr.Label(), gr.Label(), gr.Label()],
    live=True,
    title="Sentiment Analysis with Model Selection",
    description="Enter a text and choose a model for sentiment prediction.",
)

# Launch the Gradio interface (blocks and serves the web UI).
iface.launch()