File size: 3,060 Bytes
e07e824
6687c9a
 
 
 
 
 
 
870d31e
 
 
 
e07e824
6687c9a
 
 
 
 
e07e824
6687c9a
 
 
 
 
 
 
 
 
 
 
 
 
ba7dd0b
6687c9a
 
 
 
 
 
 
 
 
 
 
ba7dd0b
 
6687c9a
ba7dd0b
 
 
 
6687c9a
 
 
ba7dd0b
6687c9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
06dcc43
5b6e62c
6687c9a
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import gc
import os
import pickle
import subprocess
import sys

import gradio as gr
import numpy as np
import pandas as pd
import spacy
import tensorflow as tf
from tqdm import tqdm

# Make sure the SpaCy model package is installed before it is loaded below.
# The download runs under the interpreter executing this script
# (sys.executable) instead of whatever "python" resolves to on PATH, is
# skipped when the package is already installed, and aborts start-up with
# CalledProcessError if it fails rather than failing later in spacy.load().
if not spacy.util.is_package("en_core_web_lg"):
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_lg"],
        check=True,
    )

# Load the four Keras models whose predictions classify_question() blends.
model_1 = tf.keras.models.load_model("model_1.h5")
model_2 = tf.keras.models.load_model("model_2.h5")
model_3 = tf.keras.models.load_model("model_3.h5")
model_4 = tf.keras.models.load_model("model_4.h5")

# Load dictionaries
# NOTE(security): pickle.load can execute arbitrary code embedded in the
# file; only ship .pkl files that come from a trusted source.
with open('word_dict.pkl', 'rb') as f:
    word_dict = pickle.load(f)  # looked up by token text in preprocess_text()

with open('lemma_dict.pkl', 'rb') as f:
    lemma_dict = pickle.load(f)  # not referenced by the code visible in this file

# Load SpaCy NLP model with the parser, NER and tagger components disabled.
nlp = spacy.load('en_core_web_lg', disable=['parser', 'ner', 'tagger'])
# Register IS_STOP so it is decided from the lowercased token text.
nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)

def preprocess_text(text):
    """Tokenize one question with SpaCy and return its word indices.

    Args:
        text: The raw question string. ``None`` or an empty string yields
            an empty sequence.

    Returns:
        A list of integer indices, one per kept token, in token order.
        Tokens present in ``word_dict`` map to their stored index; tokens
        that are not present map to 0.
    """
    if not text:
        return []

    # nlp.pipe() expects an iterable of texts, so handing it a single string
    # does not produce that string's tokens. Calling the pipeline directly
    # returns one Doc whose items are the tokens of ``text``.
    doc = nlp(text)

    # Unknown words are mapped to 0 instead of being added to word_dict:
    # growing the shared dictionary per request would invent indices that
    # were never in the loaded dictionary. 0 is also the value
    # pad_sequences() pads with by default, so it is always an index the
    # models already receive.
    # NOTE(review): confirm the training pipeline treats unknown words as
    # padding as well.
    #
    # NOTE(review): the pipeline is loaded with the tagger disabled, so
    # pos_ may never equal "PUNCT" here; the filter is kept exactly as
    # written so preprocessing is not otherwise altered.
    return [
        word_dict.get(token.text, 0)
        for token in doc
        if token.pos_ != "PUNCT"
    ]

def classify_question(text):
    """Classify a question as sincere or insincere with a weighted ensemble.

    The question is converted to word indices by ``preprocess_text``, padded
    to a fixed length, and scored by the four loaded models. The blended
    score is the weighted sum of the four model outputs (weights 0.15, 0.35,
    0.15, 0.35); a blended score above 0.35 is labelled "Insincere".

    Args:
        text: The question entered by the user.

    Returns:
        A ``(label, probs)`` tuple. ``label`` is ``"Insincere"`` or
        ``"Sincere"``. ``probs`` maps "Model N Probability" to each model's
        own (unweighted) output and "Average Probability" to the weighted
        blend that the label is based on.
    """
    # Preprocess the text
    seq = preprocess_text(text)
    padded_seq = tf.keras.preprocessing.sequence.pad_sequences([seq], maxlen=55)  # Adjust maxlen if needed
    BATCH_SIZE = 512

    # (output key, model, ensemble weight); the weights sum to 1.0.
    weighted_models = (
        ("Model 1 Probability", model_1, 0.15),
        ("Model 2 Probability", model_2, 0.35),
        ("Model 3 Probability", model_3, 0.15),
        ("Model 4 Probability", model_4, 0.35),
    )

    probs = {}
    avg_pred = 0.0
    for key, model, weight in weighted_models:
        # One input row goes in, so squeeze() leaves a single value.
        prob = float(np.squeeze(model.predict(padded_seq, batch_size=BATCH_SIZE, verbose=2)))
        # Report the model's own output; previously the weight-scaled
        # contribution was shown under a "Probability" key.
        probs[key] = prob
        avg_pred += weight * prob

    probs["Average Probability"] = avg_pred
    label = "Insincere" if avg_pred > 0.35 else "Sincere"

    return label, probs

# Example questions offered as clickable examples beneath the input box.
example_questions = [
    "Is this the best place to get information?",
    "I need help with my homework.",
    "Why do people ask such stupid questions?",
    "Can you tell me the answer to life?"
]

# Gradio Interface: one text input, two outputs (label text and a JSON view
# of the probabilities returned by classify_question).
interface = gr.Interface(
    fn=classify_question,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your question here..."),
    ],
    outputs=[
        "text",  # Output for label
        "json"   # Output for probabilities
    ],
    # One inner list per example, holding the value for the single input.
    examples=[[question] for question in example_questions],
    # Do not run the models on the examples at start-up; they are only
    # evaluated when a user clicks one and submits it.
    cache_examples=False,
    title="Quora Insincere Questions Classifier",
    description="Enter your question to classify it as sincere or insincere, or click one of the example questions below."
)

interface.launch()