# --- web-page header captured along with the file; not part of the program ---
# non2013's picture
# update interface
# 5b6e62c
# raw
# history blame
# 3.06 kB
import gc
import os
import pickle
import subprocess
import sys

import gradio as gr
import numpy as np
import pandas as pd
import spacy
import tensorflow as tf
from tqdm import tqdm
# Name of the spaCy pipeline package used for tokenisation.
SPACY_MODEL = "en_core_web_lg"

# Download the SpaCy model only when it is not already installed, so a restart
# does not re-download it. The download runs under the interpreter that is
# executing this script (sys.executable) rather than whatever `python` happens
# to be first on PATH, and a failed download raises instead of being ignored.
if not spacy.util.is_package(SPACY_MODEL):
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", SPACY_MODEL],
        check=True,
    )

# Load the four Keras models that classify_question combines into an ensemble.
model_1 = tf.keras.models.load_model("model_1.h5")
model_2 = tf.keras.models.load_model("model_2.h5")
model_3 = tf.keras.models.load_model("model_3.h5")
model_4 = tf.keras.models.load_model("model_4.h5")

# Load dictionaries
# NOTE: pickle.load can execute arbitrary code; these files must come from a
# trusted source (they are expected to ship alongside this script).
with open('word_dict.pkl', 'rb') as f:
    word_dict = pickle.load(f)
with open('lemma_dict.pkl', 'rb') as f:
    lemma_dict = pickle.load(f)

# Load SpaCy NLP model with the parser, NER and tagger components switched off.
nlp = spacy.load(SPACY_MODEL, disable=['parser', 'ner', 'tagger'])
# Flag every vocabulary entry whose lower-cased form is an English stop word.
nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)
def preprocess_text(text):
    """Tokenise a question with SpaCy and return its word indices.

    Args:
        text: The raw question string. ``None`` is treated as an empty string.

    Returns:
        A list of integer indices looked up in the module-level ``word_dict``,
        one per token that is not tagged as punctuation and is present in the
        dictionary, in sentence order. Tokens missing from ``word_dict`` are
        dropped.
    """
    # ``nlp.pipe`` expects an iterable of texts and yields Doc objects; a single
    # string has to go through ``nlp(...)`` so that iterating gives tokens.
    doc = nlp(text or "")

    word_seq = []
    for token in doc:
        # NOTE(review): the pipeline in this file is loaded with the tagger
        # disabled, so pos_ may never equal "PUNCT" here -- confirm whether
        # punctuation is meant to be kept.
        if token.pos_ == "PUNCT":
            continue
        # Look up only; do not mint new indices. Adding unseen words would grow
        # the shared dictionary on every request and hand the models ids they
        # were presumably never trained on (possibly outside their embedding
        # table).
        index = word_dict.get(token.text)
        if index is not None:
            word_seq.append(index)
    return word_seq
def classify_question(text):
    """Classify a question as sincere or insincere with a weighted ensemble.

    The question is converted to word indices, padded to a fixed length and
    scored by the four module-level models. Their outputs are combined with
    fixed weights (0.15, 0.35, 0.15, 0.35) and the result is compared against
    a 0.35 decision threshold.

    Args:
        text: The raw question string.

    Returns:
        A ``(label, probs)`` tuple. ``label`` is ``"Insincere"`` when the
        weighted average exceeds the threshold, otherwise ``"Sincere"``.
        ``probs`` maps ``"Model N Probability"`` to each model's own
        (unweighted) output and ``"Average Probability"`` to the weighted
        average, all as Python floats.
    """
    max_len = 55  # Adjust if the models expect a different sequence length
    batch_size = 512
    threshold = 0.35
    # (model, ensemble weight) pairs; the weights sum to 1.0.
    ensemble = (
        (model_1, 0.15),
        (model_2, 0.35),
        (model_3, 0.15),
        (model_4, 0.35),
    )

    # Preprocess the text into a single padded row of word indices.
    seq = preprocess_text(text)
    padded_seq = tf.keras.preprocessing.sequence.pad_sequences([seq], maxlen=max_len)

    probs = {}
    avg_pred = 0.0
    for number, (model, weight) in enumerate(ensemble, start=1):
        raw = float(np.squeeze(model.predict(padded_seq, batch_size=batch_size, verbose=2)))
        # Report the model's own output; the weight only applies to the
        # ensemble score, otherwise the displayed value is capped at the weight.
        probs[f"Model {number} Probability"] = raw
        avg_pred += weight * raw
    probs["Average Probability"] = avg_pred

    label = "Insincere" if avg_pred > threshold else "Sincere"
    return label, probs
# Example questions offered as clickable samples in the interface.
example_questions = [
    "Is this the best place to get information?",
    "I need help with my homework.",
    "Why do people ask such stupid questions?",
    "Can you tell me the answer to life?",
]

# Gradio Interface: one textbox in, the label and the probability table out.
interface = gr.Interface(
    fn=classify_question,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your question here..."),
    ],
    outputs=[
        "text",  # Output for label
        "json",  # Output for probabilities
    ],
    # One inner list per sample, holding one value per input component.
    examples=[[question] for question in example_questions],
    # Do not run the models on every example at start-up.
    cache_examples=False,
    title="Quora Insincere Questions Classifier",
    description="Enter your question to classify it as sincere or insincere, or click one of the example questions.",
)
interface.launch()