# NOTE(review): the following lines are Hugging Face Spaces page chrome
# (UI labels, commit hashes, gutter line numbers) that were scraped into the
# source file. They are preserved here as comments so the module parses:
# Spaces:
# Sleeping
# Sleeping
# File size: 3,122 Bytes
# e07e824 6687c9a 870d31e e07e824 6687c9a e07e824 6687c9a 417c147 6687c9a 417c147 6687c9a ba7dd0b 6687c9a ba7dd0b 6687c9a ba7dd0b 6687c9a f227fd1 6687c9a f227fd1 6687c9a 06dcc43 6687c9a f227fd1 6687c9a
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
import gradio as gr
import numpy as np
import pandas as pd
import tensorflow as tf
import pickle
import spacy
from tqdm import tqdm
import gc
import os
# Download the SpaCy model at process start.
# NOTE(review): the os.system return code is ignored, so a failed download
# only surfaces later when spacy.load raises — best-effort by design here.
os.system("python -m spacy download en_core_web_lg")
# Load models
# Four Keras ensemble members; classify_question below combines them with
# fixed weights (0.15 / 0.35 / 0.15 / 0.35).
model_1 = tf.keras.models.load_model("model_1.h5")
model_2 = tf.keras.models.load_model("model_2.h5")
model_3 = tf.keras.models.load_model("model_3.h5")
model_4 = tf.keras.models.load_model("model_4.h5")
# Load dictionaries
# word_dict maps token text -> integer index (used and mutated by
# preprocess_text). lemma_dict is loaded but not referenced anywhere in this
# file — presumably kept for parity with the training pipeline; verify.
with open('word_dict.pkl', 'rb') as f:
    word_dict = pickle.load(f)
with open('lemma_dict.pkl', 'rb') as f:
    lemma_dict = pickle.load(f)
# Load SpaCy NLP model
# parser/ner/tagger are disabled for speed — note this means token.pos_ is
# never populated by this pipeline.
nlp = spacy.load('en_core_web_lg', disable=['parser', 'ner', 'tagger'])
# Register a custom IS_STOP flag backed by spaCy's English stop-word list.
nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)
def preprocess_text(text):
    """Tokenize *text* with spaCy and return its sequence of word indices.

    Non-punctuation tokens are looked up in the module-level ``word_dict``;
    unseen tokens are added on the fly with the next free index (indices
    start at 1, leaving 0 free for padding). ``word_dict`` is mutated.

    Args:
        text: raw question string.

    Returns:
        list[int]: word indices in token order.
    """
    # BUG FIX: the pipeline is loaded with the tagger disabled (see the
    # spacy.load call above), so token.pos_ was never populated and the
    # original `token.pos_ != "PUNCT"` check matched every token, letting
    # punctuation through. token.is_punct is a lexeme attribute that works
    # without the tagger.
    word_seq = []
    for doc in nlp.pipe([text], n_process=1):
        for token in doc:
            if token.is_punct:
                continue
            if token.text not in word_dict:
                word_dict[token.text] = len(word_dict) + 1  # next free index
            word_seq.append(word_dict[token.text])
    return word_seq
def classify_question(text):
    """Classify a question as "Sincere"/"Insincere" via a weighted ensemble.

    The input is indexed with preprocess_text, padded to length 55, and
    scored by the four pre-loaded Keras models; their weighted predictions
    are summed and thresholded at 0.35.

    Returns:
        tuple[str, dict]: the label and a JSON-friendly breakdown of the
        combined and per-model (weighted) scores plus the index sequence.
    """
    # Turn the raw text into a fixed-length index sequence.
    seq = preprocess_text(text)
    padded = tf.keras.preprocessing.sequence.pad_sequences([seq], maxlen=55)  # adjust maxlen if needed

    # Score with each ensemble member, applying its fixed weight.
    ensemble = ((model_1, 0.15), (model_2, 0.35), (model_3, 0.15), (model_4, 0.35))
    weighted = [
        weight * np.squeeze(model.predict(padded, batch_size=512, verbose=2))
        for model, weight in ensemble
    ]
    pred1, pred2, pred3, pred4 = weighted

    # Combine and threshold the weighted scores.
    avg_pred = pred1 + pred2 + pred3 + pred4
    label = "Insincere" if avg_pred > 0.35 else "Sincere"

    # JSON payload for the Gradio "json" output component.
    probs = {
        "Probability": float(avg_pred),
        "Model Probabilities": {
            "Model 1": float(pred1),
            "Model 2": float(pred2),
            "Model 3": float(pred3),
            "Model 4": float(pred4),
            "visible": False,
        },
        "Sequence": {"value": seq, "visible": False},
    }
    return label, probs
# Example questions pre-filled beneath the input box.
examples = [
    "How do you train a pigeon to send messages?",
    "Is USA a shithole country owing to a shithole president?",
    "Why is Indian educationa total bullshit?",
    "Which person has given the least f**ks and still turned out successful?"
]

# Gradio Interface wiring classify_question to a textbox input and two
# outputs: the text label and the JSON probability breakdown.
interface = gr.Interface(
    fn=classify_question,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your question here..."),
    ],
    outputs=[
        "text",  # classification label
        "json"   # combined / per-model probabilities and index sequence
    ],
    title="Quora Insincere Questions Classifier",
    examples=examples,
    description="Enter your question to classify it as sincere or insincere. Select an example question from the dropdown."
)

# BUG FIX: removed the stray trailing "|" (page-scrape artifact) that made
# this line — and therefore the whole module — a syntax error.
interface.launch()