Spaces:

non2013
/

SincereQuestions

Sleeping

App Files Files Community

SincereQuestions / app.py

non2013

update interface

5b6e62c 9 months ago

raw

history blame

3.06 kB

	import gc
	import os
	import pickle
	import subprocess
	import sys

	import gradio as gr
	import numpy as np
	import pandas as pd
	import spacy
	import tensorflow as tf
	from tqdm import tqdm

	# Name of the spaCy pipeline package used for tokenisation.
	SPACY_MODEL = "en_core_web_lg"

	# Download the SpaCy model at startup.
	# The running interpreter (sys.executable) is used rather than whatever
	# "python" resolves to on PATH, so the package is installed into the
	# environment this process imports from. Passing an argument list also
	# avoids going through a shell.
	_download = subprocess.run(
	    [sys.executable, "-m", "spacy", "download", SPACY_MODEL]
	)
	if _download.returncode != 0:
	    # Best effort: the model may already be installed, in which case
	    # spacy.load() below still succeeds; otherwise it raises there.
	    print(
	        f"warning: downloading {SPACY_MODEL} failed "
	        f"(exit code {_download.returncode})",
	        file=sys.stderr,
	    )

	# Load the four Keras models that classify_question() combines.
	model_1 = tf.keras.models.load_model("model_1.h5")
	model_2 = tf.keras.models.load_model("model_2.h5")
	model_3 = tf.keras.models.load_model("model_3.h5")
	model_4 = tf.keras.models.load_model("model_4.h5")

	# Load dictionaries.
	# NOTE: pickle.load() can execute arbitrary code while unpickling. Only
	# load the .pkl files shipped with this app, never user-supplied ones.
	with open('word_dict.pkl', 'rb') as f:
	    # Maps token text to the integer index fed to the models.
	    word_dict = pickle.load(f)

	with open('lemma_dict.pkl', 'rb') as f:
	    # Not referenced by the rest of the visible file.
	    lemma_dict = pickle.load(f)

	# Load SpaCy NLP model with the parser, NER and tagger components disabled.
	nlp = spacy.load(SPACY_MODEL, disable=['parser', 'ner', 'tagger'])
	# Flag a lexeme as a stop word when its lower-cased text is in the English
	# stop-word list.
	# NOTE(review): Vocab.add_flag looks like a spaCy 2.x API -- confirm it
	# exists in the spaCy version pinned for this app.
	nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)

	def preprocess_text(text):
	    """Tokenise a question with SpaCy and return its word indices.

	    Args:
	        text: The question as a single string. ``None`` is treated as an
	            empty string.

	    Returns:
	        A list of integer indices looked up in the module-level
	        ``word_dict``, one per kept token, in token order. Tokens that are
	        not in ``word_dict`` are skipped.
	    """
	    # Call the pipeline directly: nlp.pipe() expects an iterable of texts,
	    # so handing it one string would iterate over its characters and yield
	    # Doc objects rather than tokens.
	    doc = nlp(text or "")

	    word_seq = []
	    for token in doc:
	        # NOTE(review): the pipeline in this file is loaded with the tagger
	        # disabled, so pos_ may never equal "PUNCT" here -- confirm against
	        # the preprocessing used at training time before changing this.
	        if token.pos_ == "PUNCT":
	            continue
	        # Look up without inserting: inventing a new index for an unseen
	        # word would grow the shared vocabulary on every request and
	        # produce ids the trained models have never seen.
	        index = word_dict.get(token.text)
	        if index is not None:
	            word_seq.append(index)
	    return word_seq

	def classify_question(text):
	    """Classify a question as sincere or insincere with the model ensemble.

	    Args:
	        text: The question to classify.

	    Returns:
	        A ``(label, probs)`` tuple. ``label`` is ``"Insincere"`` when the
	        weighted ensemble score exceeds 0.35, otherwise ``"Sincere"``.
	        ``probs`` maps each model's name to its own (unweighted) output
	        and ``"Average Probability"`` to the weighted ensemble score.
	    """
	    # Preprocess the text and pad it to the fixed length fed to the models.
	    seq = preprocess_text(text)
	    padded_seq = tf.keras.preprocessing.sequence.pad_sequences([seq], maxlen=55)  # Adjust maxlen if needed
	    BATCH_SIZE = 512

	    # (model, ensemble weight) pairs; the weights sum to 1.0.
	    weighted_models = (
	        (model_1, 0.15),
	        (model_2, 0.35),
	        (model_3, 0.15),
	        (model_4, 0.35),
	    )

	    # Raw output of each model for the single padded sequence. Converting
	    # with float() assumes each model emits exactly one value per input.
	    raw_preds = [
	        float(np.squeeze(model.predict(padded_seq, batch_size=BATCH_SIZE, verbose=2)))
	        for model, _ in weighted_models
	    ]

	    # Combine predictions into the weighted ensemble score.
	    avg_pred = sum(
	        weight * pred
	        for (_, weight), pred in zip(weighted_models, raw_preds)
	    )
	    label = "Insincere" if avg_pred > 0.35 else "Sincere"

	    # Report each model's own output rather than its weighted contribution,
	    # so the values under the "... Probability" keys are comparable.
	    probs = {
	        "Model 1 Probability": raw_preds[0],
	        "Model 2 Probability": raw_preds[1],
	        "Model 3 Probability": raw_preds[2],
	        "Model 4 Probability": raw_preds[3],
	        "Average Probability": float(avg_pred),
	    }

	    return label, probs

	# Example questions offered as clickable examples in the interface.
	example_questions = [
	    "Is this the best place to get information?",
	    "I need help with my homework.",
	    "Why do people ask such stupid questions?",
	    "Can you tell me the answer to life?",
	]

	# Gradio Interface: one text input, a text label and a JSON breakdown of
	# the scores as outputs.
	interface = gr.Interface(
	    fn=classify_question,
	    inputs=[
	        gr.Textbox(lines=2, placeholder="Enter your question here..."),
	    ],
	    outputs=[
	        "text",  # Output for label
	        "json",  # Output for probabilities
	    ],
	    # With a single input component the examples may be a flat list.
	    examples=example_questions,
	    # Do not run the classifier on every example while the app starts up.
	    cache_examples=False,
	    title="Quora Insincere Questions Classifier",
	    description="Enter your question to classify it as sincere or insincere, or pick one of the example questions.",
	)

	interface.launch()