Spaces:
Sleeping
Sleeping
Add models and update application file
Browse files- app.py +84 -4
- lemma_dict.pkl +3 -0
- model_1.h5 +3 -0
- model_2.h5 +3 -0
- model_3.h5 +3 -0
- model_4.h5 +3 -0
- word_dict.pkl +3 -0
app.py
CHANGED
@@ -1,7 +1,87 @@
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-
|
|
|
|
|
|
|
5 |
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import tensorflow as tf
|
5 |
+
import pickle
|
6 |
+
import spacy
|
7 |
+
from tqdm import tqdm
|
8 |
+
import gc
|
9 |
|
10 |
+
# Load the four saved Keras models. The module-level names model_1..model_4
# are the ones classify_question reads.
model_1, model_2, model_3, model_4 = (
    tf.keras.models.load_model(model_path)
    for model_path in ("model_1.h5", "model_2.h5", "model_3.h5", "model_4.h5")
)

# Load the pickled dictionaries shipped next to this file.
# NOTE: pickle.load can execute arbitrary code, so these files must stay
# trusted, repository-controlled artefacts.
with open('word_dict.pkl', 'rb') as word_dict_file:
    word_dict = pickle.load(word_dict_file)

with open('lemma_dict.pkl', 'rb') as lemma_dict_file:
    lemma_dict = pickle.load(lemma_dict_file)


def _is_stop_word(s):
    """Return True when the lower-cased string is in SpaCy's English stop-word set."""
    return s.lower() in spacy.lang.en.stop_words.STOP_WORDS


# Load the SpaCy pipeline with the parser, NER and tagger components disabled,
# then register the case-insensitive stop-word check under the IS_STOP flag.
nlp = spacy.load('en_core_web_lg', disable=['parser', 'ner', 'tagger'])
nlp.vocab.add_flag(_is_stop_word, spacy.attrs.IS_STOP)
|
26 |
+
|
27 |
+
def preprocess_text(text):
    """Convert a question into the list of vocabulary indices fed to the models.

    The text is tokenized with the module-level SpaCy pipeline ``nlp`` and
    every kept token is looked up in the module-level ``word_dict``.

    Tokens that are missing from ``word_dict`` are skipped.  The lookup is
    read-only: inventing a new index at request time would grow the shared
    dictionary without bound and would hand the saved models an index they
    were never trained with (and which may lie outside the range their
    embedding layers accept -- TODO confirm against the training notebook).

    Args:
        text: Raw question text.

    Returns:
        A list of integer indices taken from ``word_dict``, in token order.
        The list is empty when no token of ``text`` is in the vocabulary.
    """
    word_seq = []
    for token in nlp(text):
        # NOTE(review): the pipeline is loaded with the tagger disabled, so
        # pos_ is presumably never "PUNCT" and punctuation is kept.  The check
        # is left as-is because it may mirror the training-time preprocessing;
        # confirm before switching to token.is_punct.
        if token.pos_ == "PUNCT":
            continue
        index = word_dict.get(token.text)
        if index is not None:
            word_seq.append(index)
    return word_seq
|
37 |
+
|
38 |
+
def classify_question(text, example=None):
    """Classify a question as "Sincere" or "Insincere" with the four-model ensemble.

    Args:
        text: Question typed by the user.
        example: Optional fallback question (for instance a dropdown
            selection).  It is used only when ``text`` is empty or blank.
            The default keeps single-argument calls working, while a UI that
            supplies two inputs no longer fails with a ``TypeError``.

    Returns:
        A ``(label, probs)`` tuple.  ``label`` is ``"Insincere"`` when the
        weighted ensemble score is above 0.5 and ``"Sincere"`` otherwise.
        ``probs`` maps each model to its own (unweighted) predicted
        probability and ``"Average Probability"`` to the weighted ensemble
        score.
    """
    # Prefer the typed question; fall back to the example when it is blank.
    question = text if text and text.strip() else (example or "")

    # Preprocess the text
    seq = preprocess_text(question)
    padded_seq = tf.keras.preprocessing.sequence.pad_sequences([seq], maxlen=50)  # Adjust maxlen if needed

    # Ensemble weights, in model order; they sum to 1.0, so the weighted sum
    # below is a weighted average of the individual predictions.
    weights = (0.15, 0.35, 0.15, 0.35)
    models = (model_1, model_2, model_3, model_4)

    # Raw prediction of each model; np.squeeze collapses the batch dimension
    # (assumes each model emits a single value per sample, as float() below
    # requires).
    raw_preds = [
        np.squeeze(model.predict(padded_seq, batch_size=1, verbose=0))
        for model in models
    ]

    # Combine predictions
    avg_pred = sum(weight * pred for weight, pred in zip(weights, raw_preds))
    label = "Insincere" if avg_pred > 0.5 else "Sincere"

    # Report the raw per-model probabilities: the weight-scaled contributions
    # (at most 0.15 or 0.35) are not probabilities and would be misleading
    # under a "Probability" label.
    probs = {
        f"Model {position} Probability": float(pred)
        for position, pred in enumerate(raw_preds, start=1)
    }
    probs["Average Probability"] = float(avg_pred)

    return label, probs
|
63 |
+
|
64 |
+
# Example questions offered in the dropdown.
example_questions = [
    "Is this the best place to get information?",
    "I need help with my homework.",
    "Why do people ask such stupid questions?",
    "Can you tell me the answer to life?",
]


def _classify_from_inputs(text, example):
    """Adapt the two Gradio inputs to the single question classify_question needs.

    Gradio calls ``fn`` with one positional argument per input component, so
    a textbox plus a dropdown yields two arguments.  The typed text wins; the
    dropdown selection is used only when the textbox is empty or blank.
    """
    question = text if text and text.strip() else (example or "")
    return classify_question(question)


# Gradio Interface
# Components come from the top-level gradio namespace; the legacy
# ``gr.inputs`` namespace is not available in newer Gradio releases.
interface = gr.Interface(
    fn=_classify_from_inputs,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your question here..."),
        gr.Dropdown(choices=example_questions, label="Select an example question:"),
    ],
    outputs=[
        "text",  # Output for label
        "json",  # Output for probabilities
    ],
    title="Quora Insincere Questions Classifier",
    description="Enter your question to classify it as sincere or insincere. Select an example question from the dropdown.",
)

interface.launch()
|
lemma_dict.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2574d160d8550bf671f6588a1d32e28fab2f0610b526f4f09106e0ae99e9849c
|
3 |
+
size 6362111
|
model_1.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e36dfda896de06192843447fdf71b4bc5a72f46a4fc788dfb080a767af6b974c
|
3 |
+
size 749650112
|
model_2.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1d0c6ce351d7ba21b6ae5392768abf2ca44bfe22261d5d0a54109dedb6ed6c3
|
3 |
+
size 749650112
|
model_3.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a96ee5741ecf16149d3ca66e82634a5c46b42e42d939321bd1468f856c00d90
|
3 |
+
size 749650016
|
model_4.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b579d0565f632ae2a1fb53e02e0d0f452d85db7a7238bcff445644cde92b9c4
|
3 |
+
size 749650016
|
word_dict.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1abb8d9104762746b16fa989592b247332fb563b1c8be89edc2829c4d2aec513
|
3 |
+
size 4555634
|