Spaces:

NithitEiEi
/

insincere-question

Sleeping

App Files Files Community

NithitEiEi committed on Nov 4, 2024

Commit

d8f4336

verified ·

1 Parent(s): 19fca01

upload model and app

Browse files

Files changed (11) hide show

app.py +81 -0
bert/bert_model.pth +3 -0
bert/model.py +49 -0
deberta/fastai_QIQC-deberta-v3.pth +3 -0
deberta/model.py +53 -0
lstm/model.py +45 -0
lstm/model_1.h5 +3 -0
lstm/model_2.h5 +3 -0
lstm/model_3.h5 +3 -0
lstm/model_4.h5 +3 -0
lstm/word_dict.pkl +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import gradio as gr
+from bert.model import BERT_predict
+from lstm.model import BiLSTM_predict
+from deberta.model import deBERTa_predict
+# Sample questions passed to the gr.Examples component so users can fill the
+# post textbox with one click.
+examples=[
+            "How do I train my dogs to kill raccoons?",
+            "Why Indian education totally bullshit?",
+            "What is it really like to be a nurse practitioner",
+            "What is your prefer race? white, black or asian."
+        ]
+def alert(prob):
+    """Always raise a ``gr.Error`` whose message reports ``prob`` to two decimals.
+
+    This function never returns normally.
+    """
+    message = f"This may not suitable for posting insincere probability {prob:.2f}"
+    raise gr.Error(message)
+def clear_post():
+    """Return an empty string, used as the new value of the post textbox."""
+    cleared_text = ""
+    return cleared_text
+def add_post(posts, new_post, mode):
+    """Score ``new_post`` with the selected model and append it if it is accepted.
+
+    Args:
+        posts: list of already accepted posts (dicts with "post", "model", "prob").
+        new_post: text of the question to classify.
+        mode: one of "BiLSTM", "BERT" or "DeBERTaV3".
+
+    Returns:
+        A ``(posts, "")`` tuple: a new list with the accepted post appended, and
+        an empty string that clears the input textbox.
+
+    Raises:
+        gr.Error: if the post is classified as insincere (via ``alert``), or if
+            ``mode`` is not one of the supported model names.
+    """
+    if mode == "BiLSTM":
+        # The BiLSTM ensemble only returns a score, so the decision threshold
+        # is applied here.
+        prob = BiLSTM_predict(new_post)
+        insincere = prob > 0.35
+    elif mode == "BERT":
+        insincere, prob = BERT_predict(new_post)
+    elif mode == "DeBERTaV3":
+        insincere, prob = deBERTa_predict(new_post)
+    else:
+        # Previously an unknown mode fell through and returned None, which is
+        # not a valid value for the two outputs wired to this handler.
+        raise gr.Error(f"Unknown model: {mode}")
+    if insincere:
+        # alert() always raises, so nothing after this call runs for a rejected post.
+        alert(prob)
+    # Build a new list instead of mutating ``posts`` in place.
+    return posts + [{"post": new_post, "model": mode, "prob": prob}], ""
+# Build the UI: post textbox, model selector, submit/clear buttons, the list of
+# accepted posts, and clickable example questions.
+with gr.Blocks(theme=gr.themes.Soft(), title="Quara Question post") as demo:
+    # State holding the list of accepted posts; starts empty.
+    posts = gr.State([])
+    new_post = gr.Textbox(label="Add post", autofocus=True)
+    mode = gr.Radio(["BiLSTM", "BERT", "DeBERTaV3"], value="BiLSTM", label="Model")
+    with gr.Row():
+        submit = gr.Button("submit", variant='primary')
+        clear = gr.Button("clear")
+    # add_post's two return values update the post list and the textbox.
+    submit.click(add_post, inputs=[posts, new_post, mode], outputs=[posts, new_post])
+    clear.click(clear_post, inputs=None, outputs=new_post)
+    # render_posts receives the current value of `posts` as post_list.
+    @gr.render(inputs=posts)
+    def render_posts(post_list):
+        # Shallow copy of the list that is iterated below.
+        output = [post for post in post_list]
+        gr.Markdown(f"### Question post ({len(output)})")
+        for index, post in enumerate(output):
+            with gr.Row():
+                gr.Textbox(
+                    f"{post['post']} | {post['prob']:.8f}",
+                    label=f"Post{index + 1} ({post['model']})",
+                    show_label=True
+                )
+                delete_btn = gr.Button("Delete", scale=0, variant="stop")
+                # post=post binds this iteration's post as a default argument, so
+                # each button removes its own row rather than the loop's last post.
+                def delete(post=post):
+                    # list.remove drops the first entry equal to `post`, in place.
+                    post_list.remove(post)
+                    return post_list
+                delete_btn.click(delete, None, [posts])
+    with gr.Row():
+        # NOTE: this rebinds the module-level name `examples` (the list of
+        # strings) to the gr.Examples component.
+        examples = gr.Examples(
+        examples=examples,
+        inputs=[new_post],
+    )
+# Runs as soon as this module is executed; there is no __main__ guard.
+demo.launch()

bert/bert_model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d7423dcada747073f22d892f938150cfe737b5c4a46aabd334f20959d604db1
+size 436484297

bert/model.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+class BertClassifier(nn.Module):
+    """Thin ``nn.Module`` wrapper that returns only the logits of the wrapped model."""
+    def __init__(self, bert):
+        super().__init__()
+        # Stored under the attribute name "bert", so its parameters appear in
+        # this module's state dict with a "bert." prefix.
+        self.bert = bert
+    def forward(self, input_id, attention_mask):
+        """Call the wrapped model with the ids and mask and return its ``.logits``."""
+        return self.bert(input_ids=input_id, attention_mask=attention_mask).logits
+# Load the pretrained 'bert-base-cased' tokenizer and sequence-classification model.
+tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
+# NOTE(review): the .train() call is overridden by model.eval() below, which
+# also switches this wrapped submodule to eval mode.
+bert = AutoModelForSequenceClassification.from_pretrained('bert-base-cased').train()
+# Replacement classification head: 768 -> 1024 -> 2 with ReLU and dropout.
+classifier = nn.Sequential(
+    nn.Linear(768, 1024),
+    nn.ReLU(),
+    nn.Dropout(0.5),
+    nn.Linear(1024, 2)
+)
+bert.classifier = classifier
+model = BertClassifier(bert)
+# Load the checkpoint onto CPU with strict key matching (the load_state_dict
+# default). The relative path is resolved against the current working directory.
+model.load_state_dict(torch.load("./bert/bert_model.pth", map_location=torch.device('cpu'), weights_only=True))
+model.eval()
+def BERT_predict(text):
+    """Classify ``text`` with the BERT model.
+
+    Returns a ``(label, prob)`` tuple: ``label`` is the argmax class index and
+    ``prob`` is the softmax probability of class 1, both as Python scalars.
+    """
+    # Pad or truncate to exactly 30 tokens.
+    encoded = tokenizer(
+        text,
+        padding="max_length",
+        truncation=True,
+        max_length=30,
+        return_tensors="pt",
+    )
+    model.eval()
+    with torch.no_grad():
+        logits = model(encoded['input_ids'], encoded['attention_mask'])
+    probs = F.softmax(logits, dim=-1)
+    label = torch.argmax(probs, dim=-1).item()
+    class_one_prob = probs[0][1].item()
+    return label, class_one_prob

deberta/fastai_QIQC-deberta-v3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d2b64c078a7de8ffe57b1ff767e3bfac6bdb52bb3f5977b5e06f3ce9993b873
+size 740942321

deberta/model.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+class BertClassifier(nn.Module):
+    """Wrap a sequence-classification model so that ``forward`` yields raw logits."""
+    def __init__(self, bert):
+        super().__init__()
+        # The attribute name "bert" becomes the prefix of the wrapped model's
+        # keys in this module's state dict.
+        self.bert = bert
+    def forward(self, input_id, attention_mask):
+        """Forward the ids and attention mask to the wrapped model; return ``.logits``."""
+        result = self.bert(attention_mask=attention_mask, input_ids=input_id)
+        return result.logits
+# Load the pretrained 'microsoft/deberta-v3-base' tokenizer and classification model.
+tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-base')
+# No .train() call here: model.eval() at the end of this block sets the mode
+# for every submodule, so an earlier mode switch would have no effect.
+bert = AutoModelForSequenceClassification.from_pretrained('microsoft/deberta-v3-base')
+# Replacement classification head: 768 -> 1024 -> 2 with ReLU and dropout.
+classifier = nn.Sequential(
+    nn.Linear(768, 1024),
+    nn.ReLU(),
+    nn.Dropout(0.5),
+    nn.Linear(1024, 2)
+)
+bert.classifier = classifier
+model = BertClassifier(bert)
+# The relative path is resolved against the current working directory.
+state_dict = torch.load(
+    "./deberta/fastai_QIQC-deberta-v3.pth", map_location=torch.device('cpu'),
+    weights_only=True
+    )
+# strict=False tolerates key mismatches, but every mismatched layer then keeps
+# its initial weights. Report mismatches instead of ignoring them silently.
+# NOTE(review): the file name suggests a fastai export; confirm the checkpoint
+# is a flat state dict whose keys match this wrapper.
+load_result = model.load_state_dict(state_dict, strict=False)
+if load_result.missing_keys or load_result.unexpected_keys:
+    import warnings
+    warnings.warn(
+        "DeBERTa checkpoint does not match the model exactly: "
+        f"{len(load_result.missing_keys)} missing key(s), "
+        f"{len(load_result.unexpected_keys)} unexpected key(s)",
+        RuntimeWarning,
+    )
+model.eval()
+def deBERTa_predict(text):
+    """Classify ``text`` with the DeBERTa model.
+
+    Returns ``(prediction, probability)``: the argmax class index and the
+    softmax probability of class 1, both converted to Python scalars.
+    """
+    tokenizer_options = {
+        "padding": "max_length",
+        "truncation": True,
+        "max_length": 30,  # inputs are padded or truncated to 30 tokens
+        "return_tensors": "pt",
+    }
+    batch = tokenizer(text, **tokenizer_options)
+    model.eval()
+    with torch.no_grad():
+        logits = model(batch['input_ids'], batch['attention_mask'])
+    scores = F.softmax(logits, dim=-1)
+    return torch.argmax(scores, dim=-1).item(), scores[0][1].item()

lstm/model.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import os
+# TF_ENABLE_ONEDNN_OPTS must be in the environment before TensorFlow is
+# loaded; setting it after `import tensorflow` is too late to take effect.
+os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
+import spacy
+import pickle
+import numpy as np
+import tensorflow as tf
+# Batch size passed to every Keras predict() call in this module.
+BATCH_SIZE = 512
+def preprocess_text(text):
+    """Tokenize ``text`` with spaCy and return the vocabulary index of each token.
+
+    Tokens whose ``pos_`` is "PUNCT" are skipped. Tokens that are not in
+    ``word_dict`` map to index 0 (out-of-vocabulary).
+
+    Returns:
+        list of vocabulary indices, in token order.
+    """
+    # NOTE(review): the pipeline is loaded with 'tagger' disabled, so pos_ may
+    # never equal "PUNCT"; confirm this matches the training-time preprocessing.
+    oov_index = 0
+    word_seq = []
+    for doc in nlp.pipe([text], n_process=1):
+        # Look up with .get() rather than inserting: unseen tokens from user
+        # input must not grow the shared module-level vocabulary per request.
+        word_seq.extend(
+            word_dict.get(token.text, oov_index)
+            for token in doc
+            if token.pos_ != "PUNCT"
+        )
+    return word_seq
+def BiLSTM_predict(text):
+    """Return the weighted ensemble score of the four BiLSTM models for ``text``.
+
+    The text is converted to word indices, padded or truncated to length 55,
+    scored by each model, and blended with weights 0.15 / 0.35 / 0.15 / 0.35.
+    """
+    indices = preprocess_text(text)
+    batch = tf.keras.preprocessing.sequence.pad_sequences([indices], maxlen=55)
+    weighted_models = (
+        (0.15, model_1),
+        (0.35, model_2),
+        (0.15, model_3),
+        (0.35, model_4),
+    )
+    parts = [
+        weight * np.squeeze(net.predict(batch, batch_size=BATCH_SIZE, verbose=2))
+        for weight, net in weighted_models
+    ]
+    # Summed left to right, in model order.
+    blended = parts[0] + parts[1] + parts[2] + parts[3]
+    return blended
+# Load the four Keras models of the ensemble. Relative paths are resolved
+# against the current working directory.
+model_1 = tf.keras.models.load_model("./lstm/model_1.h5")
+model_2 = tf.keras.models.load_model("./lstm/model_2.h5")
+model_3 = tf.keras.models.load_model("./lstm/model_3.h5")
+model_4 = tf.keras.models.load_model("./lstm/model_4.h5")
+# SECURITY NOTE: pickle.load can execute arbitrary code. It is used here on a
+# file read from a fixed path; never point it at user-supplied files.
+with open('./lstm/word_dict.pkl', 'rb') as f:
+    word_dict = pickle.load(f)
+_SPACY_MODEL = 'en_core_web_lg'
+_SPACY_DISABLED = ['parser', 'ner', 'tagger']
+try:
+    nlp = spacy.load(_SPACY_MODEL, disable=_SPACY_DISABLED)
+except OSError:
+    # spaCy raises OSError when the model package is not installed. Download
+    # only in that case, with the interpreter running this process and without
+    # a shell, and fail loudly if the download does not succeed.
+    import importlib
+    import subprocess
+    import sys
+    subprocess.run(
+        [sys.executable, "-m", "spacy", "download", _SPACY_MODEL],
+        check=True,
+    )
+    # Make the freshly installed package visible to the import system.
+    importlib.invalidate_caches()
+    nlp = spacy.load(_SPACY_MODEL, disable=_SPACY_DISABLED)
+nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)

lstm/model_1.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e36dfda896de06192843447fdf71b4bc5a72f46a4fc788dfb080a767af6b974c
+size 749650112

lstm/model_2.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a1d0c6ce351d7ba21b6ae5392768abf2ca44bfe22261d5d0a54109dedb6ed6c3
+size 749650112

lstm/model_3.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0a96ee5741ecf16149d3ca66e82634a5c46b42e42d939321bd1468f856c00d90
+size 749650016

lstm/model_4.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0b579d0565f632ae2a1fb53e02e0d0f452d85db7a7238bcff445644cde92b9c4
+size 749650016

lstm/word_dict.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1abb8d9104762746b16fa989592b247332fb563b1c8be89edc2829c4d2aec513
+size 4555634