Spaces: Runtime error
aliasgarov committed on
Commit · d53b62d
1 Parent(s): 3d16af9
Updated
Browse files
app.py
CHANGED
@@ -17,6 +17,7 @@ import fitz
 from transformers import GPT2LMHeadModel, GPT2TokenizerFast
 import nltk, spacy, subprocess, torch
 import plotly.graph_objects as go
+import torch.nn.functional as F
 import nltk
 from unidecode import unidecode
 
@@ -104,7 +105,6 @@ def plagiarism_check(
     # print("New Score Array:\n")
     # print2D(ScoreArray)
 
-
     # Gradio formatting section
     sentencePlag = [False] * len(sentences)
     sentenceToMaxURL = [-1] * len(sentences)
@@ -192,9 +192,11 @@ text_mc_model_path = "polygraf-ai/ai-text-detection-mc-robert-open-ai-detector-v
 text_mc_tokenizer = AutoTokenizer.from_pretrained(text_mc_model_path)
 text_mc_model = AutoModelForSequenceClassification.from_pretrained(text_mc_model_path).to(device)
 
+quillbot_labels = ["Original", "QuillBot"]
+quillbot_tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base")
+quillbot_model = AutoModelForSequenceClassification.from_pretrained("polygraf-ai/quillbot-detector-roberta-base-28K").to(device)
+
 def remove_accents(input_str):
-    # nfkd_form = unicodedata.normalize('NFKD', input_str)
-    # return "".join([char for char in nfkd_form if not unicodedata.combining(char)])
     text_no_accents = unidecode(input_str)
     return text_no_accents
 
@@ -266,12 +268,17 @@ def split_text_allow_complete_sentences_nltk(text, max_length=256, tolerance=30,
     decoded_segments.append(decoded_segment)
     return decoded_segments
 
+def predict_quillbot(text):
+    tokenized_text = quillbot_tokenizer(text, padding="max_length", truncation=True, max_length=256, return_tensors="pt").to(device)["input_ids"]
+    output = quillbot_model(tokenized_text)
+    output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
+    q_score = {"QuillBot": output_norm[1].item(), "Original": output_norm[0].item()}
+    return q_score
 
 def predict_bc(model, tokenizer, text):
     tokens = text_bc_tokenizer(
         text, padding='max_length', truncation=True, max_length=256, return_tensors="pt"
     ).to(device)["input_ids"]
-
     output = model(tokens)
     output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
     print("BC Score: ", output_norm)
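For reference, a minimal standalone sketch of the detection path this hunk adds, runnable outside the Gradio app. The model ids and the 256-token padding/truncation mirror the diff; the explicit scipy softmax import, the quillbot_score wrapper name, and the assumption that label index 0 is "Original" and index 1 is "QuillBot" (as quillbot_labels suggests) are assumptions, not confirmed by the checkpoint.

# Hedged sketch: exercise the new QuillBot detector directly.
# Requires access to the Hugging Face checkpoints named in the diff.
import torch
from scipy.special import softmax
from transformers import AutoTokenizer, AutoModelForSequenceClassification

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base")
model = AutoModelForSequenceClassification.from_pretrained(
    "polygraf-ai/quillbot-detector-roberta-base-28K"
).to(device)

def quillbot_score(text):
    # Same preprocessing as predict_quillbot: pad/truncate to 256 tokens.
    input_ids = tokenizer(
        text, padding="max_length", truncation=True, max_length=256, return_tensors="pt"
    ).to(device)["input_ids"]
    with torch.no_grad():  # inference only, no gradients needed
        logits = model(input_ids).logits
    probs = softmax(logits.cpu().numpy(), axis=1)[0]
    # Assumes index 0 = "Original", index 1 = "QuillBot", matching quillbot_labels.
    return {"Original": probs[0].item(), "QuillBot": probs[1].item()}

print(quillbot_score("Example sentence to score."))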
@@ -360,12 +367,14 @@ def main(
     )
     depth_analysis_plot = depth_analysis(input)
     bc_score, mc_score = ai_generated_test(ai_option,input)
+    quilscore = predict_quillbot(input)
 
     return (
         bc_score,
         mc_score,
         formatted_tokens,
         depth_analysis_plot,
+        quilscore
     )
 
 
@@ -520,8 +529,11 @@ with gr.Blocks() as demo:
                 only_ai_btn = gr.Button("AI Check")
 
             with gr.Column():
-                only_plagiarism_btn = gr.Button("Source
-
+                only_plagiarism_btn = gr.Button("Source Check")
+
+        with gr.Row():
+            quillbot_check = gr.Button("Humanized Text Check (Quillbot)")
+
         with gr.Row():
             depth_analysis_btn = gr.Button("Detailed Writing Analysis")
 
@@ -546,7 +558,8 @@ with gr.Blocks() as demo:
                     bcLabel = gr.Label(label="Source")
                 with gr.Column():
                     mcLabel = gr.Label(label="Creator")
-
+            with gr.Row():
+                QLabel = gr.Label(label="Humanized")
             with gr.Group():
                 with gr.Row():
                     month_from = gr.Dropdown(
@@ -615,6 +628,7 @@ with gr.Blocks() as demo:
             mcLabel,
             sentenceBreakdown,
             writing_analysis_plot,
+            QLabel
         ],
         api_name="main",
     )
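Gradio pairs return values with output components positionally, so QLabel is appended last here to line up with quilscore, which main() now returns last. A small self-contained illustration of that contract; the component and function names below are hypothetical and not taken from app.py.

# Hypothetical mini-demo of Gradio's positional output mapping: the k-th item
# of the returned tuple feeds the k-th component listed in `outputs`.
import gradio as gr

def analyze(text):
    length = len(text)
    humanized = {"Original": 0.7, "QuillBot": 0.3}  # gr.Label accepts a label -> confidence dict
    return length, humanized  # order must match `outputs` below

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    n_chars = gr.Number(label="Characters")
    q_label = gr.Label(label="Humanized")
    gr.Button("Run").click(fn=analyze, inputs=[inp], outputs=[n_chars, q_label])

demo.launch()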
@@ -629,6 +643,13 @@ with gr.Blocks() as demo:
         api_name="ai_check",
     )
 
+    quillbot_check.click(
+        fn=predict_quillbot,
+        inputs=[input_text],
+        outputs=[QLabel],
+        api_name="quillbot_check",
+    )
+
     only_plagiarism_btn.click(
         fn=plagiarism_check,
         inputs=[
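Because the new handler registers api_name="quillbot_check", the same check can also be invoked programmatically once the Space is running. A hedged sketch using gradio_client; the Space id below is a placeholder, since the actual id is not shown in this commit.

# Hedged sketch: call the named endpoint from outside the UI.
# "user/space-name" is a placeholder; substitute the real Space id.
from gradio_client import Client

client = Client("user/space-name")
result = client.predict("Text to check for paraphrasing.", api_name="/quillbot_check")
print(result)  # expected to mirror the {"QuillBot": ..., "Original": ...} scores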