minko186's picture
added option to choose size of sentence block for source detect
9c75413
raw
history blame
10.6 kB
import gradio as gr
import numpy as np
from datetime import date
from predictors import predict_bc_scores, predict_mc_scores, predict_1on1_scores
from analysis import depth_analysis
from predictors import predict_quillbot
from plagiarism import plagiarism_check, build_date, html_highlight
from highlighter import analyze_and_highlight
from utils import extract_text_from_pdf, len_validator
import yaml
from functools import partial
# Print NumPy floats in fixed-point notation rather than scientific notation.
np.set_printoptions(suppress=True)

# Load the application settings. The encoding is stated explicitly so that
# parsing does not depend on the platform's default locale encoding.
with open("config.yaml", "r", encoding="utf-8") as file:
    params = yaml.safe_load(file)

# Labels read from the "MC_OUTPUT_LABELS" entry of the configuration.
model_list = params["MC_OUTPUT_LABELS"]

# Highlighter callbacks with the model type bound in advance, so each one can
# be invoked with the remaining arguments only.
analyze_and_highlight_bc = partial(analyze_and_highlight, model_type="bc")
analyze_and_highlight_quillbot = partial(
    analyze_and_highlight, model_type="quillbot"
)
def ai_generated_test(option, input, models):
    """Run the AI-detection predictors selected by ``option``.

    Args:
        option: Mode string; ``"Human vs AI"`` or
            ``"Human vs AI Source Models"``.
        input: Text passed to the predictors.
        models: Model selection forwarded to ``predict_1on1_scores``.

    Returns:
        A 2-tuple. The first item is the result of ``predict_bc_scores``.
        The second item is the result of ``predict_1on1_scores`` in
        ``"Human vs AI Source Models"`` mode and ``None`` in ``"Human vs AI"``
        mode. Any other ``option`` yields ``(None, None)`` without calling a
        predictor.
    """
    binary_only = "Human vs AI"
    with_sources = "Human vs AI Source Models"

    # Unrecognised (or missing) mode: nothing to compute.
    if option not in (binary_only, with_sources):
        return None, None

    binary_scores = predict_bc_scores(input)
    if option == binary_only:
        return binary_scores, None
    return binary_scores, predict_1on1_scores(input, models)
# COMBINED
def main(
    ai_option,
    plag_option,
    input,
    models,
    year_from,
    month_from,
    day_from,
    year_to,
    month_to,
    day_to,
    domains_to_skip,
    source_block_size,
):
    """Run every check on ``input`` and return all results together.

    Args:
        ai_option: Accepted for signature compatibility; not read by this
            function.
        plag_option: Forwarded to ``html_highlight``.
        input: Text handed to every check.
        models: Forwarded to ``predict_1on1_scores``.
        year_from, month_from, day_from: Forwarded to ``html_highlight``.
        year_to, month_to, day_to: Forwarded to ``html_highlight``.
        domains_to_skip: Forwarded to ``html_highlight``.
        source_block_size: Forwarded to ``html_highlight``.

    Returns:
        A 5-tuple of, in order: the ``predict_bc_scores`` result, the
        ``predict_1on1_scores`` result, the ``html_highlight`` result, the
        ``depth_analysis`` result and the ``predict_quillbot`` result.
    """
    # Positional arguments for the source check, in the order it expects.
    source_check_args = (
        plag_option,
        input,
        year_from,
        month_from,
        day_from,
        year_to,
        month_to,
        day_to,
        domains_to_skip,
        source_block_size,
    )
    source_html = html_highlight(*source_check_args)

    writing_plot = depth_analysis(input)
    human_vs_ai = predict_bc_scores(input)
    per_model = predict_1on1_scores(input, models)
    humanized = predict_quillbot(input)

    return human_vs_ai, per_model, source_html, writing_plot, humanized
# START OF GRADIO
# Application title.
title = "Copyright Checker"

# Maps each English month name, in calendar order, to its zero-padded
# two-digit month number as a string ("January" -> "01", ...).
months = {
    month_name: f"{month_number:02d}"
    for month_number, month_name in enumerate(
        (
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ),
        start=1,
    )
}
# Builds the Gradio UI (inputs, action buttons, outputs) and wires each button
# to its callback.
# NOTE(review): the nesting of the Row/Column/Group containers below is
# reconstructed from the statement order - confirm against the rendered UI.
with gr.Blocks() as demo:
    today = date.today()
    month_names = list(months)
    # [day, month name, year] used as the default "to" date. Built directly
    # from the date fields so the month name is always one of the keys of
    # ``months``, instead of formatting with "%B" and splitting the string.
    d1 = [f"{today.day:02d}", month_names[today.month - 1], str(today.year)]

    domain_list = ["com", "org", "net", "int", "edu", "gov", "mil"]

    gr.Markdown(
        """
# Copyright Checker
"""
    )

    # --- Input ---------------------------------------------------------
    with gr.Row():
        input_text = gr.Textbox(label="Input text", lines=6, placeholder="")
        file_input = gr.File(label="Upload PDF")
        # The result of extract_text_from_pdf for the uploaded file is
        # written into the input text box.
        file_input.change(
            fn=extract_text_from_pdf, inputs=file_input, outputs=input_text
        )
    char_count = gr.Textbox(label="Minimum Character Limit Check")
    # Re-run len_validator whenever the input text changes.
    input_text.change(fn=len_validator, inputs=input_text, outputs=char_count)

    # --- Options -------------------------------------------------------
    with gr.Row():
        models = gr.Dropdown(
            model_list,
            value=model_list,
            multiselect=True,
            label="Models to test against",
        )
    with gr.Row():
        with gr.Column():
            ai_option = gr.Radio(
                [
                    "Human vs AI",
                    "Human vs AI Source Models",
                ],
                label="Choose an option please.",
            )
        with gr.Column():
            plag_option = gr.Radio(
                ["Standard", "Advanced"], label="Choose an option please."
            )
    with gr.Row():
        source_block_size = gr.Dropdown(
            choices=["1", "2", "3", "Paragraph"],
            label="Source Check Granularity",
            value="2",
            interactive=True,
        )

    # --- Action buttons ------------------------------------------------
    with gr.Row():
        with gr.Column():
            only_ai_btn = gr.Button("AI Check")
        with gr.Column():
            only_plagiarism_btn = gr.Button("Source Check")
        with gr.Column():
            quillbot_check = gr.Button("Humanized Text Check")
    with gr.Row():
        with gr.Column():
            bc_highlighter_button = gr.Button("Human vs. AI Highlighter")
        with gr.Column():
            quillbot_highlighter_button = gr.Button("Humanized Highlighter")
    with gr.Row():
        depth_analysis_btn = gr.Button("Detailed Writing Analysis")
    with gr.Row():
        full_check_btn = gr.Button("Full Check")

    # --- Output --------------------------------------------------------
    gr.Markdown(
        """
## Output
"""
    )
    with gr.Row():
        with gr.Column():
            bcLabel = gr.Label(label="Source")
        with gr.Column():
            mcLabel = gr.Label(label="Creator")
    with gr.Row():
        with gr.Column():
            bc_highlighter_output = gr.HTML(label="Human vs. AI Highlighter")
    with gr.Row():
        with gr.Column():
            QLabel = gr.Label(label="Humanized")
    with gr.Row():
        quillbot_highlighter_output = gr.HTML(label="Humanized Highlighter")

    # --- Source-check filters (date range and skipped domains) ----------
    with gr.Group():
        with gr.Row():
            month_from = gr.Dropdown(
                choices=month_names,
                label="From Month",
                value="January",
                interactive=True,
            )
            day_from = gr.Textbox(label="From Day", value="01")
            year_from = gr.Textbox(label="From Year", value="2000")
        with gr.Row():
            month_to = gr.Dropdown(
                choices=month_names,
                label="To Month",
                value=d1[1],
                interactive=True,
            )
            day_to = gr.Textbox(label="To Day", value=d1[0])
            year_to = gr.Textbox(label="To Year", value=d1[2])
        with gr.Row():
            domains_to_skip = gr.Dropdown(
                domain_list,
                multiselect=True,
                label="Domain To Skip",
            )

    with gr.Row():
        with gr.Column():
            sentenceBreakdown = gr.HTML(
                label="Source Detection Sentence Breakdown",
                value="Source Detection Sentence Breakdown",
            )

    with gr.Row():
        with gr.Column():
            writing_analysis_plot = gr.Plot(label="Writing Analysis Plot")
        with gr.Column():
            # Static legend shown next to the writing-analysis plot.
            interpretation = """
<h2>Writing Analysis Interpretation</h2>
<ul>
<li><b>Lexical Diversity</b>: This feature measures the range of unique words used in a text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Vocabulary Level</b>: This feature assesses the complexity of the words used in a text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Unique Words</b>: This feature counts the number of words that appear only once within the text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Determiner Use</b>: This feature tracks the frequency of articles and quantifiers in the text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Punctuation Variety</b>: This feature indicates the diversity of punctuation marks used in the text.
<ul>
<li>👤 Higher tends to be Human.</li>
</ul>
</li>
<li><b>Sentence Depth</b>: This feature evaluates the complexity of the sentence structures used in the text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Vocabulary Stability</b>: This feature measures the consistency of vocabulary use throughout the text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Entity Ratio</b>: This feature calculates the proportion of named entities, such as names and places, within the text.
<ul>
<li>👤 Higher tends to be Human.</li>
</ul>
</li>
<li><b>Perplexity</b>: This feature assesses the predictability of the text based on the sequence of words.
<ul>
<li>👤 Higher tends to be Human.</li>
</ul>
</li>
</ul>
"""
            gr.HTML(interpretation, label="Interpretation of Writing Analysis")

    # --- Event wiring ----------------------------------------------------
    # The order of each ``inputs`` list must match the positional parameters
    # of the callback; the order of ``outputs`` must match its return values.
    full_check_btn.click(
        fn=main,
        inputs=[
            ai_option,
            plag_option,
            input_text,
            models,
            year_from,
            month_from,
            day_from,
            year_to,
            month_to,
            day_to,
            domains_to_skip,
            source_block_size,
        ],
        outputs=[
            bcLabel,
            mcLabel,
            sentenceBreakdown,
            writing_analysis_plot,
            QLabel,
        ],
        api_name="main",
    )

    only_ai_btn.click(
        fn=ai_generated_test,
        inputs=[ai_option, input_text, models],
        outputs=[bcLabel, mcLabel],
        api_name="ai_check",
    )

    quillbot_check.click(
        fn=predict_quillbot,
        inputs=[input_text],
        outputs=[QLabel],
        api_name="quillbot_check",
    )

    only_plagiarism_btn.click(
        fn=html_highlight,
        inputs=[
            plag_option,
            input_text,
            year_from,
            month_from,
            day_from,
            year_to,
            month_to,
            day_to,
            domains_to_skip,
            source_block_size,
        ],
        outputs=[
            sentenceBreakdown,
        ],
        api_name="plagiarism_check",
    )

    depth_analysis_btn.click(
        fn=depth_analysis,
        inputs=[input_text],
        outputs=[writing_analysis_plot],
        api_name="depth_analysis",
    )

    quillbot_highlighter_button.click(
        fn=analyze_and_highlight_quillbot,
        inputs=[input_text],
        outputs=[quillbot_highlighter_output],
        api_name="humanized_highlighter",
    )

    bc_highlighter_button.click(
        fn=analyze_and_highlight_bc,
        inputs=[input_text],
        outputs=[bc_highlighter_output],
        api_name="bc_highlighter",
    )

    # Not read anywhere in this block; kept so the module still exposes them.
    date_from = ""
    date_to = ""
if __name__ == "__main__":
    import os

    # Login credentials may be supplied through the environment so that a
    # deployment does not have to rely on values committed to the repository.
    # NOTE(review): the fallback values are kept only for backward
    # compatibility - they are hard-coded secrets and should be rotated and
    # removed from source control.
    auth_username = os.environ.get("APP_AUTH_USERNAME", "polygraf-admin")
    auth_password = os.environ.get("APP_AUTH_PASSWORD", "test@aisd")

    # Listens on all interfaces and also requests a public share link.
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        auth=(auth_username, auth_password),
    )