Spaces:
Running
Running
import gradio as gr | |
import numpy as np | |
from datetime import date | |
from predictors import predict_bc_scores, predict_mc_scores | |
from predictors import update,update_main, correct_text, split_text | |
from analysis import depth_analysis | |
from predictors import predict_quillbot | |
from plagiarism import plagiarism_check, build_date, html_highlight | |
from highlighter import segmented_higlighter | |
from utils import extract_text_from_pdf, len_validator, extract_text_from_html | |
import yaml | |
from functools import partial | |
from audio import assemblyai_transcribe | |
import yt_dlp | |
import os | |
import pandas as pd | |
from const import plag_script | |
from datasets import load_dataset, Dataset | |
# Ask NumPy to suppress scientific notation when printing small floats.
np.set_printoptions(suppress=True)

# Load the application settings; the path is relative to the current working
# directory, so the app must be started from the directory holding config.yaml.
with open("config.yaml", "r") as file:
    params = yaml.safe_load(file)

# Labels offered in the "Models to test against" dropdown of the UI below.
model_list = params["MC_OUTPUT_LABELS"]

# Highlighter callbacks with `model_type` pre-bound, so each can be attached
# directly to its own button as an event handler.
analyze_and_highlight_bc = partial(segmented_higlighter, model_type="bc")
analyze_and_highlight_quillbot = partial(
    segmented_higlighter, model_type="quillbot"
)
def save_request(email, video_url):
    """Record a pending video-processing request in the requests dataset.

    The "train" split of the dataset identified by ``requests_repo`` is
    loaded, one row ``{email, video_url, status="pending"}`` is appended,
    and the whole dataset is pushed back to the Hub.

    NOTE(review): ``requests_repo`` is not defined or imported in the part of
    the file visible here — confirm it is provided elsewhere, otherwise this
    raises ``NameError`` as soon as a non-empty email is submitted.

    Args:
        email: Address of the requester; ``None`` or ``""`` is rejected.
        video_url: URL of the video to process (stored as given).

    Returns:
        A ``(message, update)`` pair: a status message and a ``gr.update``
        that keeps the request form visible on rejection and hides it once
        the request has been stored.
    """
    email_missing = email is None or email == ""
    if email_missing:
        return "Please enter your email.", gr.update(visible=True)

    # Read-modify-write of the full dataset: fetch the current rows first.
    existing_requests = load_dataset(requests_repo)["train"].to_pandas()
    pending_request = pd.DataFrame(
        {"email": [email], "video_url": [video_url], "status": "pending"}
    )
    all_requests = pd.concat(
        [existing_requests, pending_request], ignore_index=True
    )
    Dataset.from_pandas(all_requests).push_to_hub(requests_repo)

    return "Your request has been saved.", gr.update(visible=False)
def ai_generated_test(option, bias_buster_selected, input):
    """Run the AI-detection predictors selected by ``option``.

    Args:
        option: ``"Human vs AI"`` for the binary score only, or
            ``"Human vs AI Source Models"`` for the binary score plus the
            per-model score. Any other value yields no scores.
        bias_buster_selected: When truthy, the text is first passed through
            ``update`` and the predictors run on its result.
        input: The text to analyse.

    Returns:
        A ``(bc_scores, mc_scores)`` pair; either element is ``None`` when
        the corresponding predictor was not requested.
    """
    text = update(input) if bias_buster_selected else input

    if option == "Human vs AI":
        return predict_bc_scores(text), None

    if option == "Human vs AI Source Models":
        bc_scores = predict_bc_scores(text)
        mc_scores = predict_mc_scores(text)
        return bc_scores, mc_scores

    # Unknown or unset option: nothing to report.
    return None, None
# COMBINED | |
def main(
    ai_option,
    plag_option,
    input,
    year_from,
    month_from,
    day_from,
    year_to,
    month_to,
    day_to,
    domains_to_skip,
    source_block_size,
    bias_buster_selected=False,
):
    """Run all checks on ``input`` and return their results together.

    Args:
        ai_option: Accepted for interface compatibility; currently unused —
            both the binary and the per-model scores are always computed.
        plag_option: Source-check mode, forwarded to ``html_highlight``.
        input: The text to analyse.
        year_from, month_from, day_from: Start of the date range, forwarded
            to ``html_highlight``.
        year_to, month_to, day_to: End of the date range, forwarded to
            ``html_highlight``.
        domains_to_skip: Domains to exclude, forwarded to ``html_highlight``.
        source_block_size: Source-check granularity, forwarded to
            ``html_highlight``.
        bias_buster_selected: Boolean flag forwarded to ``depth_analysis``
            and ``predict_quillbot``. It is an explicit parameter because the
            function previously picked up the module-level checkbox
            *component* of the same name instead of the user's choice.
            Callers must pass the checkbox value as the last input for it to
            be honoured; it defaults to ``False`` (unchecked).

    Returns:
        A tuple ``(bc_score, mc_score, formatted_tokens,
        depth_analysis_plot, quilscore)``.
    """
    formatted_tokens = html_highlight(
        plag_option,
        input,
        year_from,
        month_from,
        day_from,
        year_to,
        month_to,
        day_to,
        domains_to_skip,
        source_block_size,
    )
    depth_analysis_plot = depth_analysis(input, bias_buster_selected)
    # The two AI scores are computed on the raw input, regardless of the flag.
    bc_score = predict_bc_scores(input)
    mc_score = predict_mc_scores(input)
    quilscore = predict_quillbot(input, bias_buster_selected)
    return (
        bc_score,
        mc_score,
        formatted_tokens,
        depth_analysis_plot,
        quilscore,
    )
# START OF GRADIO | |
# Application title.
title = "AI Detection and Source Analysis"

# Month name -> zero-padded two-digit month number, in calendar order.
months = {
    month_name: f"{month_number:02d}"
    for month_number, month_name in enumerate(
        (
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ),
        start=1,
    )
}
# Build the Gradio UI: components are created inside the Blocks context and
# their event handlers are attached afterwards.
# NOTE(review): the Row/Column nesting below was reconstructed from a copy of
# this file that had lost its indentation — confirm it against the intended
# page layout.
with gr.Blocks() as demo:
    today = date.today()
    # Split today's date into [day, full month name, year] strings; they
    # pre-fill the "To" date fields further down.
    d1 = today.strftime("%d/%B/%Y")
    d1 = d1.split("/")
    # Top-level domains the user may exclude from the source check.
    domain_list = ["com", "org", "net", "int", "edu", "gov", "mil"]
    gr.Markdown(
        """
# AI Detection and Source Analysis
"""
    )

    # --- Input: typed text or an uploaded PDF -------------------------------
    with gr.Row():
        input_text = gr.Textbox(label="Input text", lines=6, placeholder="")
        file_input = gr.File(label="Upload PDF")
        # Uploading a file replaces the input text with the extracted text.
        file_input.change(
            fn=extract_text_from_pdf, inputs=file_input, outputs=input_text
        )

    # Video-request form; created hidden (visible=False).
    with gr.Column(visible=False) as request_row:
        with gr.Row():
            email_input = gr.Textbox(label="Email")
            youtube_url_input = gr.Textbox(label="YouTube Video URL")
        with gr.Row():
            video_submit_btn = gr.Button("Submit Video Request")

    # --- Input: web page URL or YouTube URL ---------------------------------
    with gr.Row():
        url_input = gr.Textbox(
            label="Input Page URL to check", lines=1, placeholder="")
        # Every change of the URL box re-runs the extraction into input_text.
        url_input.change(
            fn=extract_text_from_html, inputs=url_input, outputs=input_text)
        audio_url_input = gr.Textbox(label="Input YouTube URL to check", lines=1, placeholder="")
        audio_url_input.change(
            fn=assemblyai_transcribe, inputs=audio_url_input, outputs=input_text
        )

    # save_request returns (message, visibility update): the message is
    # written into the main input textbox and the update is applied to the
    # request form.
    video_submit_btn.click(
        fn=save_request,
        inputs=[email_input, youtube_url_input],
        outputs=[input_text, request_row],
        api_name="video_request",
    )

    # Result of len_validator, refreshed whenever the input text changes.
    char_count = gr.Textbox(label="Minumum Character Limit Check")
    input_text.change(fn=len_validator, inputs=input_text, outputs=char_count)

    # --- Text correction -----------------------------------------------------
    with gr.Row():
        btn = gr.Button("Deception Filter")
    out = gr.Textbox(label="Corrected Full Input", interactive=False)
    corrections_output = gr.Textbox(label="Corrections", interactive=False)
    btn.click(fn=update_main, inputs=input_text, outputs=[out, corrections_output])

    # --- Options -------------------------------------------------------------
    with gr.Row():
        # NOTE(review): `models` is not used as an input to any handler in
        # this block — confirm whether it is meant to be wired up.
        models = gr.Dropdown(
            model_list,
            value=model_list,
            multiselect=True,
            label="Models to test against",
        )
    with gr.Row():
        with gr.Column():
            # These strings are compared literally in ai_generated_test.
            ai_option = gr.Radio(
                [
                    "Human vs AI",
                    "Human vs AI Source Models",
                ],
                label="Choose an option please.",
            )
        with gr.Column():
            bias_buster_selected = gr.Checkbox(label="Bias Remover")
        with gr.Column():
            plag_option = gr.Radio(
                ["Standard", "Advanced"], label="Choose an option please."
            )
    with gr.Row():
        source_block_size = gr.Dropdown(
            choices=["Sentence", "Paragraph"],
            label="Source Check Granularity",
            value="Sentence",
            interactive=True,
        )

    # --- Action buttons ------------------------------------------------------
    with gr.Row():
        with gr.Column():
            only_ai_btn = gr.Button("AI Check")
        with gr.Column():
            only_plagiarism_btn = gr.Button("Source Check")
        with gr.Column():
            quillbot_check = gr.Button("Humanized Text Check")
    with gr.Row():
        with gr.Column():
            bc_highlighter_button = gr.Button("Human vs. AI Highlighter")
        with gr.Column():
            quillbot_highlighter_button = gr.Button("Humanized Highlighter")
    with gr.Row():
        depth_analysis_btn = gr.Button("Detailed Writing Analysis")
    with gr.Row():
        full_check_btn = gr.Button("Full Check")

    # --- Outputs -------------------------------------------------------------
    gr.Markdown(
        """
## Output
"""
    )
    with gr.Row():
        with gr.Column():
            bcLabel = gr.Label(label="Source")
        with gr.Column():
            mcLabel = gr.Label(label="Creator")
    with gr.Row():
        with gr.Column():
            bc_highlighter_output = gr.HTML(label="Human vs. AI Highlighter")
    with gr.Row():
        with gr.Column():
            QLabel = gr.Label(label="Humanized")
    with gr.Row():
        quillbot_highlighter_output = gr.HTML(label="Humanized Highlighter")

    # --- Source-check filters: date range and excluded domains ---------------
    with gr.Group():
        with gr.Row():
            # `months` is a dict; it is passed as-is as the choices argument.
            month_from = gr.Dropdown(
                choices=months,
                label="From Month",
                value="January",
                interactive=True,
            )
            day_from = gr.Textbox(label="From Day", value="01")
            year_from = gr.Textbox(label="From Year", value="2000")
            # from_date_button = gr.Button("Submit")
        with gr.Row():
            # The "To" date defaults to today (d1 = [day, month name, year]).
            month_to = gr.Dropdown(
                choices=months,
                label="To Month",
                value=d1[1],
                interactive=True,
            )
            day_to = gr.Textbox(label="To Day", value=d1[0])
            year_to = gr.Textbox(label="To Year", value=d1[2])
            # to_date_button = gr.Button("Submit")
        with gr.Row():
            domains_to_skip = gr.Dropdown(
                domain_list,
                multiselect=True,
                label="Domain To Skip",
            )
    with gr.Row():
        with gr.Column():
            sentenceBreakdown = gr.HTML(
                label="Source Detection Sentence Breakdown",
                value="Source Detection Sentence Breakdown",
            )
    with gr.Row():
        with gr.Column():
            writing_analysis_plot = gr.Plot(label="Writing Analysis Plot")
        with gr.Column():
            # Static legend shown next to the writing-analysis plot.
            interpretation = """
<h2>Writing Analysis Interpretation</h2>
<ul>
<li><b>Lexical Diversity</b>: This feature measures the range of unique words used in a text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Vocabulary Level</b>: This feature assesses the complexity of the words used in a text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Unique Words</b>: This feature counts the number of words that appear only once within the text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Determiner Use</b>: This feature tracks the frequency of articles and quantifiers in the text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Punctuation Variety</b>: This feature indicates the diversity of punctuation marks used in the text.
<ul>
<li>👤 Higher tends to be Human.</li>
</ul>
</li>
<li><b>Sentence Depth</b>: This feature evaluates the complexity of the sentence structures used in the text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Vocabulary Stability</b>: This feature measures the consistency of vocabulary use throughout the text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Entity Ratio</b>: This feature calculates the proportion of named entities, such as names and places, within the text.
<ul>
<li>👤 Higher tends to be Human.</li>
</ul>
</li>
<li><b>Perplexity</b>: This feature assesses the predictability of the text based on the sequence of words.
<ul>
<li>👤 Higher tends to be Human.</li>
</ul>
</li>
</ul>
"""
            gr.HTML(interpretation, label="Interpretation of Writing Analysis")

    # --- Event wiring --------------------------------------------------------
    # "Full Check": the order of `inputs` must match main's positional
    # parameters, and the order of `outputs` must match its returned tuple.
    # NOTE(review): the "Bias Remover" checkbox is not among these inputs, so
    # its value is never handed to `main` — confirm whether it should be.
    full_check_btn.click(
        fn=main,
        inputs=[
            ai_option,
            plag_option,
            input_text,
            year_from,
            month_from,
            day_from,
            year_to,
            month_to,
            day_to,
            domains_to_skip,
            source_block_size,
        ],
        outputs=[
            bcLabel,
            mcLabel,
            sentenceBreakdown,
            writing_analysis_plot,
            QLabel,
        ],
        api_name="main",
    )
    only_ai_btn.click(
        fn=ai_generated_test,
        inputs=[ai_option, bias_buster_selected, input_text],
        outputs=[bcLabel, mcLabel],
        api_name="ai_check",
    )
    quillbot_check.click(
        fn=predict_quillbot,
        inputs=[input_text, bias_buster_selected],
        outputs=[QLabel],
        api_name="quillbot_check",
    )
    # "Source Check" uses the same filter inputs as the full check, minus the
    # AI option.
    only_plagiarism_btn.click(
        # fn=plagiarism_check,
        fn=html_highlight,
        inputs=[
            plag_option,
            input_text,
            year_from,
            month_from,
            day_from,
            year_to,
            month_to,
            day_to,
            domains_to_skip,
            source_block_size,
        ],
        outputs=[
            sentenceBreakdown,
        ],
        api_name="plagiarism_check",
    )
    depth_analysis_btn.click(
        fn=depth_analysis,
        inputs=[input_text, bias_buster_selected],
        outputs=[writing_analysis_plot],
        api_name="depth_analysis",
    )
    quillbot_highlighter_button.click(
        fn=analyze_and_highlight_quillbot,
        inputs=[input_text, bias_buster_selected],
        outputs=[quillbot_highlighter_output],
        api_name="humanized_highlighter",
    )
    bc_highlighter_button.click(
        fn=analyze_and_highlight_bc,
        inputs=[input_text, bias_buster_selected],
        outputs=[bc_highlighter_output],
        api_name="bc_highlighter",
    )
# Module-level placeholders; not read anywhere in the visible part of the file.
date_from = ""
date_to = ""

if __name__ == "__main__":
    # The login credentials can be supplied through the environment so that
    # real secrets do not have to live in source control. The literals are the
    # previous hard-coded values, kept as defaults so existing deployments
    # behave exactly as before.
    # NOTE(review): the default password is still committed here while the app
    # is exposed via share=True on 0.0.0.0 — rotate it and remove the defaults
    # once APP_AUTH_USERNAME / APP_AUTH_PASSWORD are configured.
    auth_username = os.environ.get("APP_AUTH_USERNAME", "polygraf-admin")
    auth_password = os.environ.get("APP_AUTH_PASSWORD", "test@aisd")
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        auth=(auth_username, auth_password),
    )