Spaces:

polygraf-ai
/

copyright_checker

Running

App Files Files Community

copyright_checker / app.py

aliasgerovs

Updated

0eaca07 5 months ago

raw

history blame

12.5 kB

	import gradio as gr
	import numpy as np
	from datetime import date
	from predictors import predict_bc_scores, predict_mc_scores
	from predictors import update,update_main, correct_text, split_text
	from analysis import depth_analysis
	from predictors import predict_quillbot
	from plagiarism import plagiarism_check, build_date, html_highlight
	from highlighter import segmented_higlighter
	from utils import extract_text_from_pdf, len_validator, extract_text_from_html
	import yaml
	from functools import partial
	from audio import assemblyai_transcribe
	import yt_dlp
	import os
	import pandas as pd
	from const import plag_script
	from datasets import load_dataset, Dataset

	# Print floats in fixed-point notation instead of scientific notation.
	np.set_printoptions(suppress=True)

	# Load the app configuration. The encoding is explicit so parsing does not
	# depend on the platform's default locale encoding.
	with open("config.yaml", "r", encoding="utf-8") as file:
	    params = yaml.safe_load(file)

	# Labels offered in the "Models to test against" dropdown.
	model_list = params["MC_OUTPUT_LABELS"]


	# The two highlighter buttons share `segmented_higlighter`; only the
	# `model_type` keyword differs.
	analyze_and_highlight_bc = partial(segmented_higlighter, model_type="bc")
	analyze_and_highlight_quillbot = partial(
	    segmented_higlighter, model_type="quillbot"
	)



	def save_request(email, video_url, repo_id=None):
	    """Append a pending video request to the requests dataset on the Hub.

	    The existing "train" split is loaded, one row with status "pending" is
	    appended, and the result is pushed back to the same repository.

	    Args:
	        email: Requester's e-mail address; must be non-blank.
	        video_url: URL of the video the request refers to.
	        repo_id: Dataset repository that stores the requests. When omitted,
	            a module-level ``requests_repo`` is used if one is defined,
	            otherwise the ``REQUESTS_REPO`` environment variable.

	    Returns:
	        A ``(message, update)`` pair: a status message and a ``gr.update``
	        that keeps the request form visible on failure and hides it on
	        success.
	    """
	    if not email or not email.strip():
	        return "Please enter your email.", gr.update(visible=True)

	    # The original body read a global `requests_repo` that this file never
	    # defines, so every valid submission died with NameError. Resolve the
	    # repository explicitly and fail with a message instead of a traceback.
	    repo_id = (
	        repo_id
	        or globals().get("requests_repo")
	        or os.environ.get("REQUESTS_REPO")
	    )
	    if not repo_id:
	        return "Video requests are not configured.", gr.update(visible=True)

	    existing = load_dataset(repo_id)["train"].to_pandas()
	    new_row = pd.DataFrame(
	        {"email": [email], "video_url": [video_url], "status": ["pending"]}
	    )
	    combined = pd.concat([existing, new_row], ignore_index=True)
	    Dataset.from_pandas(combined).push_to_hub(repo_id)
	    return "Your request has been saved.", gr.update(visible=False)

	def ai_generated_test(option, bias_buster_selected, input):
	    """Run the AI-detection classifiers selected by ``option``.

	    When ``bias_buster_selected`` is truthy the text is first passed through
	    ``update``. "Human vs AI" yields only the binary score; "Human vs AI
	    Source Models" yields the binary and the multi-class scores; any other
	    option yields ``(None, None)``.
	    """
	    text = update(input) if bias_buster_selected else input

	    if option == "Human vs AI":
	        return predict_bc_scores(text), None

	    if option == "Human vs AI Source Models":
	        binary_scores = predict_bc_scores(text)
	        model_scores = predict_mc_scores(text)
	        return binary_scores, model_scores

	    return None, None


	# COMBINED
	def main(
	    ai_option,
	    plag_option,
	    input,
	    year_from,
	    month_from,
	    day_from,
	    year_to,
	    month_to,
	    day_to,
	    domains_to_skip,
	    source_block_size,
	    bias_buster_selected=False,
	):
	    """Run every check on one text for the "Full Check" button.

	    Calls ``html_highlight`` (source check), ``depth_analysis``,
	    ``predict_bc_scores``, ``predict_mc_scores`` and ``predict_quillbot``.

	    Args:
	        ai_option: Accepted for interface compatibility; not read here, both
	            classifiers always run.
	        plag_option: Forwarded to ``html_highlight``.
	        input: Text to analyse.
	        year_from, month_from, day_from: Start of the date range, forwarded
	            to ``html_highlight``.
	        year_to, month_to, day_to: End of the date range, forwarded to
	            ``html_highlight``.
	        domains_to_skip: Forwarded to ``html_highlight``.
	        source_block_size: Forwarded to ``html_highlight``.
	        bias_buster_selected: Value of the "Bias Remover" checkbox, forwarded
	            to ``depth_analysis`` and ``predict_quillbot``. It used to be read
	            as a free name, which resolved to the module-level
	            ``gr.Checkbox`` component instead of the user's choice.

	    Returns:
	        Tuple ``(bc_score, mc_score, formatted_tokens, depth_analysis_plot,
	        quilscore)``, in the order the Full Check outputs are wired.
	    """
	    formatted_tokens = html_highlight(
	        plag_option,
	        input,
	        year_from,
	        month_from,
	        day_from,
	        year_to,
	        month_to,
	        day_to,
	        domains_to_skip,
	        source_block_size,
	    )
	    depth_analysis_plot = depth_analysis(input, bias_buster_selected)
	    bc_score = predict_bc_scores(input)
	    mc_score = predict_mc_scores(input)
	    quilscore = predict_quillbot(input, bias_buster_selected)

	    return (
	        bc_score,
	        mc_score,
	        formatted_tokens,
	        depth_analysis_plot,
	        quilscore,
	    )


	# START OF GRADIO

	title = "AI Detection and Source Analysis"
	# Month name shown in the dropdowns -> two-digit month number.
	months = {
	    "January": "01",
	    "February": "02",
	    "March": "03",
	    "April": "04",
	    "May": "05",
	    "June": "06",
	    "July": "07",
	    "August": "08",
	    "September": "09",
	    "October": "10",
	    "November": "11",
	    "December": "12",
	}


	# NOTE(review): the nesting of the layout containers below was reconstructed
	# from a listing that had lost its indentation - confirm against the rendered UI.
	with gr.Blocks() as demo:
	    today = date.today()
	    # [day (zero-padded), full month name, four-digit year]; used as the
	    # default "to" date below.
	    d1 = today.strftime("%d/%B/%Y").split("/")

	    domain_list = ["com", "org", "net", "int", "edu", "gov", "mil"]
	    gr.Markdown(
	        """
	# AI Detection and Source Analysis
	"""
	    )

	    # ---- Inputs: raw text, PDF upload, page URL, YouTube URL ----
	    with gr.Row():
	        input_text = gr.Textbox(label="Input text", lines=6, placeholder="")
	        file_input = gr.File(label="Upload PDF")
	        file_input.change(
	            fn=extract_text_from_pdf, inputs=file_input, outputs=input_text
	        )

	    # Video request form; created hidden.
	    with gr.Column(visible=False) as request_row:
	        with gr.Row():
	            email_input = gr.Textbox(label="Email")
	            youtube_url_input = gr.Textbox(label="YouTube Video URL")
	        with gr.Row():
	            video_submit_btn = gr.Button("Submit Video Request")

	    with gr.Row():
	        url_input = gr.Textbox(
	            label="Input Page URL to check", lines=1, placeholder=""
	        )
	        url_input.change(
	            fn=extract_text_from_html, inputs=url_input, outputs=input_text
	        )

	        audio_url_input = gr.Textbox(
	            label="Input YouTube URL to check", lines=1, placeholder=""
	        )
	        audio_url_input.change(
	            fn=assemblyai_transcribe, inputs=audio_url_input, outputs=input_text
	        )

	    video_submit_btn.click(
	        fn=save_request,
	        inputs=[email_input, youtube_url_input],
	        outputs=[input_text, request_row],
	        api_name="video_request",
	    )

	    char_count = gr.Textbox(label="Minumum Character Limit Check")
	    input_text.change(fn=len_validator, inputs=input_text, outputs=char_count)

	    with gr.Row():
	        btn = gr.Button("Deception Filter")
	        out = gr.Textbox(label="Corrected Full Input", interactive=False)
	        corrections_output = gr.Textbox(label="Corrections", interactive=False)
	        btn.click(
	            fn=update_main, inputs=input_text, outputs=[out, corrections_output]
	        )

	    # ---- Options ----
	    with gr.Row():
	        models = gr.Dropdown(
	            model_list,
	            value=model_list,
	            multiselect=True,
	            label="Models to test against",
	        )

	    with gr.Row():
	        with gr.Column():
	            ai_option = gr.Radio(
	                [
	                    "Human vs AI",
	                    "Human vs AI Source Models",
	                ],
	                label="Choose an option please.",
	            )

	        with gr.Column():
	            bias_buster_selected = gr.Checkbox(label="Bias Remover")

	        with gr.Column():
	            plag_option = gr.Radio(
	                ["Standard", "Advanced"], label="Choose an option please."
	            )
	    with gr.Row():
	        source_block_size = gr.Dropdown(
	            choices=["Sentence", "Paragraph"],
	            label="Source Check Granularity",
	            value="Sentence",
	            interactive=True,
	        )

	    # ---- Action buttons ----
	    with gr.Row():
	        with gr.Column():
	            only_ai_btn = gr.Button("AI Check")
	        with gr.Column():
	            only_plagiarism_btn = gr.Button("Source Check")

	        with gr.Column():
	            quillbot_check = gr.Button("Humanized Text Check")

	    with gr.Row():
	        with gr.Column():
	            bc_highlighter_button = gr.Button("Human vs. AI Highlighter")
	        with gr.Column():
	            quillbot_highlighter_button = gr.Button("Humanized Highlighter")

	    with gr.Row():
	        depth_analysis_btn = gr.Button("Detailed Writing Analysis")

	    with gr.Row():
	        full_check_btn = gr.Button("Full Check")

	    # ---- Outputs ----
	    gr.Markdown(
	        """
	## Output
	"""
	    )

	    with gr.Row():
	        with gr.Column():
	            bcLabel = gr.Label(label="Source")
	        with gr.Column():
	            mcLabel = gr.Label(label="Creator")
	    with gr.Row():
	        with gr.Column():
	            bc_highlighter_output = gr.HTML(label="Human vs. AI Highlighter")

	    with gr.Row():
	        with gr.Column():
	            QLabel = gr.Label(label="Humanized")

	    with gr.Row():
	        quillbot_highlighter_output = gr.HTML(label="Humanized Highlighter")

	    # ---- Source-check filters: date range and skipped domains ----
	    with gr.Group():
	        with gr.Row():
	            month_from = gr.Dropdown(
	                choices=list(months),
	                label="From Month",
	                value="January",
	                interactive=True,
	            )
	            day_from = gr.Textbox(label="From Day", value="01")
	            year_from = gr.Textbox(label="From Year", value="2000")
	            # from_date_button = gr.Button("Submit")

	        with gr.Row():
	            month_to = gr.Dropdown(
	                choices=list(months),
	                label="To Month",
	                value=d1[1],
	                interactive=True,
	            )
	            day_to = gr.Textbox(label="To Day", value=d1[0])
	            year_to = gr.Textbox(label="To Year", value=d1[2])
	            # to_date_button = gr.Button("Submit")
	        with gr.Row():
	            domains_to_skip = gr.Dropdown(
	                domain_list,
	                multiselect=True,
	                label="Domain To Skip",
	            )

	    with gr.Row():
	        with gr.Column():
	            sentenceBreakdown = gr.HTML(
	                label="Source Detection Sentence Breakdown",
	                value="Source Detection Sentence Breakdown",
	            )

	    with gr.Row():
	        with gr.Column():
	            writing_analysis_plot = gr.Plot(label="Writing Analysis Plot")
	        with gr.Column():
	            interpretation = """
	<h2>Writing Analysis Interpretation</h2>
	<ul>
	<li><b>Lexical Diversity</b>: This feature measures the range of unique words used in a text.
	<ul>
	<li>🤖 Higher tends to be AI.</li>
	</ul>
	</li>
	<li><b>Vocabulary Level</b>: This feature assesses the complexity of the words used in a text.
	<ul>
	<li>🤖 Higher tends to be AI.</li>
	</ul>
	</li>
	<li><b>Unique Words</b>: This feature counts the number of words that appear only once within the text.
	<ul>
	<li>🤖 Higher tends to be AI.</li>
	</ul>
	</li>
	<li><b>Determiner Use</b>: This feature tracks the frequency of articles and quantifiers in the text.
	<ul>
	<li>🤖 Higher tends to be AI.</li>
	</ul>
	</li>
	<li><b>Punctuation Variety</b>: This feature indicates the diversity of punctuation marks used in the text.
	<ul>
	<li>👤 Higher tends to be Human.</li>
	</ul>
	</li>
	<li><b>Sentence Depth</b>: This feature evaluates the complexity of the sentence structures used in the text.
	<ul>
	<li>🤖 Higher tends to be AI.</li>
	</ul>
	</li>
	<li><b>Vocabulary Stability</b>: This feature measures the consistency of vocabulary use throughout the text.
	<ul>
	<li>🤖 Higher tends to be AI.</li>
	</ul>
	</li>
	<li><b>Entity Ratio</b>: This feature calculates the proportion of named entities, such as names and places, within the text.
	<ul>
	<li>👤 Higher tends to be Human.</li>
	</ul>
	</li>
	<li><b>Perplexity</b>: This feature assesses the predictability of the text based on the sequence of words.
	<ul>
	<li>👤 Higher tends to be Human.</li>
	</ul>
	</li>
	</ul>

	"""
	            gr.HTML(interpretation, label="Interpretation of Writing Analysis")

	    # ---- Event wiring ----
	    # The checkbox is listed last so its current value reaches `main` as
	    # `bias_buster_selected`; this adds one input to the "main" API endpoint.
	    full_check_btn.click(
	        fn=main,
	        inputs=[
	            ai_option,
	            plag_option,
	            input_text,
	            year_from,
	            month_from,
	            day_from,
	            year_to,
	            month_to,
	            day_to,
	            domains_to_skip,
	            source_block_size,
	            bias_buster_selected,
	        ],
	        outputs=[
	            bcLabel,
	            mcLabel,
	            sentenceBreakdown,
	            writing_analysis_plot,
	            QLabel,
	        ],
	        api_name="main",
	    )

	    only_ai_btn.click(
	        fn=ai_generated_test,
	        inputs=[ai_option, bias_buster_selected, input_text],
	        outputs=[bcLabel, mcLabel],
	        api_name="ai_check",
	    )

	    quillbot_check.click(
	        fn=predict_quillbot,
	        inputs=[input_text, bias_buster_selected],
	        outputs=[QLabel],
	        api_name="quillbot_check",
	    )

	    only_plagiarism_btn.click(
	        # fn=plagiarism_check,
	        fn=html_highlight,
	        inputs=[
	            plag_option,
	            input_text,
	            year_from,
	            month_from,
	            day_from,
	            year_to,
	            month_to,
	            day_to,
	            domains_to_skip,
	            source_block_size,
	        ],
	        outputs=[
	            sentenceBreakdown,
	        ],
	        api_name="plagiarism_check",
	    )

	    depth_analysis_btn.click(
	        fn=depth_analysis,
	        inputs=[input_text, bias_buster_selected],
	        outputs=[writing_analysis_plot],
	        api_name="depth_analysis",
	    )

	    quillbot_highlighter_button.click(
	        fn=analyze_and_highlight_quillbot,
	        inputs=[input_text, bias_buster_selected],
	        outputs=[quillbot_highlighter_output],
	        api_name="humanized_highlighter",
	    )

	    bc_highlighter_button.click(
	        fn=analyze_and_highlight_bc,
	        inputs=[input_text, bias_buster_selected],
	        outputs=[bc_highlighter_output],
	        api_name="bc_highlighter",
	    )

	date_from = ""
	date_to = ""

	if __name__ == "__main__":
	    # SECURITY: login credentials were hard-coded here. They can now be
	    # overridden through the environment; the previous values remain only as
	    # fallbacks so existing deployments keep starting. They should be rotated
	    # and the fallbacks removed.
	    auth_username = os.environ.get("APP_AUTH_USERNAME", "polygraf-admin")
	    auth_password = os.environ.get("APP_AUTH_PASSWORD", "test@aisd")
	    demo.launch(
	        share=True,
	        server_name="0.0.0.0",
	        server_port=80,
	        auth=(auth_username, auth_password),
	    )