# Source: Hugging Face file view - author pmkhanh7890, commit 22e1b62 ("1st"), 13.4 kB.
import warnings
import torchvision.transforms as transforms
from google_img_source_search import ReverseImageSearcher
# from src.images.CNN_model_classifier import predict_cnn
# from src.images.diffusion_model_classifier import (
# ImageClassifier,
# predict_single_image,
# )
warnings.simplefilter(
action="ignore",
category=FutureWarning,
) # disable FutureWarning
import gradio as gr # noqa: E402
from transformers import ( # noqa: E402
AutoModelForSequenceClassification,
AutoTokenizer,
pipeline,
)
from src.texts.MAGE.deployment import ( # noqa: E402
detect,
preprocess,
)
from src.texts.PASTED.pasted_lexicon import Detector # noqa: E402
from src.texts.Search_Text.search import ( # noqa: E402
get_important_sentences,
get_keywords,
is_human_written,
)
from src.images.Search_Image.search import (
compare_images,
get_image_from_path,
get_image_from_url,
)
def convert_score_range(score):
    """
    Converts a score from the range [0, 1] to [-1, 1].

    The mapping is linear: 0 -> -1, 0.5 -> 0, 1 -> 1.

    Args:
        score: The original score in the range [0, 1].

    Returns:
        The converted score in the range [-1, 1].
    """
    return 2 * score - 1
def generate_highlighted_text(text_scores):
    """
    Generates a highlighted text string based on the given text and scores.

    Each segment is wrapped in a ``<span>`` with a red background whose
    opacity is ``1 - score``: a score of 1 is fully transparent, a score of
    0 is solid red.

    Args:
        text_scores: An iterable of tuples, where each tuple contains a text
            segment and its score.

    Returns:
        A string of HTML code with highlighted text (empty string when
        ``text_scores`` is empty).
    """
    import html  # local import keeps this block self-contained

    spans = []
    for text, score in text_scores:
        # Only the alpha channel varies; the hue is always red.
        color = f"rgba(255, 0, 0, {1 - score})"
        # Escape the segment so user-supplied text cannot inject markup.
        safe_text = html.escape(str(text))
        spans.append(
            f"<span style='background-color: {color}'>{safe_text}</span>"
        )
    return "".join(spans)
def separate_characters_with_mask(text, mask):
    """Separates characters in a string and pairs them with a mask sign.

    Args:
        text: The input string.
        mask: The value paired with every character of ``text``.

    Returns:
        A list of tuples, where each tuple contains a character and a mask.
        The list is empty when ``text`` is empty.
    """
    return [(char, mask) for char in text]
def detect_ai_text(model_name, search_engine, text):
    """
    Runs the selected detector on ``text`` and returns labelled segments.

    Args:
        model_name: One of "SimLLM", "MAGE", "chatgpt-detector-roberta" or
            "PASTED-Lexical". Any other value (including None, which the UI
            sends before a model is picked) yields an empty result.
        search_engine: When exactly True, the first important sentence is
            looked up with ``is_human_written`` before any model runs.
        text: The text to analyse.

    Returns:
        A list of ``(segment, label_or_score)`` tuples:
        * ``[]`` for blank text or an unrecognised model;
        * ``[("[Found exact match] ", 0), (text, -1)]`` when the search
          lookup returns -1;
        * ``[(text, label)]`` for SimLLM / MAGE / chatgpt-detector-roberta;
        * one ``(segment, score)`` per detector segment for PASTED-Lexical,
          with each score mapped through ``convert_score_range``.
    """
    # Nothing to classify; the UI fires change events for an empty textbox.
    if not text or not text.strip():
        return []

    if search_engine is True:
        keywords = get_keywords(text)
        important_sentences = get_important_sentences(text, keywords)
        print("keywords: ", keywords)
        print("important_sentences: ", important_sentences)
        # Guard the [0] lookup: no extracted sentence means nothing to search.
        if len(important_sentences) > 0:
            predictions = is_human_written(important_sentences[0])
            print("predictions: ", predictions)
            if predictions == -1:
                caption = "[Found exact match] "
                text_scores = list(zip([caption, text], [0, predictions]))
                print("text_scores: ", text_scores)
                return text_scores

    if model_name == "SimLLM":
        tokenize_input = SimLLM_tokenizer(text, return_tensors="pt")
        outputs = SimLLM_model(**tokenize_input)
        label_id = outputs.logits.argmax(dim=-1).item()
        # Class index 0 is reported as human-written, anything else as machine.
        predictions = (
            "human-written" if label_id == 0 else "machine-generated"
        )
    elif model_name == "MAGE":
        processed_text = preprocess(text)
        predictions = detect(
            processed_text,
            MAGE_tokenizer,
            MAGE_model,
            device,
        )
    elif model_name == "chatgpt-detector-roberta":
        label = roberta_pipeline_en(text)[0]["label"]
        # Any label other than "Human" (i.e. ChatGPT) counts as machine.
        predictions = (
            "human-written" if label == "Human" else "machine-generated"
        )
    elif model_name == "PASTED-Lexical":
        # The detector yields (segment, score) pairs; normalize each score.
        return [
            (segment, convert_score_range(score))
            for segment, score in detector(text)
        ]
    else:
        # No (or unknown) model selected: nothing to report.
        return []

    return [(text, predictions)]
# Checkpoint locations for the image classifiers. Within the visible part of
# this file they are referenced only by the disabled (commented-out)
# diffusion / CNN prediction helpers.
cnn_model_path = "src/images/CNN/model_checkpoints/blur_jpg_prob0.5.pth"
diffusion_model_path = "/".join(
    (
        "src/images/Diffusion/model_checkpoints",
        "image-classifier-step=7007-val_loss=0.09.ckpt",
    ),
)
def _format_image_match(heading, page_url, image_url, difference):
    """Builds the HTML report for the closest reverse-image-search result."""
    import html  # local import keeps this block self-contained

    # URLs come from search results, so escape them before embedding.
    page = html.escape(str(page_url))
    image = html.escape(str(image_url))
    return (
        f"<h1>{heading}</h1>"
        f"<ul>"
        f'<li>\nPage URL: <a href="{page}">{page}</a></li>'
        f'<li>\nImage URL: <a href="{image}">{image}</a></li>'
        f"<li>\nDifference score: {difference}</li>"
        f"</ul>"
    )


def detect_ai_image(input_image_path, search_engine=True):
    """
    Reverse-searches an image and reports the closest search result.

    Every search result image is compared with the input image via
    ``compare_images``; the result with the smallest difference is reported.
    The loop stops early as soon as a difference of 0 is found.

    Args:
        input_image_path: Path of the image file to look up. A falsy value
            (e.g. None when the image widget is cleared) returns "".
        search_engine: Currently unused - the reverse image search always
            runs. It has a default so callers that only pass the image path
            (such as the UI change event) keep working.

    Returns:
        An HTML string: a heading plus page URL, image URL and difference
        score of the best match, or a "No search result found." heading when
        no result scored below the 5000 cut-off.
    """
    if not input_image_path:
        return ""

    # Differences at or above this value are treated as "no result".
    no_result_threshold = 5000

    rev_img_searcher = ReverseImageSearcher()
    search_items = rev_img_searcher.search_by_file(input_image_path)
    input_image = get_image_from_path(input_image_path)

    min_result_difference = no_result_threshold
    result_image_url = ""
    result_page_url = ""
    for search_item in search_items:
        # Compare each search result image with the input image
        result_image = get_image_from_url(search_item.image_url)
        result_difference = compare_images(result_image, input_image)
        print(f"Difference with search result: {result_difference}")
        print(f"Result page url: {search_item.page_url}\n")
        if min_result_difference > result_difference:
            min_result_difference = result_difference
            result_image_url = search_item.image_url
            result_page_url = search_item.page_url
        if result_difference == 0:
            # Cannot get any closer than an exact match.
            break

    if min_result_difference == 0:
        heading = "Input image is LIKELY SIMILAR to image from:"
    elif 0 < min_result_difference < 10:
        heading = "Input image is potentially a VARIATION from:"
    elif min_result_difference < no_result_threshold:
        heading = "Input image is not similar to any search results."
    else:
        return "<h1>No search result found.</h1>"

    return _format_image_match(
        heading,
        result_page_url,
        result_image_url,
        min_result_difference,
    )
# def get_prediction_diffusion(image):
# model = ImageClassifier.load_from_checkpoint(diffusion_model_path)
# prediction = predict_single_image(image, model)
# return (prediction >= 0.5, prediction)
# def get_prediction_cnn(image):
# prediction = predict_cnn(image, cnn_model_path)
# return (prediction >= 0.5, prediction)
# # Define the transformations for the image
# transform = transforms.Compose(
# [
# transforms.Resize((224, 224)), # Image size expected by ResNet50
# transforms.ToTensor(),
# transforms.Normalize(
# mean=[0.485, 0.456, 0.406],
# std=[0.229, 0.224, 0.225],
# ),
# ],
# )
# image_tensor = transform(inp)
# pred_diff, prob_diff = get_prediction_diffusion(image_tensor)
# pred_cnn, prob_cnn = get_prediction_cnn(image_tensor)
# verdict = (
# "AI Generated" if (pred_diff or pred_cnn) else "No GenAI detected"
# )
# return (
# f"<h1>{verdict}</h1>"
# f"<ul>"
# f"<li>Diffusion detection score: {prob_diff:.1%} "
# f"{'(MATCH)' if pred_diff else ''}</li>"
# f"<li>CNN detection score: {prob_cnn:.1%} "
# f"{'(MATCH)' if pred_cnn else ''}</li>"
# f"</ul>"
# )
# Module-level model initialisation: everything below runs at import time and
# creates the globals that detect_ai_text() reads.
# Inference device passed to the MAGE model and the PASTED detector.
device = "cpu" # use 'cuda:0' if GPU is available
# init MAGE: tokenizer and sequence-classification model, moved to `device`.
# NOTE: `model_dir` is reassigned for each detector below, so only use it
# immediately after the assignment it belongs to.
model_dir = "yaful/MAGE" # model in huggingface
MAGE_tokenizer = AutoTokenizer.from_pretrained(model_dir)
MAGE_model = AutoModelForSequenceClassification.from_pretrained(model_dir).to(
device,
)
# init chatgpt-detector-roberta as a transformers text-classification pipeline
model_dir = "Hello-SimpleAI/chatgpt-detector-roberta" # model in huggingface
roberta_pipeline_en = pipeline(task="text-classification", model=model_dir)
# init PASTED: project Detector built from the model id and the device
model_dir = "linzw/PASTED-Lexical"
detector = Detector(model_dir, device)
# init SimLLM: loaded from a local relative path; unlike MAGE it is not
# explicitly moved to `device`.
model_path = "./models/single_model_detector"
SimLLM_tokenizer = AutoTokenizer.from_pretrained(model_path)
SimLLM_model = AutoModelForSequenceClassification.from_pretrained(model_path)
# Init variable for UI
# HTML banner rendered at the top of the page via gr.HTML(title).
title = """
<center>
<h1> AI-generated content detection </h1>
<b> Demo by NICT & Tokyo Techies </b>
</center>
"""
# Clickable samples for the Text tab. Each row is
# [detection model, use-search-engine flag, input text], in the same order as
# the gr.Examples inputs [model, search_engine, text_input].
# The backslashes inside the triple-quoted strings are line continuations, so
# each sample is a single paragraph without embedded line breaks.
examples = [
# Sample 1: news paragraph about the BBC programme Watchdog (SimLLM).
[
"SimLLM",
False,
"""\
The BBC's long-running consumer rights series Watchdog is to end as a \
standalone programme, instead becoming part of The One Show. Watchdog \
began in 1980 as a strand of Nationwide, but proved so popular it \
became a separate programme in 1985. Co-host Steph McGovern has moved \
to Channel 4, but Matt Allwright and Nikki Fox will stay to front the \
new strand. The BBC said they would investigate viewer complaints all \
year round rather than for two series a year.
""",
],
# Sample 2: short description of artificial intelligence
# (chatgpt-detector-roberta).
[
"chatgpt-detector-roberta",
False,
"""\
Artificial intelligence (AI) is the science of making machines \
intelligent. It enables computers to learn from data, recognize \
patterns, and make decisions. AI powers many technologies we use \
daily, from voice assistants to self-driving cars. It's rapidly \
evolving, promising to revolutionize various industries and reshape \
the future.""",
],
]
# HTML footnote shown under the model dropdown (via gr.HTML): one source link
# per entry of the "Detection model" dropdown.
model_remark = """<left>
Model sources:
<a href="https://github.com/Tokyo-Techies/prj-nict-ai-content-detection">SimLLM</a>,
<a href="https://github.com/yafuly/MAGE">MAGE</a>,
<a href="https://huggingface.co/Hello-SimpleAI/chatgpt-detector-roberta">chatgpt-detector-roberta</a>,
<a href="https://github.com/Linzwcs/PASTED">PASTED-Lexical</a>.
</left>
""" # noqa: E501
# Sample images for the Images tab as [location, caption] rows; the location
# is a local file path or a URL.
image_samples = [
    [location, caption]
    for location, caption in (
        ("src/images/samples/fake_dalle.jpg", "Generated (Dall-E)"),
        ("src/images/samples/fake_midjourney.png", "Generated (MidJourney)"),
        ("src/images/samples/fake_stable.jpg", "Generated (Stable Diffusion)"),
        ("src/images/samples/fake_cnn.png", "Generated (GAN)"),
        ("src/images/samples/real.png", "Organic"),
        (
            "https://p.potaufeu.asahi.com/1831-p/picture/27695628/89644a996fdd0cfc9e06398c64320fbe.jpg",  # noqa E501
            "Internet GenAI",
        ),
    )
]
# Locations only (first column); passed to demo.launch(allowed_paths=...).
image_samples_path = [location for location, _caption in image_samples]
# UI
def _detect_image_from_ui(image_path):
    """Adapts the single-input Images tab to ``detect_ai_image``.

    The tab only provides the image path, while ``detect_ai_image`` takes
    ``(input_image_path, search_engine)``. The flag is passed as True because
    the function always performs the reverse image search. A cleared image
    (None) yields an empty report instead of being forwarded.
    """
    if not image_path:
        return ""
    return detect_ai_image(image_path, True)


# NOTE(review): the source this was restored from had lost its indentation;
# the nesting below follows the usual Gradio layout (Row > Tab > Row > Column)
# and should be confirmed against the intended page layout.
with gr.Blocks() as demo:
    with gr.Row():
        gr.HTML(title)
    with gr.Row():
        with gr.Tab("Text"):
            with gr.Row():
                # Left column: detector choice, search toggle, model sources.
                with gr.Column():
                    model = gr.Dropdown(
                        [
                            "SimLLM",
                            "MAGE",
                            "chatgpt-detector-roberta",
                            "PASTED-Lexical",
                        ],
                        label="Detection model",
                    )
                    search_engine = gr.Checkbox(label="Use search engine")
                    gr.HTML(model_remark)
                # Right column: text to analyse and the highlighted result.
                with gr.Column():
                    text_input = gr.Textbox(
                        label="Input text",
                        placeholder="Enter text here...",
                        lines=5,
                    )
                    output = gr.HighlightedText(
                        label="Detection results",
                        combine_adjacent=True,
                        show_legend=True,
                        color_map={
                            "human-written": "#7d58cf",
                            "machine-generated": "#e34242",
                        },
                    )
            gr.Examples(
                examples=examples,
                inputs=[model, search_engine, text_input],
            )
            # Re-run detection whenever any of the three inputs changes.
            model.change(
                detect_ai_text,
                inputs=[model, search_engine, text_input],
                outputs=output,
            )
            search_engine.change(
                detect_ai_text,
                inputs=[model, search_engine, text_input],
                outputs=output,
            )
            text_input.change(
                detect_ai_text,
                inputs=[model, search_engine, text_input],
                outputs=output,
            )
        with gr.Tab("Images"):
            with gr.Row():
                input_image = gr.Image(type="filepath")
                with gr.Column():
                    output_image = gr.Markdown(height=400)
            gr.Examples(
                examples=image_samples,
                inputs=input_image,
            )
            # Only the image path is available here, so go through the
            # adapter rather than wiring detect_ai_image directly.
            input_image.change(
                _detect_image_from_ui,
                inputs=input_image,
                outputs=output_image,
            )
# demo.launch(share=True)
demo.launch(allowed_paths=image_samples_path, share=True)