# Source: Hugging Face Space jer233/AI_Check_project
# Space status at capture time: Runtime error
# File size: 3,719 bytes
# Revisions shown in the blame gutter: 35af015, 98bdd9f

import gradio as gr
from transformers import AutoTokenizer, AutoModel
# from MMD_calculate import mmd_two_sample_baseline  # Adjust path based on your structure
# from utils_MMD import extract_features  # Example helper from your utils

MINIMUM_TOKENS = 64

def count_tokens(text, tokenizer):
    """Return the number of token ids `tokenizer` produces for `text`.

    `tokenizer` is called directly on `text`, and the length of the
    result's `input_ids` attribute is returned.
    """
    encoding = tokenizer(text)
    token_ids = encoding.input_ids
    return len(token_ids)

def run_test_power(model_name, tokenizer_name, real_text, generated_text, N=10, threshold=None):
    """
    Return a Human/AI prediction from the average test power of two texts.

    Both texts must tokenize to at least MINIMUM_TOKENS tokens; otherwise a
    "too short" message is returned instead of a prediction.

    Args:
        model_name: Model identifier forwarded to `extract_features`.
        tokenizer_name: Tokenizer identifier. Loaded with `AutoTokenizer` for
            the length check and forwarded to `extract_features`.
        real_text: Text whose features are passed as the first sample.
        generated_text: Text whose features are passed as the second sample.
        N: Forwarded to `mmd_two_sample_baseline` as its `N` keyword.
            Defaults to 10, the value that was previously hard-coded.
        threshold: Decision boundary for the averaged test power. An average
            strictly below it yields "Prediction: Human", anything else
            "Prediction: AI". When None, a module-level `threshold` is used
            if one is defined.

    Returns:
        A string: either the "too short" message or the prediction label.

    Raises:
        ValueError: If no threshold is passed and no module-level
            `threshold` exists.
    """
    # Resolve the decision threshold first so a misconfiguration fails fast,
    # before any tokenizer is downloaded or features are extracted.
    if threshold is None:
        threshold = globals().get("threshold")
    if threshold is None:
        raise ValueError(
            "No decision threshold available: pass `threshold=` or define a "
            "module-level `threshold`."
        )

    # Only the tokenizer is needed locally (for the length check). Tokenizers
    # are not torch modules, so there is nothing to move to the GPU. The model
    # itself is not loaded here because `extract_features` receives the model
    # and tokenizer names, not loaded objects.
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

    too_short = any(
        count_tokens(sample, tokenizer) < MINIMUM_TOKENS
        for sample in (real_text, generated_text)
    )
    if too_short:
        return "Too short length. Need minimum 64 tokens to calculated Test Power."

    # NOTE(review): `extract_features` and `mmd_two_sample_baseline` are only
    # referenced by commented-out imports at the top of the file; they must be
    # imported for the calls below to resolve.
    fea_real_ls = extract_features(model_name, tokenizer_name, [real_text])
    fea_generated_ls = extract_features(model_name, tokenizer_name, [generated_text])

    # Honour the caller's N instead of a hard-coded value.
    test_power_ls = mmd_two_sample_baseline(fea_real_ls, fea_generated_ls, N=N)

    # Average the per-run test power values into a single score.
    power_test_value = sum(test_power_ls) / len(test_power_ls)

    if power_test_value < threshold:
        return "Prediction: Human"
    return "Prediction: AI"



# Custom CSS for the two elements tagged below: the HTML title (id "header")
# and the prediction textbox (elem_id "output-text").
css = """
#header { text-align: center; font-size: 1.5em; margin-bottom: 20px; }
#output-text { font-weight: bold; font-size: 1.2em; }
"""

# Gradio app: a title, reference links, a text input, a model dropdown,
# Run/Clear buttons, a prediction textbox, and two collapsed accordions.
# Components appear in the order they are created inside the Blocks context.
with gr.Blocks(css=css) as app:
    with gr.Row():
        gr.HTML('<div id="header">Human or AI Text Detector</div>')
    with gr.Row():
        # NOTE(review): the mailto target below reads "[email protected]",
        # which looks like an obfuscated placeholder rather than a real
        # address - confirm and restore the intended contact.
        gr.Markdown(
            """
            [Paper](https://openreview.net/forum?id=z9j7wctoGV) | [Code](https://github.com/xLearn-AU/R-Detect) | [Contact](mailto:[email protected])
            """
        )
    with gr.Row():
        input_text = gr.Textbox(
            label="Input Text",
            placeholder="Enter the text to check",
            lines=8,
        )
    with gr.Row():
        model_name = gr.Dropdown(
            ["gpt2-medium", "gpt2-large", "t5-large", "t5-small", "roberta-base", "roberta-base-openai-detector", "falcon-rw-1b"],
            label="Select Model",
            value="gpt2-medium",
        )
    with gr.Row():
        submit_button = gr.Button("Run Detection", variant="primary")
        clear_button = gr.Button("Clear", variant="secondary")
    with gr.Row():
        output = gr.Textbox(
            label = "Prediction",
            placeholder = "Prediction: Human or AI",
            elem_id = "output-text",
        )
    with gr.Accordion("Disclaimer", open=False):
        gr.Markdown(
            """
            - **Disclaimer**: This tool is for demonstration purposes only. It is not a foolproof AI detector.
            - **Accuracy**: Results may vary based on input length and quality.
            """
        )
    with gr.Accordion("Citations", open=False):
        gr.Markdown(
            """
            ```
            @inproceedings{zhangs2024MMDMP,
                title={Detecting Machine-Generated Texts by Multi-Population Aware Optimization for Maximum Mean Discrepancy},
                author={Zhang, Shuhai and Song, Yiliao and Yang, Jiahao and Li, Yuanqing and Han, Bo and Tan, Mingkui},
                booktitle = {International Conference on Learning Representations (ICLR)},
                year={2024}
            }
            ```
            """
        )
    # NOTE(review): `detect_text` is not defined anywhere in the visible
    # source, so this line raises NameError when the module is executed unless
    # it is defined elsewhere. The handler is wired with two inputs (text and
    # model name), whereas `run_test_power` takes five positional parameters -
    # confirm the intended callback and its signature.
    submit_button.click(detect_text, inputs=[input_text, model_name], outputs=output)
    # Clear resets both the input and the prediction textbox to empty strings.
    clear_button.click(lambda: ("", ""), inputs=[], outputs=[input_text, output])

# Launched unconditionally: there is no `__main__` guard, so this also runs
# whenever the module is imported.
app.launch()