Spaces:

jer233
/

AI_Check_project

Runtime error

App Files Files Community

jer233 committed on Dec 26, 2024

Commit

98bdd9f

verified ·

1 Parent(s): 515bde6

Update demo/demo.py

Browse files

Files changed (1) hide show

demo/demo.py +84 -69

demo/demo.py CHANGED Viewed

@@ -1,86 +1,101 @@
 import gradio as gr
-from MMD_calculate import MMDMPDetector
-detector = MMDMPDetector()  # Initialize your MMD-MP detector
-MINIMUM_TOKENS = 64  # Minimum number of tokens for detection
-def count_tokens(text):
-    return len(text.split())  # Count the number of tokens (words) in the text
-def run_detector(input_text):
-    # Check if input meets the token requirement
-    if count_tokens(input_text) < MINIMUM_TOKENS:
-        return f"Error: Text is too short! At least {MINIMUM_TOKENS} tokens are required."
-    # Perform detection (replace this with your model's prediction logic)
-    prediction = detector.predict(input_text)
-    return f"Result: {prediction}"
-def change_mode(mode):
-    if mode == "Low False Positive Rate":
-        detector.set_mode("low-fpr")  # Adjust detector mode
-    elif mode == "High Accuracy":
-        detector.set_mode("accuracy")
-    return f"Mode set to: {mode}"
 css = """
-.green { color: black!important; line-height:1.9em; padding: 0.2em 0.2em; background: #ccffcc; border-radius:0.5rem;}
-.red { color: black!important; line-height:1.9em; padding: 0.2em 0.2em; background: #ffad99; border-radius:0.5rem;}
-.hyperlinks {
-    display: flex;
-    align-items: center;
-    justify-content: flex-end;
-    padding: 12px;
-    margin: 0 10px;
-    text-decoration: none;
-    color: #000;
-}
 """
-with gr.Blocks(css=css, theme=gr.themes.Default(font=[gr.themes.GoogleFont("Inconsolata"), "Arial", "sans-serif"])) as app:
-    # Header Row
     with gr.Row():
-        with gr.Column(scale=3):
-            gr.HTML("<h1>Binoculars: Zero-Shot LLM-Text Detector</h1>")
-        with gr.Column(scale=1):
-            gr.HTML("""
-                <p class="hyperlinks">
-                <a href="https://arxiv.org/abs/2401.12070" target="_blank">Paper</a> |
-                <a href="https://github.com/AHans30/Binoculars" target="_blank">Code</a> |
-                <a href="mailto:[email protected]" target="_blank">Contact</a>
-                </p>
-            """)
-    # Input Section
     with gr.Row():
-        input_text = gr.Textbox(placeholder="Enter text here...", lines=8, label="Input Text")
-    # Mode Selector and Buttons
     with gr.Row():
-        mode_selector = gr.Dropdown(
-            choices=["Low False Positive Rate", "High Accuracy"],
-            label="Detection Mode",
-            value="Low False Positive Rate"
         )
-        submit_button = gr.Button("Run Binoculars", variant="primary")
-        clear_button = gr.Button("Clear")
-    # Output Section
     with gr.Row():
-        output_text = gr.Textbox(label="Prediction", value="Results will appear here...")
-    # Disclaimer Section
     with gr.Accordion("Disclaimer", open=False):
-        gr.Markdown("""
-            - **Accuracy**: This detector uses state-of-the-art techniques, but no model is perfect.
-            - **Mode Information**:
-                - High Accuracy: Maximizes accuracy by adjusting thresholds.
-                - Low False Positive Rate: Reduces human-written text being falsely flagged as AI-generated.
-            - **Limitations**: Detection is best on texts with 64–300 tokens. Very short or extremely long texts may lead to inaccurate results.
-        """)
-    # Bind Functions to Buttons
-    submit_button.click(run_detector, inputs=input_text, outputs=output_text)
-    clear_button.click(lambda: ("", ""), outputs=[input_text, output_text])
-    mode_selector.change(change_mode, inputs=mode_selector, outputs=mode_selector)

 import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModel
+# from MMD_calculate import mmd_two_sample_baseline  # Adjust path based on your structure
+# from utils_MMD import extract_features  # Example helper from your utils
+MINIMUM_TOKENS = 64
+def count_tokens(text, tokenizer):
+    return len(tokenizer(text).input_ids)
+def run_test_power(model_name, tokenizer_name, real_text, generated_text, N):
+    """
+    Runs the test power calculation for provided real and generated texts.
+    """
+    # load tokenizer and model
+    tokenizer = AutoTokenizer.from_pretrained(model_name).cuda()
+    model = AutoModel.from_pretrained(model)
+    if count_tokens(real_text, tokenizer) < MINIMUM_TOKENS or count_tokens(generated_text, tokenizer) < MINIMUM_TOKENS:
+        return "Too short length. Need minimum 64 tokens to calculated Test Power."
+    # Extract features
+    fea_real_ls = extract_features(model_name, tokenizer_name, [real_text])
+    fea_generated_ls = extract_features(model_name, tokenizer_name, [generated_text])
+    #  Calculate test power list
+    test_power_ls = mmd_two_sample_baseline(fea_real_ls, fea_generated_ls, N=10)
+    # Compute the average test power value
+    power_test_value = sum(test_power_ls) / len(test_power_ls)
+    # Classify the text
+    if power_test_value < threshold:
+        return "Prediction: Human"
+    else:
+        return "Prediction: AI"
 css = """
+#header { text-align: center; font-size: 1.5em; margin-bottom: 20px; }
+#output-text { font-weight: bold; font-size: 1.2em; }
 """  # selectors target the ids used below: "header" on the title div, "output-text" on the prediction Textbox
+# Gradio App: declares the Blocks layout, wires the two button callbacks, then launches the app.
+with gr.Blocks(css=css) as app:  # passes the css string defined above
     with gr.Row():
+        gr.HTML('<div id="header">Human or AI Text Detector</div>')
     with gr.Row():
+        gr.Markdown(
+            """
+            [Paper](https://openreview.net/forum?id=z9j7wctoGV) | [Code](https://github.com/xLearn-AU/R-Detect) | [Contact](mailto:[email protected])
+            """
+        )  # NOTE(review): the Contact mailto target reads "[email protected]"; looks like an obfuscation placeholder, confirm the real address
     with gr.Row():
+        input_text = gr.Textbox(
+            label="Input Text",
+            placeholder="Enter the text to check",
+            lines=8,
         )
     with gr.Row():
+        model_name = gr.Dropdown(
+            ["gpt2-medium", "gpt2-large", "t5-large", "t5-small", "roberta-base", "roberta-base-openai-detector", "falcon-rw-1b"],  # NOTE(review): bare names without a hub namespace; confirm each resolves wherever the model is loaded
+            label="Select Model",
+            value="gpt2-medium",
+        )
+    with gr.Row():
+        submit_button = gr.Button("Run Detection", variant="primary")
+        clear_button = gr.Button("Clear", variant="secondary")
+    with gr.Row():
+        output = gr.Textbox(
+            label = "Prediction",
+            placeholder = "Prediction: Human or AI",
+            elem_id = "output-text",  # matches the #output-text rule in css
+        )
     with gr.Accordion("Disclaimer", open=False):
+        gr.Markdown(
+            """
+            - **Disclaimer**: This tool is for demonstration purposes only. It is not a foolproof AI detector.
+            - **Accuracy**: Results may vary based on input length and quality.
+            """
+        )
+    with gr.Accordion("Citations", open=False):
+        gr.Markdown(
+            """
+            ```
+            @inproceedings{zhangs2024MMDMP,
+                title={Detecting Machine-Generated Texts by Multi-Population Aware Optimization for Maximum Mean Discrepancy},
+                author={Zhang, Shuhai and Song, Yiliao and Yang, Jiahao and Li, Yuanqing and Han, Bo and Tan, Mingkui},
+                booktitle = {International Conference on Learning Representations (ICLR)},
+                year={2024}
+            }
+            ```
+            """
+        )
+    submit_button.click(detect_text, inputs=[input_text, model_name], outputs=output)  # NOTE(review): detect_text is not defined in the visible file; run_test_power takes five parameters, so it is not a drop-in handler for these two inputs
+    clear_button.click(lambda: ("", ""), inputs=[], outputs=[input_text, output])  # one empty string per listed output, clearing both textboxes
+app.launch()  # executed at module level (no __main__ guard)