Spaces:

bakhil-aissa
/

prompt-injection-detector

Running

App Files Files Community

bakhil-aissa commited on 13 days ago

Commit

79efd3c

verified ·

1 Parent(s): f9360bd

Upload 2 files

Browse files

Files changed (2) hide show

app.py +72 -39
requirements.txt +5 -9

app.py CHANGED Viewed

@@ -1,9 +1,8 @@
-import streamlit as st
-import pandas as pd
 import numpy as np
 import onnxruntime as ort
 from transformers import AutoTokenizer
-from huggingface_hub import hf_hub_download
 import os
 # Global variables to store loaded models
@@ -19,10 +18,10 @@ def load_models():
     if sess is None:
         if os.path.exists("model_f16.onnx"):
-            st.write("Model already downloaded.")
             model_path = "model_f16.onnx"
         else:
-            st.write("Downloading model...")
             model_path = hf_hub_download(
                 repo_id="bakhil-aissa/anti_prompt_injection",
                 filename="model_f16.onnx",
@@ -33,41 +32,75 @@ def load_models():
     return tokenizer, sess
-def predict(text):
     """Predict function that uses the loaded models"""
-    enc = tokenizer([text], return_tensors="np", truncation=True, max_length=2048)
-    inputs = {"input_ids": enc["input_ids"], "attention_mask": enc["attention_mask"]}
-    logits = sess.run(["logits"], inputs)[0]
-    exp = np.exp(logits)
-    probs = exp / exp.sum(axis=1, keepdims=True)        # shape (1, num_classes)
-    return probs
-def main():
-    st.title("Anti Prompt Injection Detection")
-    # Load models when needed
-    global tokenizer, sess
-    tokenizer, sess = load_models()
-    st.subheader("Enter your text to check for prompt injection:")
-    text_input = st.text_area("Text Input", height=200)
-    confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)
-    if st.button("Check"):
-        if text_input:
-            try:
-                with st.spinner("Processing..."):
-                    # Call the predict function
-                    probs = predict(text_input)
-                jailbreak_prob = float(probs[0][1])  # index into batch
-                is_jailbreak = jailbreak_prob >= confidence_threshold
-                st.success(f"Is Jailbreak: {is_jailbreak}")
-                st.info(f"Jailbreak Probability: {jailbreak_prob:.4f}")
-            except Exception as e:
-                st.error(f"Error: {str(e)}")
-        else:
-            st.warning("Please enter some text to check.")
-# Only define functions, don't execute anything
-# Streamlit will automatically run the script when it's ready

+import gradio as gr
 import numpy as np
 import onnxruntime as ort
 from transformers import AutoTokenizer
+from huggingface_hub import hf_hub_download
 import os
 # Global variables to store loaded models
     if sess is None:
         if os.path.exists("model_f16.onnx"):
+            print("Model already downloaded.")
             model_path = "model_f16.onnx"
         else:
+            print("Downloading model...")
             model_path = hf_hub_download(
                 repo_id="bakhil-aissa/anti_prompt_injection",
                 filename="model_f16.onnx",
     return tokenizer, sess
+def predict(text, confidence_threshold):
     """Predict function that uses the loaded models"""
+    if not text.strip():
+        return "Please enter some text to check.", 0.0, False
+    try:
+        # Load models if not already loaded
+        load_models()
+        # Make prediction
+        enc = tokenizer([text], return_tensors="np", truncation=True, max_length=2048)
+        inputs = {"input_ids": enc["input_ids"], "attention_mask": enc["attention_mask"]}
+        logits = sess.run(["logits"], inputs)[0]
+        exp = np.exp(logits)
+        probs = exp / exp.sum(axis=1, keepdims=True)
+        jailbreak_prob = float(probs[0][1])
+        is_jailbreak = jailbreak_prob >= confidence_threshold
+        result_text = f"Is Jailbreak: {is_jailbreak}"
+        return result_text, jailbreak_prob, is_jailbreak
+    except Exception as e:
+        return f"Error: {str(e)}", 0.0, False
+# Create Gradio interface
+def create_interface():
+    with gr.Blocks(title="Anti Prompt Injection Detection") as demo:
+        gr.Markdown("# 🚫 Anti Prompt Injection Detection")
+        gr.Markdown("Enter your text to check for prompt injection attempts.")
+        with gr.Row():
+            with gr.Column():
+                text_input = gr.Textbox(
+                    label="Text Input",
+                    placeholder="Enter text to analyze...",
+                    lines=5,
+                    max_lines=10
+                )
+                confidence_threshold = gr.Slider(
+                    minimum=0.0,
+                    maximum=1.0,
+                    value=0.5,
+                    step=0.01,
+                    label="Confidence Threshold"
+                )
+                check_button = gr.Button("Check Text", variant="primary")
+            with gr.Column():
+                result_text = gr.Textbox(label="Result", interactive=False)
+                probability = gr.Number(label="Jailbreak Probability", precision=4)
+                is_jailbreak = gr.Checkbox(label="Is Jailbreak", interactive=False)
+        # Set up the prediction
+        check_button.click(
+            fn=predict,
+            inputs=[text_input, confidence_threshold],
+            outputs=[result_text, probability, is_jailbreak]
+        )
+        gr.Markdown("---")
+        gr.Markdown("**How it works:** This tool analyzes text to detect potential prompt injection attempts that could bypass AI safety measures.")
+    return demo
+# Create and launch the interface
+if __name__ == "__main__":
+    demo = create_interface()
+    demo.launch()
+else:
+    # For Hugging Face Spaces
+    demo = create_interface()

requirements.txt CHANGED Viewed

@@ -1,9 +1,5 @@
-fastapi
-huggingface_hub
-numpy
-onnxruntime
-pandas
-pydantic
-streamlit
-transformers
-torch

+gradio>=4.0.0
+transformers>=4.30.0
+onnxruntime>=1.15.0
+numpy>=1.21.0
+huggingface_hub>=0.16.0