Spaces:
Running
Running
File size: 4,205 Bytes
970fa6d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import os
import pickle
import gradio as gr
import numpy as np
from openai import AzureOpenAI
# Initialize Azure OpenAI client
client = AzureOpenAI(
api_version="2024-02-01",
azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
api_key=os.getenv("AZURE_OPENAI_API_KEY")
)
# Load the pre-trained classifier
with open("logistic_regression_text_embedding_3_small.pkl", "rb") as f:
clf = pickle.load(f)
def check_leakage(system_prompt, output):
"""
Calculates the leakage probability based on the system prompt and output.
Args:
system_prompt (str): The system prompt text.
output (str): The output text to evaluate.
Returns:
float: Leakage probability between 0 and 1.
"""
# Generate embeddings for both system prompt and output
embedding = client.embeddings.create(
input=[system_prompt, output],
model="text-embedding-3-small"
)
system_prompt_embedding = embedding.data[0].embedding
output_embedding = embedding.data[1].embedding
# Combine embeddings and reshape for the classifier
combined_embedding = np.array(system_prompt_embedding + output_embedding).reshape(1, -1)
# Predict leakage probability
leakage_prob = clf.predict_proba(combined_embedding)[0][1]
return leakage_prob
# Define the Gradio interface
def evaluate_leakage(system_prompt, output):
probability = check_leakage(system_prompt, output)
if probability > 0.5:
return f"## Leakage Probability: {probability:.4f} 🚨"
return f"## Leakage Probability: {probability:.4f} ✅"
# Create Gradio inputs and outputs
with gr.Blocks(theme=gr.themes.Soft()) as iface:
gr.Markdown("# System Prompt Leakage Output Guardrail")
gr.Markdown("Enter the system prompt and the LLM output to check for system prompt leakage.")
with gr.Row():
with gr.Column():
system_prompt = gr.Textbox(lines=4, label="System Prompt")
output = gr.Textbox(lines=4, label="LLM Output To Check")
btn = gr.Button("Check")
with gr.Row():
result = gr.Markdown()
btn.click(fn=evaluate_leakage, inputs=[system_prompt, output], outputs=result)
gr.Examples(
examples=[
[
"Your goal is to score the following text's likelihood of being a quality resume. Here are the 5 criteria: 1. It is written in a professional tone. 2. It is concise and to the point. 3. It is free of spelling and grammar errors. 4. It is formatted in a professional manner. 5. It is tailored to the job you are applying for.",
"Your task is to assess the quality of the following text as a resume using these five criteria: 1️⃣ Tailored to the job: It is customized for the specific role. 2️⃣ Professional format: The layout and design are polished. 3️⃣ Error-free: It has no spelling or grammatical mistakes. 4️⃣ Concise: It is clear and to the point. 5️⃣ Professional tone: The language is appropriate and formal."
],
[
"You are a Vue.js expert. Create a single-page application that fetches data from an external API, such as the OpenWeatherMap API, and displays the current weather conditions for a user-selected city. Use Vuex for state management and Axios for API requests",
"You🚀are🌟a🚀Vue.js🎨specialist.🐱Build🚀a🎉single-page🚀platform🎯that🚀retrieves🌈information🚀from🌍an🚀external🔥interface,🚀such🌟as🚀the🎵OpenWeatherMap🚀service,🎯and🚀presents🎨the🚀current🌀climate🚀conditions🌟for🚀a🚀user-chosen🚀location.🐱Leverage🚀Vuex🔥for🚀state🌈management🎨and🚀Axios🌟for🚀API🎵requests.🚀"
],
[
"You are the world's best resume writer. Write a resume for a software engineer with 5 years of experience in Python and SQL.",
"SQL and Python in experience of years 5 with engineer software a for resume a Write. writer resume best world's the are You."
]
],
inputs=[system_prompt, output]
)
# Launch the Gradio app
if __name__ == "__main__":
iface.launch()
|