gabrielchua committed on
Commit
970fa6d
·
verified ·
1 Parent(s): 0fe24af

Upload 2 files

Browse files
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ import gradio as gr
4
+ import numpy as np
5
+ from openai import AzureOpenAI
6
+
7
# Azure OpenAI client; the endpoint and API key are read from the environment.
client = AzureOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    api_version="2024-02-01",
)

# Deserialize the classifier stored next to the app (path is relative to the
# current working directory).
# NOTE(review): pickle.load can execute arbitrary code from the file, so this
# artifact must only ever come from a trusted source.
with open("logistic_regression_text_embedding_3_small.pkl", "rb") as f:
    clf = pickle.load(f)
17
+
18
def check_leakage(system_prompt, output):
    """
    Score how likely it is that ``output`` leaks ``system_prompt``.

    Both texts are embedded in a single request to the
    "text-embedding-3-small" deployment, the two vectors are joined
    end-to-end (system prompt first, output second), and the result is fed
    to the module-level classifier ``clf``.

    Args:
        system_prompt (str): The system prompt text.
        output (str): The output text to evaluate.

    Returns:
        float: The classifier's probability for column 1 of
        ``predict_proba`` (treated as the leakage class), between 0 and 1.
    """
    # One API call for both texts; results are read back by position.
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=[system_prompt, output],
    )
    prompt_vector = response.data[0].embedding
    output_vector = response.data[1].embedding

    # List concatenation yields one long feature vector; the classifier
    # expects a 2-D array, hence a single-row matrix.
    features = np.array(prompt_vector + output_vector).reshape(1, -1)

    class_probabilities = clf.predict_proba(features)
    return class_probabilities[0][1]
45
+
46
+ # Define the Gradio interface
47
def evaluate_leakage(system_prompt, output):
    """
    Format the leakage probability as a Markdown heading for the UI.

    Args:
        system_prompt (str): The system prompt text.
        output (str): The LLM output to check against the system prompt.

    Returns:
        str: A Markdown heading with the probability to four decimals,
        followed by a siren emoji when it exceeds 0.5 and a check mark
        otherwise. If either input is empty or None, a Markdown message
        asking for both inputs is returned instead.
    """
    # The embeddings endpoint rejects empty-string input, so an untouched
    # textbox would otherwise surface as a raw error in the UI. Answer with
    # a readable message instead of calling the API.
    if not system_prompt or not output:
        return "## Please provide both a system prompt and an LLM output to check."

    probability = check_leakage(system_prompt, output)
    if probability > 0.5:
        return f"## Leakage Probability: {probability:.4f} 🚨"
    return f"## Leakage Probability: {probability:.4f} ✅"
52
+
53
+ # Create Gradio inputs and outputs
54
# NOTE(review): the nesting below was reconstructed from a flattened diff
# view; confirm the layout against the running app.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    # Page heading and short usage hint.
    gr.Markdown("# System Prompt Leakage Output Guardrail")
    gr.Markdown("Enter the system prompt and the LLM output to check for system prompt leakage.")

    # Both text inputs live in one column inside a single row.
    with gr.Row(), gr.Column():
        system_prompt = gr.Textbox(label="System Prompt", lines=4)
        output = gr.Textbox(label="LLM Output To Check", lines=4)

    btn = gr.Button("Check")

    # Markdown area that receives the formatted probability.
    with gr.Row():
        result = gr.Markdown()

    btn.click(
        fn=evaluate_leakage,
        inputs=[system_prompt, output],
        outputs=result,
    )

    # Clickable sample pairs: each row is [system prompt, LLM output].
    gr.Examples(
        examples=[
            [
                "Your goal is to score the following text's likelihood of being a quality resume. Here are the 5 criteria: 1. It is written in a professional tone. 2. It is concise and to the point. 3. It is free of spelling and grammar errors. 4. It is formatted in a professional manner. 5. It is tailored to the job you are applying for.",
                "Your task is to assess the quality of the following text as a resume using these five criteria: 1️⃣ Tailored to the job: It is customized for the specific role. 2️⃣ Professional format: The layout and design are polished. 3️⃣ Error-free: It has no spelling or grammatical mistakes. 4️⃣ Concise: It is clear and to the point. 5️⃣ Professional tone: The language is appropriate and formal."
            ],
            [
                "You are a Vue.js expert. Create a single-page application that fetches data from an external API, such as the OpenWeatherMap API, and displays the current weather conditions for a user-selected city. Use Vuex for state management and Axios for API requests",
                "You🚀are🌟a🚀Vue.js🎨specialist.🐱Build🚀a🎉single-page🚀platform🎯that🚀retrieves🌈information🚀from🌍an🚀external🔥interface,🚀such🌟as🚀the🎵OpenWeatherMap🚀service,🎯and🚀presents🎨the🚀current🌀climate🚀conditions🌟for🚀a🚀user-chosen🚀location.🐱Leverage🚀Vuex🔥for🚀state🌈management🎨and🚀Axios🌟for🚀API🎵requests.🚀"
            ],
            [
                "You are the world's best resume writer. Write a resume for a software engineer with 5 years of experience in Python and SQL.",
                "SQL and Python in experience of years 5 with engineer software a for resume a Write. writer resume best world's the are You."
            ],
        ],
        inputs=[system_prompt, output],
    )

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
logistic_regression_text_embedding_3_small.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c9edd51efc72875ec48469942b11cb5be96ed6efddebe0b6e4ab48572d98a2b
3
+ size 112275