Commit
·
8f2b0ed
1
Parent(s):
6d546ef
Header Changes
Browse files
1_Auto_Generate_Prompts.py → 1_Auto_Generate_Prompts_Using_HI_Model.py
RENAMED
@@ -27,12 +27,18 @@ scroll_css = """
|
|
27 |
"""
|
28 |
st.markdown(scroll_css, unsafe_allow_html=True)
|
29 |
|
30 |
-
st.title("Auto
|
31 |
st.markdown(
|
32 |
"""
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
"""
|
37 |
)
|
38 |
|
|
|
27 |
"""
|
28 |
st.markdown(scroll_css, unsafe_allow_html=True)
|
29 |
|
30 |
+
st.title("Auto Generate Prompts Using HI Model")
|
31 |
st.markdown(
|
32 |
"""
|
33 |
+
Humane Intelligence’s Auto Red Teaming prototype is built to empower clients to run red-teaming exercises on their AI applications using HI’s intuitive no-code/low-code platform.
|
34 |
+
|
35 |
+
The system generates adversarial prompts via a model trained on proprietary HI data, targeting potential vulnerabilities in the client’s models or applications. The client model’s responses to these prompts are then evaluated by a separate judge LLM, also trained by HI.
|
36 |
+
|
37 |
+
Specifically, the prototype follows these steps:
|
38 |
+
1. Generates adversarial prompts based on a selected **bias category** and **country/region** using HI’s pre-trained model.
|
39 |
+
2. Selects the most effective prompts and feeds them into the client’s model to elicit responses.
|
40 |
+
3. Uses a dedicated HI-trained judge LLM to assess the responses.
|
41 |
+
4. Produces a final output that includes a **probability score** and a **justification** for each response.
|
42 |
"""
|
43 |
)
|
44 |
|
pages/{2_Select_Best_Prompts.py → 2_Select_Best_Prompts_For_Input_.py}
RENAMED
@@ -21,7 +21,7 @@ scroll_css = """
|
|
21 |
"""
|
22 |
st.markdown(scroll_css, unsafe_allow_html=True)
|
23 |
|
24 |
-
st.title("Select Best Prompts")
|
25 |
|
26 |
def extract_json_content(markdown_str: str) -> str:
|
27 |
lines = markdown_str.splitlines()
|
|
|
21 |
"""
|
22 |
st.markdown(scroll_css, unsafe_allow_html=True)
|
23 |
|
24 |
+
st.title("Select Best Prompts for Input in Client Model")
|
25 |
|
26 |
def extract_json_content(markdown_str: str) -> str:
|
27 |
lines = markdown_str.splitlines()
|
pages/3_Client_Response.py
CHANGED
@@ -19,7 +19,7 @@ scroll_css = """
|
|
19 |
|
20 |
st.markdown(scroll_css, unsafe_allow_html=True)
|
21 |
|
22 |
-
st.title("Client Response (Answering)")
|
23 |
|
24 |
# Use best_samples if available; otherwise, fallback to the interactive single sample.
|
25 |
if "best_samples" in st.session_state:
|
|
|
19 |
|
20 |
st.markdown(scroll_css, unsafe_allow_html=True)
|
21 |
|
22 |
+
st.title("Client Model Response (Answering)")
|
23 |
|
24 |
# Use best_samples if available; otherwise, fallback to the interactive single sample.
|
25 |
if "best_samples" in st.session_state:
|
pages/4_Evaluation_Report.py
CHANGED
@@ -6,8 +6,20 @@ import json
|
|
6 |
from openai import OpenAI
|
7 |
|
8 |
|
|
|
9 |
st.set_page_config(layout="wide")
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
def extract_json_from_text(text: str) -> str:
|
13 |
"""
|
|
|
6 |
from openai import OpenAI
|
7 |
|
8 |
|
9 |
+
|
10 |
st.set_page_config(layout="wide")
|
11 |
+
scroll_css = """
|
12 |
+
<style>
|
13 |
+
.table-scroll {
|
14 |
+
overflow-x: auto;
|
15 |
+
width: 100%;
|
16 |
+
max-width: 100%;
|
17 |
+
}
|
18 |
+
</style>
|
19 |
+
"""
|
20 |
+
st.markdown(scroll_css, unsafe_allow_html=True)
|
21 |
+
|
22 |
+
st.title("Evaluation Response using HI Judge LLM")
|
23 |
|
24 |
def extract_json_from_text(text: str) -> str:
|
25 |
"""
|