Spaces:
Sleeping
Sleeping
Ouz
committed on
Commit
·
d3e0b87
1
Parent(s):
e984be7
Add better popups
Browse files
app.py
CHANGED
@@ -22,31 +22,38 @@ MODELS = [
|
|
22 |
"o1-mini",
|
23 |
"o1-preview",
|
24 |
"open-codestral-mamba",
|
|
|
25 |
]
|
26 |
|
27 |
def initialize_client(api_key):
|
28 |
global client
|
29 |
return RootSignals(api_key=api_key)
|
30 |
|
31 |
-
def create_judge(api_key, judge_name,
|
32 |
global client, custom_judge
|
|
|
|
|
|
|
33 |
if not client:
|
34 |
client = initialize_client(api_key)
|
35 |
|
36 |
# Create custom judge
|
37 |
custom_judge = client.evaluators.create(
|
38 |
name=judge_name,
|
39 |
-
predicate=judge_prompt
|
40 |
-
intent=
|
41 |
-
model="
|
42 |
)
|
43 |
|
44 |
-
return gr.Info(f"
|
45 |
|
46 |
-
def evaluate_response(llm_response):
|
47 |
global client, custom_judge
|
|
|
|
|
|
|
48 |
if not client or not custom_judge:
|
49 |
-
return "Please create a judge first", "
|
50 |
|
51 |
# Run evaluation using custom judge
|
52 |
evaluation_result = custom_judge.run(response=llm_response)
|
@@ -76,14 +83,14 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
|
|
76 |
|
77 |
gr.Markdown("---") # Divider
|
78 |
|
79 |
-
gr.Markdown("
|
80 |
with gr.Row():
|
81 |
-
judge_name = gr.Textbox(label="👨⚖️ Judge Name", placeholder="Enter a name for your custom judge...", interactive=True)
|
82 |
-
user_intent = gr.Textbox(label="👤 Intent", placeholder="Enter the high-level intent for this judge...", interactive=True)
|
83 |
with gr.Row():
|
84 |
judge_prompt = gr.Textbox(
|
85 |
label="📝 Custom Judge Prompt",
|
86 |
placeholder="Enter the custom judge prompt...",
|
|
|
87 |
interactive=True,
|
88 |
lines=5,
|
89 |
max_lines=10
|
@@ -96,10 +103,11 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
|
|
96 |
with gr.Row():
|
97 |
# Left column - Evaluation
|
98 |
with gr.Column():
|
99 |
-
gr.Markdown("
|
100 |
llm_response = gr.Textbox(
|
101 |
label="🤖 LLM Response",
|
102 |
-
placeholder="Enter the LLM response to be evaluated...",
|
|
|
103 |
interactive=True,
|
104 |
lines=5,
|
105 |
max_lines=10
|
@@ -108,24 +116,24 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
|
|
108 |
|
109 |
# Right column - Results
|
110 |
with gr.Column():
|
111 |
-
gr.Markdown("
|
112 |
score = gr.Textbox(label="📊 Score (between 0 and 1)", interactive=False)
|
113 |
justification = gr.TextArea(label="💬 Justification", interactive=False)
|
114 |
|
115 |
# Button click events
|
116 |
create_judge_btn.click(
|
117 |
fn=create_judge,
|
118 |
-
inputs=[api_key, judge_name,
|
119 |
outputs=info_message
|
120 |
)
|
121 |
|
122 |
evaluate_btn.click(
|
123 |
fn=evaluate_response,
|
124 |
-
inputs=[llm_response],
|
125 |
outputs=[score, justification]
|
126 |
)
|
127 |
|
128 |
-
gr.Markdown("[Homepage](https://www.rootsignals.ai/) | [Python SDK Docs](https://sdk.rootsignals.ai/en/latest/)")
|
129 |
|
130 |
if __name__ == "__main__":
|
131 |
demo.launch()
|
|
|
22 |
"o1-mini",
|
23 |
"o1-preview",
|
24 |
"open-codestral-mamba",
|
25 |
+
"RootJudge",
|
26 |
]
|
27 |
|
28 |
def initialize_client(api_key):
    """Build and return a RootSignals client authenticated with *api_key*.

    The client is handed back to the caller; this function does not store
    it anywhere itself.
    """
    return RootSignals(api_key=api_key)
|
31 |
|
32 |
+
def create_judge(api_key, judge_name, judge_prompt):
    """Create a custom evaluator ("judge") from the values entered in the UI.

    Args:
        api_key: Root Signals API key, used to build the shared client the
            first time one is needed.
        judge_name: Name given to the evaluator; also embedded in its
            intent string.
        judge_prompt: Instruction text for the judge. The literal
            ``{{response}}`` placeholder is appended after it.

    Returns:
        The result of the ``gr.Info(...)`` call reporting either the missing
        input or the successful creation.

    Side effects:
        Sets the module-level ``custom_judge`` and, when no client exists
        yet, the module-level ``client``.
    """
    global client, custom_judge

    if not api_key:
        return gr.Info("🔑 Please enter your Root Signals API key first!")

    # Reject blank form fields up front, mirroring the API-key guard, so an
    # empty name or prompt never reaches the evaluator-creation request.
    if not (judge_name or "").strip():
        return gr.Info("Please enter a name for your custom judge first!")
    if not (judge_prompt or "").strip():
        return gr.Info("Please enter a prompt for your custom judge first!")

    # NOTE(review): the client is module-global and only built when none
    # exists, so a later call with a different api_key reuses the first
    # client -- confirm this is intended.
    if not client:
        client = initialize_client(api_key)

    # Create custom judge. The quadruple braces in the f-string emit a
    # literal "{{response}}" (presumably the SDK's template variable for the
    # text under evaluation -- verify against the SDK docs).
    custom_judge = client.evaluators.create(
        name=judge_name,
        predicate=f'{judge_prompt}\n\nTEXT: {{{{response}}}}',
        intent=f"Intent: {judge_name}",
        model="gemini-2.0-flash",
    )

    return gr.Info(f"Your custom LLM-Judge '{judge_name}' is created successfully!")
|
49 |
|
50 |
+
def evaluate_response(api_key, llm_response):
|
51 |
global client, custom_judge
|
52 |
+
if not api_key:
|
53 |
+
return gr.Info("🔑 Please enter your Root Signals API key first!"), ""
|
54 |
+
|
55 |
if not client or not custom_judge:
|
56 |
+
return gr.Info("Please create a judge first"), ""
|
57 |
|
58 |
# Run evaluation using custom judge
|
59 |
evaluation_result = custom_judge.run(response=llm_response)
|
|
|
83 |
|
84 |
gr.Markdown("---") # Divider
|
85 |
|
86 |
+
gr.Markdown("## Create Custom Judge")
|
87 |
with gr.Row():
|
88 |
+
judge_name = gr.Textbox(label="👨⚖️ Judge Name", value="Medical Jargon Judge", placeholder="Enter a name for your custom judge...", interactive=True)
|
|
|
89 |
with gr.Row():
|
90 |
judge_prompt = gr.Textbox(
|
91 |
label="📝 Custom Judge Prompt",
|
92 |
placeholder="Enter the custom judge prompt...",
|
93 |
+
value="Evaluate the medical jargon use of a text. Higher scores mean the text include a lot of technical jargon such as drug names and very specific medical terminology.",
|
94 |
interactive=True,
|
95 |
lines=5,
|
96 |
max_lines=10
|
|
|
103 |
with gr.Row():
|
104 |
# Left column - Evaluation
|
105 |
with gr.Column():
|
106 |
+
gr.Markdown("## Execute")
|
107 |
llm_response = gr.Textbox(
|
108 |
label="🤖 LLM Response",
|
109 |
+
placeholder="Enter the LLM response to be evaluated...",
|
110 |
+
value="This CCR5 co-receptor is used by almost all primary HIV-1 isolates regardless of viral genetic subtype.",
|
111 |
interactive=True,
|
112 |
lines=5,
|
113 |
max_lines=10
|
|
|
116 |
|
117 |
# Right column - Results
|
118 |
with gr.Column():
|
119 |
+
gr.Markdown("## Results")
|
120 |
score = gr.Textbox(label="📊 Score (between 0 and 1)", interactive=False)
|
121 |
justification = gr.TextArea(label="💬 Justification", interactive=False)
|
122 |
|
123 |
# Button click events
|
124 |
create_judge_btn.click(
|
125 |
fn=create_judge,
|
126 |
+
inputs=[api_key, judge_name, judge_prompt],
|
127 |
outputs=info_message
|
128 |
)
|
129 |
|
130 |
evaluate_btn.click(
|
131 |
fn=evaluate_response,
|
132 |
+
inputs=[api_key, llm_response],
|
133 |
outputs=[score, justification]
|
134 |
)
|
135 |
|
136 |
+
gr.Markdown("[🌐 Homepage](https://www.rootsignals.ai/) | [🤖 Github Repo](https://sdk.rootsignals.ai/en/latest/) | [🐍 Python SDK Docs](https://sdk.rootsignals.ai/en/latest/) | [💬 Discord](https://discord.gg/EhazTQsFnj)")
|
137 |
|
138 |
if __name__ == "__main__":
|
139 |
demo.launch()
|