Spaces:
Sleeping
Sleeping
Ouz
committed on
Commit
·
8628f17
1
Parent(s):
973cabd
first draft
Browse files- README.md +8 -6
- app.py +100 -0
- requirements.txt +2 -0
README.md
CHANGED
@@ -1,13 +1,15 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
colorFrom: pink
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 5.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
short_description: Demo of
|
|
|
|
|
11 |
---
|
12 |
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: Custom Judges Demo
|
3 |
+
emoji: ⚡
|
4 |
colorFrom: pink
|
5 |
+
colorTo: red
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 5.13.2
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
+
short_description: Demo of custom LLM-Judges
|
11 |
+
thumbnail: >-
|
12 |
+
https://cdn-uploads.huggingface.co/production/uploads/64881e5bfb6ba030cb511a03/0yI0ZWI2dGATL8km7pjI-.png
|
13 |
---
|
14 |
|
15 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from root import RootSignals
|
3 |
+
|
4 |
+
# Module-level state shared by the callback functions in this file.
client = None  # Root Signals SDK client; None until a judge is first created
custom_judge = None  # evaluator returned by evaluators.create(); None until then
|
6 |
+
|
7 |
+
def initialize_client(api_key):
    """Build a Root Signals SDK client for the given API key.

    Args:
        api_key: Key passed straight through to ``RootSignals(api_key=...)``.

    Returns:
        The newly constructed ``RootSignals`` instance. This function does
        not store the instance anywhere; the caller decides what to do
        with it.
    """
    sdk_client = RootSignals(api_key=api_key)
    return sdk_client
|
10 |
+
|
11 |
+
def create_judge(api_key, judge_name, intent, judge_prompt):
    """Create a custom evaluator ("judge") from the values entered in the form.

    The created evaluator is stored in the module-level ``custom_judge`` and
    the SDK client in the module-level ``client``.

    Args:
        api_key: Root Signals API key used to build the SDK client.
        judge_name: Name passed to ``client.evaluators.create``.
        intent: Intent passed to ``client.evaluators.create``.
        judge_prompt: Prompt text; ``" {{output}}"`` is appended to it to form
            the evaluator's predicate.

    Returns:
        None. A success toast is raised through ``gr.Info``.

    Raises:
        gr.Error: If the API key, judge name or judge prompt is empty.
    """
    global client, custom_judge

    # Fail early with a user-visible message instead of sending an obviously
    # incomplete request to the API.
    required_fields = (
        ("API key", api_key),
        ("Judge name", judge_name),
        ("Judge prompt", judge_prompt),
    )
    for field_label, field_value in required_fields:
        if not (field_value or "").strip():
            raise gr.Error(f"{field_label} is required.")

    # Always rebuild the client from the key supplied with this request.
    # Reusing a previously created client would keep the first key forever:
    # a mistyped key could never be corrected, and a key entered later would
    # be silently ignored in favour of the earlier one.
    client = initialize_client(api_key)

    # Create custom judge
    custom_judge = client.evaluators.create(
        name=judge_name,
        predicate=judge_prompt + " {{output}}",
        intent=intent,
        model="gpt-4o",
    )

    # NOTE(review): these assignments only change attributes on the
    # server-side component objects. They are not returned as event outputs,
    # so they presumably do not refresh a page that is already rendered --
    # confirm, and reveal the sections through the click event's outputs.
    eval_section.visible = True
    results_section.visible = True
    evaluate_btn.visible = True

    gr.Info("Custom LLM-Judge is created successfully!")
|
29 |
+
|
30 |
+
def evaluate_response(llm_response):
    """Score one LLM response with the judge stored in ``custom_judge``.

    Args:
        llm_response: Text forwarded to ``custom_judge.run(response=...)``.

    Returns:
        A ``(score, justification)`` pair read from the evaluation result.
        When the module-level ``client`` or ``custom_judge`` is not set, both
        elements are the string "Please create a judge first".
    """
    judge_ready = bool(client) and bool(custom_judge)
    if not judge_ready:
        reminder = "Please create a judge first"
        return reminder, reminder

    # Run evaluation using custom judge
    result = custom_judge.run(response=llm_response)
    return result.score, result.justification
|
40 |
+
|
41 |
+
def _reveal_evaluation_ui():
    """Return updates that make the evaluation form, results panel and button visible."""
    return gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)


# Create the interface with a custom layout: header, API-key field, the
# judge-creation form, and the evaluation/results sections that start hidden.
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
    gr.HTML("""<a href="https://api.visitorbadge.io/api/visitors?path=https://huggingface.co/spaces/root-signals/RootEvaluatorsDemo">
<img src="https://api.visitorbadge.io/api/visitors?path=https://huggingface.co/spaces/root-signals/RootEvaluatorsDemo" />
</a>""")

    with gr.Row():
        gr.Image(value="https://app.rootsignals.ai/images/root-signals-color.svg", height=70)
    gr.Markdown("<div> </div>")  # Add some space below the image
    gr.Markdown("# Custom Judge Demo by Root Signals")

    gr.Markdown("[Sign-up](https://app.rootsignals.ai/register) to create your API key!")

    api_key = gr.Textbox(
        label="🔑 Root Signals API Key",
        placeholder="Enter your Root Signals API key...",
        type="password",
        show_label=True,
    )

    with gr.Row():
        # Left column - Judge Creation
        with gr.Column():
            gr.Markdown("### Create Custom Judge")
            judge_name = gr.Textbox(label="👨⚖️ Judge Name", placeholder="Enter a name for your custom judge...", interactive=True)
            user_intent = gr.Textbox(label="👤 Intent", placeholder="Enter the high-level intent for this judge...", interactive=True)
            judge_prompt = gr.Textbox(label="📝 Custom Judge Prompt", placeholder="Enter the custom judge prompt...", interactive=True)
            create_judge_btn = gr.Button("✨ CREATE JUDGE", variant="primary")

            # Evaluation section (initially hidden)
            eval_section = gr.Column(visible=False)
            with eval_section:
                gr.Markdown("### Evaluate Response")
                llm_response = gr.Textbox(label="🤖 LLM Response", placeholder="Enter the LLM response to be evaluated...", interactive=True)
                evaluate_btn = gr.Button("🧐 EVALUATE", variant="primary", visible=False)

        # Right column - Results
        results_section = gr.Column(visible=False)
        with results_section:
            score = gr.Textbox(label="📊 Score (between 0 and 1)", interactive=False)
            justification = gr.TextArea(label="💬 Justification", interactive=False)

    # Button click events
    # gr.Info is a toast helper, not a component, so the create step has no
    # output component. The hidden sections are revealed by a follow-up step
    # that runs only when create_judge finishes without raising.
    create_judge_btn.click(
        fn=create_judge,
        inputs=[api_key, judge_name, user_intent, judge_prompt],
        outputs=None,
    ).success(
        fn=_reveal_evaluation_ui,
        inputs=None,
        outputs=[eval_section, results_section, evaluate_btn],
    )

    evaluate_btn.click(
        fn=evaluate_response,
        inputs=[llm_response],
        outputs=[score, justification],
    )

    gr.Markdown("[Homepage](https://www.rootsignals.ai/) | [Python SDK Docs](https://sdk.rootsignals.ai/en/latest/)")

if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
root-signals
|