Ouz committed on
Commit
8628f17
·
1 Parent(s): 973cabd

first draft

Browse files
Files changed (3) hide show
  1. README.md +8 -6
  2. app.py +100 -0
  3. requirements.txt +2 -0
README.md CHANGED
@@ -1,13 +1,15 @@
1
  ---
2
- title: CustomJudgeDemo
3
- emoji: 🐠
4
  colorFrom: pink
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 5.14.0
8
  app_file: app.py
9
  pinned: false
10
- short_description: Demo of Custom LLM-Judges using Root Signals
 
 
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Custom Judges Demo
3
+ emoji:
4
  colorFrom: pink
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 5.13.2
8
  app_file: app.py
9
  pinned: false
10
+ short_description: Demo of custom LLM-Judges
11
+ thumbnail: >-
12
+ https://cdn-uploads.huggingface.co/production/uploads/64881e5bfb6ba030cb511a03/0yI0ZWI2dGATL8km7pjI-.png
13
  ---
14
 
15
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from root import RootSignals

# NOTE: these module-level objects are shared by every browser session served
# by this process, so the most recently created judge is the one that
# evaluate_response() runs.
client = None  # Root Signals SDK client, built when a judge is created
custom_judge = None  # Evaluator object returned by client.evaluators.create()


def initialize_client(api_key):
    """Return a new Root Signals client authenticated with ``api_key``.

    The function only constructs the client; the caller decides where to
    store it.
    """
    return RootSignals(api_key=api_key)


def create_judge(api_key, judge_name, intent, judge_prompt):
    """Create the custom judge and reveal the evaluation part of the UI.

    Args:
        api_key: Root Signals API key typed into the password textbox.
        judge_name: Name given to the new evaluator.
        intent: High-level intent passed to the evaluator.
        judge_prompt: Prompt text; ``" {{output}}"`` is appended so the
            predicate ends with the output placeholder.

    Returns:
        Three visibility updates, in the order of the ``outputs`` list wired
        to ``create_judge_btn.click``: evaluation section, results section,
        evaluate button.

    Raises:
        gr.Error: If the API key is blank or the SDK call fails, so the
            message is shown to the user in the browser.
    """
    global client, custom_judge

    if not (api_key and api_key.strip()):
        raise gr.Error("Please enter your Root Signals API key.")

    try:
        # Build the client from the key supplied with THIS request. Caching
        # the first client would silently keep using a previously entered
        # key when the user (or another visitor) provides a different one.
        client = initialize_client(api_key.strip())

        custom_judge = client.evaluators.create(
            name=judge_name,
            predicate=judge_prompt + " {{output}}",
            intent=intent,
            model="gpt-4o",
        )
    except Exception as exc:  # UI boundary: surface the failure to the user
        raise gr.Error(f"Could not create the judge: {exc}") from exc

    # gr.Info is a notification helper (it returns None), not an output value.
    gr.Info("Custom LLM-Judge is created successfully!")

    # Setting `.visible` on the Python component objects does not reach the
    # browser; the change has to be returned to the event's outputs.
    return (
        gr.update(visible=True),
        gr.update(visible=True),
        gr.update(visible=True),
    )


def evaluate_response(llm_response):
    """Score ``llm_response`` with the most recently created judge.

    Returns:
        A ``(score, justification)`` pair taken from the evaluation result,
        or the same reminder message for both fields when no judge has been
        created yet.
    """
    if client is None or custom_judge is None:
        reminder = "Please create a judge first"
        return reminder, reminder

    evaluation_result = custom_judge.run(response=llm_response)
    return evaluation_result.score, evaluation_result.justification


# Create the interface with a custom layout
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
    # NOTE(review): the badge path names RootEvaluatorsDemo, which looks
    # copied from another Space -- confirm the intended path.
    gr.HTML("""<a href="https://api.visitorbadge.io/api/visitors?path=https://huggingface.co/spaces/root-signals/RootEvaluatorsDemo">
    <img src="https://api.visitorbadge.io/api/visitors?path=https://huggingface.co/spaces/root-signals/RootEvaluatorsDemo" />
    </a>""")

    with gr.Row():
        gr.Image(value="https://app.rootsignals.ai/images/root-signals-color.svg", height=70)
    gr.Markdown("<div>&nbsp;</div>")  # Add some space below the image
    gr.Markdown("# Custom Judge Demo by Root Signals")

    gr.Markdown("[Sign-up](https://app.rootsignals.ai/register) to create your API key!")

    api_key = gr.Textbox(
        label="🔑 Root Signals API Key",
        placeholder="Enter your Root Signals API key...",
        type="password",
        show_label=True,
    )

    with gr.Row():
        # Left column - Judge Creation
        with gr.Column():
            gr.Markdown("### Create Custom Judge")
            judge_name = gr.Textbox(label="👨‍⚖️ Judge Name", placeholder="Enter a name for your custom judge...", interactive=True)
            user_intent = gr.Textbox(label="👤 Intent", placeholder="Enter the high-level intent for this judge...", interactive=True)
            judge_prompt = gr.Textbox(label="📝 Custom Judge Prompt", placeholder="Enter the custom judge prompt...", interactive=True)
            create_judge_btn = gr.Button("✨ CREATE JUDGE", variant="primary")

            # Evaluation section (hidden until a judge has been created)
            eval_section = gr.Column(visible=False)
            with eval_section:
                gr.Markdown("### Evaluate Response")
                llm_response = gr.Textbox(label="🤖 LLM Response", placeholder="Enter the LLM response to be evaluated...", interactive=True)
                evaluate_btn = gr.Button("🧐 EVALUATE", variant="primary", visible=False)

        # Right column - Results (hidden until a judge has been created)
        results_section = gr.Column(visible=False)
        with results_section:
            score = gr.Textbox(label="📊 Score (between 0 and 1)", interactive=False)
            justification = gr.TextArea(label="💬 Justification", interactive=False)

    # Button click events
    create_judge_btn.click(
        fn=create_judge,
        inputs=[api_key, judge_name, user_intent, judge_prompt],
        # Order must match the tuple returned by create_judge().
        outputs=[eval_section, results_section, evaluate_btn],
    )

    evaluate_btn.click(
        fn=evaluate_response,
        inputs=[llm_response],
        outputs=[score, justification],
    )

    gr.Markdown("[Homepage](https://www.rootsignals.ai/) | [Python SDK Docs](https://sdk.rootsignals.ai/en/latest/)")

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio
2
+ root-signals