Ouz committed
Commit d3e0b87 · 1 Parent(s): e984be7

Add better popups

Files changed (1): app.py (+24, −16)
app.py CHANGED
@@ -22,31 +22,38 @@ MODELS = [
    "o1-mini",
    "o1-preview",
    "open-codestral-mamba",
+   "RootJudge",
]

def initialize_client(api_key):
    global client
    return RootSignals(api_key=api_key)

-def create_judge(api_key, judge_name, intent, judge_prompt):
+def create_judge(api_key, judge_name, judge_prompt):
    global client, custom_judge
+   if not api_key:
+       return gr.Info("🔑 Please enter your Root Signals API key first!")
+
    if not client:
        client = initialize_client(api_key)

    # Create custom judge
    custom_judge = client.evaluators.create(
        name=judge_name,
-       predicate=judge_prompt + " {{output}}",
-       intent=intent,
-       model="gpt-4o",
+       predicate=f'{judge_prompt}\n\nTEXT: {{{{response}}}}',
+       intent=f"Intent: {judge_name}",
+       model="gemini-2.0-flash",
    )

-   return gr.Info(f"Custom LLM-Judge '{judge_name}' is created successfully!")
+   return gr.Info(f"Your custom LLM-Judge '{judge_name}' is created successfully!")

-def evaluate_response(llm_response):
+def evaluate_response(api_key, llm_response):
    global client, custom_judge
+   if not api_key:
+       return gr.Info("🔑 Please enter your Root Signals API key first!"), ""
+
    if not client or not custom_judge:
-       return "Please create a judge first", "Please create a judge first"
+       return gr.Info("Please create a judge first"), ""

    # Run evaluation using custom judge
    evaluation_result = custom_judge.run(response=llm_response)
@@ -76,14 +83,14 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:

    gr.Markdown("---")  # Divider

-   gr.Markdown("### Create Custom Judge")
+   gr.Markdown("## Create Custom Judge")
    with gr.Row():
-       judge_name = gr.Textbox(label="👨‍⚖️ Judge Name", placeholder="Enter a name for your custom judge...", interactive=True)
-       user_intent = gr.Textbox(label="👤 Intent", placeholder="Enter the high-level intent for this judge...", interactive=True)
+       judge_name = gr.Textbox(label="👨‍⚖️ Judge Name", value="Medical Jargon Judge", placeholder="Enter a name for your custom judge...", interactive=True)
    with gr.Row():
        judge_prompt = gr.Textbox(
            label="📝 Custom Judge Prompt",
            placeholder="Enter the custom judge prompt...",
+           value="Evaluate the medical jargon use of a text. Higher scores mean the text include a lot of technical jargon such as drug names and very specific medical terminology.",
            interactive=True,
            lines=5,
            max_lines=10
@@ -96,10 +103,11 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
    with gr.Row():
        # Left column - Evaluation
        with gr.Column():
-           gr.Markdown("### Evaluate Response")
+           gr.Markdown("## Execute")
            llm_response = gr.Textbox(
                label="🤖 LLM Response",
-               placeholder="Enter the LLM response to be evaluated...",
+               placeholder="Enter the LLM response to be evaluated...",
+               value="This CCR5 co-receptor is used by almost all primary HIV-1 isolates regardless of viral genetic subtype.",
                interactive=True,
                lines=5,
                max_lines=10
@@ -108,24 +116,24 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:

        # Right column - Results
        with gr.Column():
-           gr.Markdown("### Results")
+           gr.Markdown("## Results")
            score = gr.Textbox(label="📊 Score (between 0 and 1)", interactive=False)
            justification = gr.TextArea(label="💬 Justification", interactive=False)

    # Button click events
    create_judge_btn.click(
        fn=create_judge,
-       inputs=[api_key, judge_name, user_intent, judge_prompt],
+       inputs=[api_key, judge_name, judge_prompt],
        outputs=info_message
    )

    evaluate_btn.click(
        fn=evaluate_response,
-       inputs=[llm_response],
+       inputs=[api_key, llm_response],
        outputs=[score, justification]
    )

-   gr.Markdown("[Homepage](https://www.rootsignals.ai/) | [Python SDK Docs](https://sdk.rootsignals.ai/en/latest/)")
+   gr.Markdown("[🌐 Homepage](https://www.rootsignals.ai/) | [🤖 Github Repo](https://sdk.rootsignals.ai/en/latest/) | [🐍 Python SDK Docs](https://sdk.rootsignals.ai/en/latest/) | [💬 Discord](https://discord.gg/EhazTQsFnj)")

if __name__ == "__main__":
    demo.launch()
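
For reference, the create/evaluate flow changed in this commit can also be exercised outside the Gradio UI. The following is a minimal sketch, not code from this commit: the import path, the placeholder API key, and the score/justification fields read from the result are assumptions (the diff only shows the RootSignals class, client.evaluators.create(...), and custom_judge.run(response=...)).

# Minimal sketch of the same SDK calls used by create_judge() and evaluate_response() above.
from root import RootSignals  # assumed import path; not shown in the diff

client = RootSignals(api_key="YOUR_ROOT_SIGNALS_API_KEY")  # hypothetical placeholder key

# Same call pattern as create_judge(): prompt plus a {{response}} slot for the text under evaluation
judge = client.evaluators.create(
    name="Medical Jargon Judge",
    predicate="Evaluate the medical jargon use of a text.\n\nTEXT: {{response}}",
    intent="Intent: Medical Jargon Judge",
    model="gemini-2.0-flash",
)

# Same call pattern as evaluate_response()
result = judge.run(response="This CCR5 co-receptor is used by almost all primary HIV-1 isolates.")
print(result.score, result.justification)  # assumed result fields, mirroring the Score and Justification boxes in the UI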