# NOTE: web-page residue from the Hugging Face Spaces file viewer was captured
# here (Space status "Sleeping", "File size: 5,040 Bytes", a per-line
# commit-hash column and the 1-139 line-number gutter). It is not part of the
# program and has been reduced to this comment.
import gradio as gr
from root import RootSignals
# Module-level state shared by the Gradio callbacks in this file.
# Both names start out as None; create_judge() is what assigns them.
client = custom_judge = None

# Model identifiers known to the demo. Nothing in the visible part of this
# file reads this list (create_judge() passes its own model name).
MODELS = [
    "claude-3-5-sonnet",
    "claude-3-haiku-20240307",
    "claude-3-opus-20240229",
    "claude-3-sonnet-20240229",
    "codestral",
    "command-r",
    "command-r-plus",
    "fireworks_ai/llama-v3-70b-instruct",
    "gpt-4",
    "gpt-4o",
    "gpt-4o-mini",
    "gpt-4-turbo",
    "groq/llama3-70b-8192",
    "mistral-large-latest",
    "mistral-medium",
    "o1-mini",
    "o1-preview",
    "open-codestral-mamba",
    "RootJudge",
]
def initialize_client(api_key):
    """Build a Root Signals SDK client for the given API key.

    This function only constructs and returns the client; it does not store
    it anywhere. Callers that want to keep it assign the return value
    themselves.

    Args:
        api_key: Root Signals API key passed straight to ``RootSignals``.

    Returns:
        A new ``RootSignals`` instance.
    """
    # No ``global`` statement: nothing is assigned at module level here.
    return RootSignals(api_key=api_key)
def create_judge(api_key, judge_name, judge_prompt):
    """Create a custom LLM judge and remember it for later evaluations.

    On success the module-level ``client`` and ``custom_judge`` are replaced
    with the newly built client and the evaluator returned by the SDK.

    Args:
        api_key: Root Signals API key entered in the UI. When empty, no
            request is made and the user is asked to provide one.
        judge_name: Name for the new evaluator; also used in its intent text.
        judge_prompt: Instruction text for the judge. A trailing
            ``TEXT: {{response}}`` section is appended to form the predicate.

    Returns:
        Whatever the ``gr.Info(...)`` call that reports the outcome returns.
    """
    global client, custom_judge

    if not api_key:
        return gr.Info("🔑 Please enter your Root Signals API key first!")

    # Always build the client from the key supplied with THIS call. Caching
    # the first client forever would keep using the first key ever entered,
    # even after the user types a different one.
    client = initialize_client(api_key)

    # The quadrupled braces in the f-string produce a literal "{{response}}"
    # in the predicate — presumably the template variable the service fills
    # with the text passed as ``response=`` at evaluation time (confirm
    # against the SDK docs).
    custom_judge = client.evaluators.create(
        name=judge_name,
        predicate=f'{judge_prompt}\n\nTEXT: {{{{response}}}}',
        intent=f"Intent: {judge_name}",
        model="gemini-2.0-flash",
    )

    return gr.Info(f"Your custom LLM-Judge '{judge_name}' is created successfully!")
def evaluate_response(api_key, llm_response):
    """Score a piece of text with the previously created custom judge.

    Args:
        api_key: Root Signals API key entered in the UI. Only its presence is
            checked here; the evaluation itself runs through the stored
            ``custom_judge``.
        llm_response: Text to evaluate; passed to the judge as ``response``.

    Returns:
        A ``(score, justification)`` pair taken from the evaluation result.
        When the key is missing or no judge exists yet, the pair is the
        result of a ``gr.Info(...)`` notice and an empty string.
    """
    # ``client`` and ``custom_judge`` are only read, so no ``global``
    # statement is required.
    if not api_key:
        return gr.Info("🔑 Please enter your Root Signals API key first!"), ""

    # "Not created yet" is represented by None, so test identity rather than
    # truthiness: an SDK object is a valid judge regardless of how it
    # evaluates in a boolean context.
    if client is None or custom_judge is None:
        return gr.Info("Please create a judge first"), ""

    # Run evaluation using the custom judge
    evaluation_result = custom_judge.run(response=llm_response)
    return evaluation_result.score, evaluation_result.justification
# Create the interface with a custom layout.
# Everything that must render (components, event wiring, footer) lives inside
# the Blocks context; only the launch guard sits outside it.
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
    # Visitor-counter badge.
    gr.HTML("""<a href="https://api.visitorbadge.io/api/visitors?path=https://huggingface.co/spaces/root-signals/RootEvaluatorsDemo">
<img src="https://api.visitorbadge.io/api/visitors?path=https://huggingface.co/spaces/root-signals/RootEvaluatorsDemo" />
</a>""")

    with gr.Row():
        gr.Image(value="https://app.rootsignals.ai/images/root-signals-color.svg", height=70)
    gr.Markdown("<div> </div>")  # Add some space below the image

    gr.Markdown("# Custom Judge Demo by Root Signals")
    gr.Markdown("[Sign-up](https://app.rootsignals.ai/register) to create your API key or [create a temporary one](https://app.rootsignals.ai/demo-user)!")

    api_key = gr.Textbox(
        label="🔑 Root Signals API Key",
        placeholder="Enter your Root Signals API key...",
        type="password",
        show_label=True,
    )

    gr.Markdown("---")  # Divider

    gr.Markdown("## Create Custom Judge")
    with gr.Row():
        judge_name = gr.Textbox(label="👨⚖️ Judge Name", value="Medical Jargon Judge", placeholder="Enter a name for your custom judge...", interactive=True)
    with gr.Row():
        judge_prompt = gr.Textbox(
            label="📝 Custom Judge Prompt",
            placeholder="Enter the custom judge prompt...",
            value="Evaluate the medical jargon use of a text. Higher scores mean the text include a lot of technical jargon such as drug names and very specific medical terminology.",
            interactive=True,
            lines=5,
            max_lines=10
        )
    create_judge_btn = gr.Button("✨ CREATE JUDGE", variant="primary")
    # No gr.Info() call here: gr.Info is a notification helper meant to be
    # invoked from inside an event handler, not a layout component, so it
    # cannot serve as an output target.

    gr.Markdown("---")  # Divider

    with gr.Row():
        # Left column - Evaluation
        with gr.Column():
            gr.Markdown("## Execute")
            llm_response = gr.Textbox(
                label="🤖 LLM Response",
                placeholder="Enter the LLM response to be evaluated...",
                value="This CCR5 co-receptor is used by almost all primary HIV-1 isolates regardless of viral genetic subtype.",
                interactive=True,
                lines=5,
                max_lines=10
            )
            evaluate_btn = gr.Button("🧐 EVALUATE", variant="primary", visible=True)

        # Right column - Results
        with gr.Column():
            gr.Markdown("## Results")
            score = gr.Textbox(label="📊 Score (between 0 and 1)", interactive=False)
            justification = gr.TextArea(label="💬 Justification", interactive=False)

    # Button click events
    # create_judge reports its outcome through a gr.Info notification, so
    # there is no component to update.
    create_judge_btn.click(
        fn=create_judge,
        inputs=[api_key, judge_name, judge_prompt],
        outputs=None
    )
    evaluate_btn.click(
        fn=evaluate_response,
        inputs=[api_key, llm_response],
        outputs=[score, justification]
    )

    # NOTE(review): the "Github Repo" link below points at the same URL as the
    # SDK docs link — looks unintended; confirm the repository URL.
    gr.Markdown("[🌐 Homepage](https://www.rootsignals.ai/) | [🤖 Github Repo](https://sdk.rootsignals.ai/en/latest/) | [🐍 Python SDK Docs](https://sdk.rootsignals.ai/en/latest/) | [💬 Discord](https://discord.gg/EhazTQsFnj)")

if __name__ == "__main__":
    demo.launch()