File size: 5,040 Bytes
8628f17
 
 
e333fa4
 
ebeb9b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d3e0b87
ebeb9b4
8628f17
 
 
 
 
d3e0b87
8628f17
d3e0b87
 
 
8628f17
 
 
 
 
 
d3e0b87
 
 
8628f17
 
d3e0b87
8628f17
d3e0b87
8628f17
d3e0b87
 
 
8628f17
d3e0b87
8628f17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e984be7
8628f17
 
 
 
 
 
 
 
ebeb9b4
 
d3e0b87
ebeb9b4
d3e0b87
ebeb9b4
 
 
 
d3e0b87
ebeb9b4
 
 
 
 
 
 
 
 
8628f17
ebeb9b4
8628f17
d3e0b87
ebeb9b4
 
d3e0b87
 
ebeb9b4
 
 
 
 
8628f17
 
ebeb9b4
d3e0b87
8628f17
 
 
 
 
 
d3e0b87
8628f17
 
 
 
 
d3e0b87
8628f17
 
 
d3e0b87
8628f17
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import gradio as gr
from root import RootSignals

# Module-level state shared by the Gradio callbacks defined below.
# `client` holds the Root Signals SDK client; it is assigned in create_judge().
client = None
# `custom_judge` holds the evaluator returned by client.evaluators.create();
# it is assigned in create_judge() and read in evaluate_response().
custom_judge = None
# Model identifiers. NOTE(review): nothing visible in this file references
# MODELS — create_judge() hard-codes "gemini-2.0-flash", which is not listed
# here. Presumably kept for a model picker; confirm before removing.
MODELS = [
    "claude-3-5-sonnet",
    "claude-3-haiku-20240307",
    "claude-3-opus-20240229",
    "claude-3-sonnet-20240229",
    "codestral",
    "command-r",
    "command-r-plus",
    "fireworks_ai/llama-v3-70b-instruct",
    "gpt-4",
    "gpt-4o",
    "gpt-4o-mini",
    "gpt-4-turbo",
    "groq/llama3-70b-8192",
    "mistral-large-latest",
    "mistral-medium",
    "o1-mini",
    "o1-preview",
    "open-codestral-mamba",
    "RootJudge",
]

def initialize_client(api_key):
    """Build a Root Signals SDK client for the given API key.

    The client is returned, not stored: the caller decides where to keep it.

    Args:
        api_key: Root Signals API key, forwarded unchanged to ``RootSignals``.

    Returns:
        A newly constructed ``RootSignals`` instance.
    """
    # No ``global`` statement here: this function never assigns a module-level
    # name, so declaring one would only suggest a side effect that does not exist.
    return RootSignals(api_key=api_key)

def create_judge(api_key, judge_name, judge_prompt):
    """Create a custom Root Signals evaluator and keep it for later evaluations.

    Rebinds the module-level ``client`` and ``custom_judge`` so that
    ``evaluate_response`` can run the judge created here.

    Args:
        api_key: Root Signals API key typed into the UI. When empty, a notice
            is shown and nothing is created.
        judge_name: Name given to the evaluator; also used to build its intent.
        judge_prompt: Instruction text that forms the start of the predicate.

    Returns:
        The result of ``gr.Info(...)``, which displays a toast in the UI.
        Exceptions raised by the SDK call are not caught here.
    """
    global client, custom_judge
    if not api_key:
        return gr.Info("🔑 Please enter your Root Signals API key first!")

    # Always rebuild the client from the submitted key. Reusing a client built
    # on an earlier call would silently keep using the first key ever entered,
    # even after the key in the textbox has been changed.
    client = initialize_client(api_key)

    # The quadruple braces in the f-string render as a literal ``{{response}}``.
    # Presumably this is the template variable filled from the ``response=``
    # argument passed to ``custom_judge.run`` — confirm against the SDK docs.
    custom_judge = client.evaluators.create(
        name=judge_name,
        predicate=f'{judge_prompt}\n\nTEXT: {{{{response}}}}',
        intent=f"Intent: {judge_name}",
        model="gemini-2.0-flash",
    )

    return gr.Info(f"Your custom LLM-Judge '{judge_name}' is created successfully!")

def evaluate_response(api_key, llm_response):
    """Run the previously created custom judge on ``llm_response``.

    Args:
        api_key: Root Signals API key from the UI; only checked for presence.
        llm_response: Text to evaluate, passed to the judge as ``response``.

    Returns:
        A ``(score, justification)`` pair taken from the evaluation result.
        When the key is missing or no judge exists yet, a notice is shown via
        ``gr.Info`` and the pair is ``(<gr.Info result>, "")`` instead.
    """
    if not api_key:
        notice = gr.Info("🔑 Please enter your Root Signals API key first!")
        return notice, ""

    # Both the client and the judge must already have been set by create_judge.
    if not (client and custom_judge):
        notice = gr.Info("Please create a judge first")
        return notice, ""

    outcome = custom_judge.run(response=llm_response)
    return outcome.score, outcome.justification

# Create the interface with a custom layout
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
    # Visitor-counter badge rendered as raw HTML at the top of the page.
    gr.HTML("""<a href="https://api.visitorbadge.io/api/visitors?path=https://huggingface.co/spaces/root-signals/RootEvaluatorsDemo">
               <img src="https://api.visitorbadge.io/api/visitors?path=https://huggingface.co/spaces/root-signals/RootEvaluatorsDemo" />
               </a>""")

    # Header: logo, title and sign-up links.
    with gr.Row():
        gr.Image(value="https://app.rootsignals.ai/images/root-signals-color.svg", height=70)
        gr.Markdown("<div>&nbsp;</div>")  # Add some space below the image
    gr.Markdown("# Custom Judge Demo by Root Signals")

    gr.Markdown("[Sign-up](https://app.rootsignals.ai/register) to create your API key or [create a temporary one](https://app.rootsignals.ai/demo-user)!")

    # The API key is an input to both callbacks wired up below.
    api_key = gr.Textbox(
        label="🔑 Root Signals API Key",
        placeholder="Enter your Root Signals API key...",
        type="password",
        show_label=True,
    )

    gr.Markdown("---")  # Divider

    # Judge definition: name, prompt and the button that creates the judge.
    gr.Markdown("## Create Custom Judge")
    with gr.Row():
        judge_name = gr.Textbox(label="👨‍⚖️ Judge Name", value="Medical Jargon Judge", placeholder="Enter a name for your custom judge...", interactive=True)
    with gr.Row():
        judge_prompt = gr.Textbox(
            label="📝 Custom Judge Prompt",
            placeholder="Enter the custom judge prompt...",
            value="Evaluate the medical jargon use of a text. Higher scores mean the text include a lot of technical jargon such as drug names and very specific medical terminology.",
            interactive=True,
            lines=5,
            max_lines=10
        )
        create_judge_btn = gr.Button("✨ CREATE JUDGE", variant="primary")
    # No placeholder is created for notifications: gr.Info is a function that
    # raises a toast from inside an event handler, not a layout component.
    # Calling it here would only log its default text while the UI is built.

    gr.Markdown("---")  # Divider

    with gr.Row():
        # Left column - Evaluation
        with gr.Column():
            gr.Markdown("## Execute")
            llm_response = gr.Textbox(
                label="🤖 LLM Response",
                placeholder="Enter the LLM response to be evaluated...",
                value="This CCR5 co-receptor is used by almost all primary HIV-1 isolates regardless of viral genetic subtype.",
                interactive=True,
                lines=5,
                max_lines=10
            )
            evaluate_btn = gr.Button("🧐 EVALUATE", variant="primary", visible=True)

        # Right column - Results
        with gr.Column():
            gr.Markdown("## Results")
            score = gr.Textbox(label="📊 Score (between 0 and 1)", interactive=False)
            justification = gr.TextArea(label="💬 Justification", interactive=False)

    # Button click events
    # create_judge reports to the user through gr.Info toasts only, so it has
    # no output component to update.
    create_judge_btn.click(
        fn=create_judge,
        inputs=[api_key, judge_name, judge_prompt],
        outputs=None
    )

    # evaluate_response returns a (score, justification) pair, one per output.
    evaluate_btn.click(
        fn=evaluate_response,
        inputs=[api_key, llm_response],
        outputs=[score, justification]
    )

    # NOTE(review): the "Github Repo" link targets the same URL as the SDK docs
    # link next to it — confirm the intended repository URL.
    gr.Markdown("[🌐 Homepage](https://www.rootsignals.ai/) | [🤖 Github Repo](https://sdk.rootsignals.ai/en/latest/) | [🐍 Python SDK Docs](https://sdk.rootsignals.ai/en/latest/) | [💬 Discord](https://discord.gg/EhazTQsFnj)")

# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()