Spaces:

KolumbusLindh
/

LLM-as-a-judge

Sleeping

App Files Community

Kolumbus Lindh committed on Dec 10, 2024

Commit

47aec4f

1 Parent(s): ca0c241

updates

Browse files

Files changed (1) hide show

app.py +26 -19

app.py CHANGED Viewed

@@ -11,13 +11,13 @@ def load_user_model(repo_id, model_file):
 # Generate a response using the specified model and prompt
 def generate_response(model, prompt):
-    response = model(prompt, max_tokens=512, temperature=0.5)
     return response["choices"][0]["text"]
 # Evaluate responses using the LoRA evaluation model
 def evaluate_responses(prompt, repo_a, model_a, repo_b, model_b, evaluation_criteria):
     if len(evaluation_criteria) > 3:
-        return "Error: Please select up to 3 evaluation criteria only."
     # Load models
     model_a_instance = load_user_model(repo_a, model_a)
@@ -47,17 +47,12 @@ Please evaluate the responses based on the selected criteria. For each criterion
     evaluation_response = lora_model.create_completion(
         prompt=evaluation_prompt,
         max_tokens=512,
-        temperature=0.5
     )
     evaluation_results = evaluation_response["choices"][0]["text"]
-    # Combine results for display
-    final_output = f"""
-Response A:\n{response_a}\n\n
-Response B:\n{response_b}\n\n
-Evaluation Results:\n{evaluation_results}
-"""
-    return final_output
 # Load the LoRA evaluation model
 def load_lora_model():
@@ -73,26 +68,38 @@ print("LoRA evaluation model loaded successfully!")
 # Gradio interface
 with gr.Blocks(title="LLM as a Judge") as demo:
-    gr.Markdown("## LLM as a Judge 🧐")
     # Model inputs
-    repo_a_input = gr.Textbox(label="Model A Repository", placeholder="Enter the Hugging Face repo name for Model A...")
-    model_a_input = gr.Textbox(label="Model A File Name", placeholder="Enter the model filename for Model A...")
-    repo_b_input = gr.Textbox(label="Model B Repository", placeholder="Enter the Hugging Face repo name for Model B...")
-    model_b_input = gr.Textbox(label="Model B File Name", placeholder="Enter the model filename for Model B...")
     # Prompt and criteria inputs
     prompt_input = gr.Textbox(label="Enter Prompt", placeholder="Enter the prompt here...", lines=3)
     criteria_dropdown = gr.CheckboxGroup(
-        label="Select Up to 3 Evaluation Criteria",
-        choices=["Clarity", "Completeness", "Accuracy", "Relevance", "User-Friendliness", "Depth", "Creativity"]
     )
     # Button and outputs
     evaluate_button = gr.Button("Evaluate Models")
     evaluation_output = gr.Textbox(
         label="Evaluation Results",
-        placeholder="The evaluation results will appear here...",
         lines=20,
         interactive=False
     )
@@ -101,7 +108,7 @@ with gr.Blocks(title="LLM as a Judge") as demo:
     evaluate_button.click(
         fn=evaluate_responses,
         inputs=[prompt_input, repo_a_input, model_a_input, repo_b_input, model_b_input, criteria_dropdown],
-        outputs=[evaluation_output]
     )
 # Launch app

 # Generate a response using the specified model and prompt
 def generate_response(model, prompt):
+    response = model(prompt, max_tokens=512, temperature=0.5, top_p=0.95)
     return response["choices"][0]["text"]
 # Evaluate responses using the LoRA evaluation model
 def evaluate_responses(prompt, repo_a, model_a, repo_b, model_b, evaluation_criteria):
     if len(evaluation_criteria) > 3:
+        return "Error: Please select up to 3 evaluation criteria only.", "", ""
     # Load models
     model_a_instance = load_user_model(repo_a, model_a)
     evaluation_response = lora_model.create_completion(
         prompt=evaluation_prompt,
         max_tokens=512,
+        temperature=0.5,
+        top_p=0.95,
     )
     evaluation_results = evaluation_response["choices"][0]["text"]
+    return response_a, response_b, evaluation_results
 # Load the LoRA evaluation model
 def load_lora_model():
 # Gradio interface
 with gr.Blocks(title="LLM as a Judge") as demo:
+    gr.Markdown("## LLM as a Judge 𐄷")
     # Model inputs
+    repo_a_input = gr.Textbox(label="Model A Repository", placeholder="KolumbusLindh/LoRA-6150")
+    model_a_input = gr.Textbox(label="Model A File Name", placeholder="unsloth.F16.gguf")
+    repo_b_input = gr.Textbox(label="Model B Repository", placeholder="forestav/LoRA-2000")
+    model_b_input = gr.Textbox(label="Model B File Name", placeholder="unsloth.F16.gguf")
     # Prompt and criteria inputs
     prompt_input = gr.Textbox(label="Enter Prompt", placeholder="Enter the prompt here...", lines=3)
     criteria_dropdown = gr.CheckboxGroup(
+        label="Select Evaluation Criteria (Max 3)",
+        choices=["Clarity", "Completeness", "Accuracy"]  # Restricted criteria
     )
     # Button and outputs
     evaluate_button = gr.Button("Evaluate Models")
+    response_a_output = gr.Textbox(
+        label="Response A",
+        placeholder="Response from Model A will appear here...",
+        lines=10,
+        interactive=False
+    )
+    response_b_output = gr.Textbox(
+        label="Response B",
+        placeholder="Response from Model B will appear here...",
+        lines=10,
+        interactive=False
+    )
     evaluation_output = gr.Textbox(
         label="Evaluation Results",
+        placeholder="The evaluation analysis will appear here...",
         lines=20,
         interactive=False
     )
     evaluate_button.click(
         fn=evaluate_responses,
         inputs=[prompt_input, repo_a_input, model_a_input, repo_b_input, model_b_input, criteria_dropdown],
+        outputs=[response_a_output, response_b_output, evaluation_output]
     )
 # Launch app