Kolumbus Lindh committed
Commit 1de90bd · 1 Parent(s): ec08b2a
updates
app.py CHANGED
@@ -2,21 +2,35 @@ import gradio as gr
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 
-# Load
-def load_model():
+# Load LoRA-4100 model for evaluation
+def load_lora_model():
     repo_id = "KolumbusLindh/LoRA-4100"
     model_file = "unsloth.F16.gguf"
 
     local_path = hf_hub_download(repo_id=repo_id, filename=model_file)
-    print(f"Loading model from: {local_path}")
+    print(f"Loading LoRA model from: {local_path}")
     return Llama(model_path=local_path, n_ctx=2048, n_threads=8)
 
-
-model = load_model()
-print("Model loaded successfully!")
+lora_model = load_lora_model()
+print("LoRA model loaded successfully!")
 
-#
-def evaluate_responses(prompt, response_a, response_b, evaluation_criteria):
+# Load user-specified model
+def load_user_model(model_path):
+    print(f"Loading user model from: {model_path}")
+    return Llama(model_path=model_path, n_ctx=2048, n_threads=8)
+
+# Generate response using a specified model and prompt
+def generate_response(model_path, prompt):
+    user_model = load_user_model(model_path)
+    response = user_model(prompt, max_tokens=256, temperature=0.7)
+    return response["choices"][0]["text"]
+
+# Evaluate responses using the LoRA model
+def evaluate_responses(prompt, model_a_path, model_b_path, evaluation_criteria):
+    # Generate responses
+    response_a = generate_response(model_a_path, prompt)
+    response_b = generate_response(model_b_path, prompt)
+
     # Format the evaluation prompt
     evaluation_prompt = [
         {"role": "system", "content": "You are an objective and thorough evaluator of instruction-based responses."},
@@ -33,7 +47,7 @@ For each criterion, provide a rating of the responses on a scale from 1 to 10, a
     ]
 
     # Generate the evaluation
-    evaluation_response = model.create_chat_completion(
+    evaluation_response = lora_model.create_chat_completion(
         messages=evaluation_prompt,
         max_tokens=512,
         temperature=0.5
@@ -47,12 +61,12 @@ For each criterion, provide a rating of the responses on a scale from 1 to 10, a
 with gr.Blocks(title="LLM as a Judge") as demo:
     gr.Markdown("## LLM as a Judge 🧐")
 
-    #
+    # Inputs for model paths, prompt, and evaluation criteria
+    model_a_input = gr.Textbox(label="Model A Path or URL", placeholder="Enter the path or URL to Model A...")
+    model_b_input = gr.Textbox(label="Model B Path or URL", placeholder="Enter the path or URL to Model B...")
     prompt_input = gr.Textbox(label="Enter the Prompt", placeholder="Enter the prompt here...", lines=3)
-    response_a_input = gr.Textbox(label="Response A", placeholder="Enter Response A here...", lines=5)
-    response_b_input = gr.Textbox(label="Response B", placeholder="Enter Response B here...", lines=5)
 
-    # Dropdown for
+    # Dropdown for evaluation criteria
    criteria_dropdown = gr.Dropdown(
         label="Select Evaluation Criteria",
         choices=["Clarity", "Completeness", "Accuracy", "Relevance", "User-Friendliness", "Depth", "Creativity"],
@@ -60,10 +74,10 @@ with gr.Blocks(title="LLM as a Judge") as demo:
         type="value"
     )
 
-    # Button to
-    evaluate_button = gr.Button("Evaluate Responses")
+    # Button to evaluate responses
+    evaluate_button = gr.Button("Evaluate Models")
 
-    #
+    # Output for evaluation results
     evaluation_output = gr.Textbox(
         label="Evaluation Results",
         placeholder="The evaluation results will appear here...",
@@ -74,7 +88,7 @@ with gr.Blocks(title="LLM as a Judge") as demo:
     # Link evaluation function to the button
     evaluate_button.click(
         fn=evaluate_responses,
-        inputs=[prompt_input, response_a_input, response_b_input, criteria_dropdown],
+        inputs=[prompt_input, model_a_input, model_b_input, criteria_dropdown],
         outputs=[evaluation_output]
     )
 
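For anyone trying the updated flow outside the Space, a minimal usage sketch follows. It is not part of the commit: the GGUF paths are hypothetical placeholders, importing app runs the module-level load_lora_model() (which downloads the judge model from the Hub), and the sketch assumes evaluate_responses returns the evaluation text, as its wiring to the evaluation_output Textbox suggests.

# Minimal usage sketch (assumptions noted above; not part of the commit).
from app import evaluate_responses  # importing app also downloads and loads the LoRA judge

result = evaluate_responses(
    prompt="Explain what a context window is in one paragraph.",
    model_a_path="/models/model-a.gguf",  # hypothetical local GGUF path
    model_b_path="/models/model-b.gguf",  # hypothetical local GGUF path
    evaluation_criteria="Clarity",
)
print(result)

Note that generate_response reloads the user model on every call, so each evaluation pays the full model-load cost twice; caching the Llama instances per path would be the obvious follow-up optimization.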