import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
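
# The app assumes these packages are available (inferred from the imports above; no
# requirements are pinned in this file): gradio, llama-cpp-python, huggingface_hub.
# Locally they could be installed with, for example:
#   pip install gradio llama-cpp-python huggingface_hub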

# Load the model (hf_hub_download fetches and caches the GGUF file from the Hub)
def load_model():
    repo_id = "KolumbusLindh/LoRA-4100"
    model_file = "unsloth.F16.gguf"
    local_path = hf_hub_download(repo_id=repo_id, filename=model_file)
    print(f"Loading model from: {local_path}")
    return Llama(model_path=local_path, n_ctx=2048, n_threads=8)
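
# Optional tweak (not in the original code): on GPU hardware, llama-cpp-python can
# offload layers with the n_gpu_layers argument, e.g.
#   Llama(model_path=local_path, n_ctx=2048, n_threads=8, n_gpu_layers=-1)
# where -1 offloads every layer.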
print("Starting model loading...") | |
model = load_model() | |
print("Model loaded successfully!") | |

# Evaluate two responses to the same prompt against the selected criterion
def evaluate_responses(prompt, response_a, response_b, evaluation_criteria):
    # Format the evaluation request as a chat exchange
    evaluation_prompt = [
        {"role": "system", "content": "You are an objective and thorough evaluator of instruction-based responses."},
        {"role": "user", "content": f"""
Prompt: {prompt}

Response A: {response_a}

Response B: {response_b}

Please evaluate both responses based on the following criteria: {evaluation_criteria}

For each criterion, rate each response on a scale from 1 to 10 and explain why it earned that rating. Then declare a winner (or 'draw' if both are equal).
"""}
    ]

    # Generate the evaluation
    evaluation_response = model.create_chat_completion(
        messages=evaluation_prompt,
        max_tokens=512,
        temperature=0.5
    )
    return evaluation_response['choices'][0]['message']['content']
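
# Quick smoke test with hypothetical inputs (handy when debugging outside the UI):
#   print(evaluate_responses(
#       "Explain recursion to a beginner.",
#       "Recursion is when a function calls itself on a smaller input...",
#       "Recursion means doing something again and again...",
#       "Clarity",
#   ))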

# Gradio interface
with gr.Blocks(title="LLM as a Judge") as demo:
    gr.Markdown("## LLM as a Judge")

    # Input fields for the prompt and the two responses
    prompt_input = gr.Textbox(label="Enter the Prompt", placeholder="Enter the prompt here...", lines=3)
    response_a_input = gr.Textbox(label="Response A", placeholder="Enter Response A here...", lines=5)
    response_b_input = gr.Textbox(label="Response B", placeholder="Enter Response B here...", lines=5)

    # Dropdown for selecting the evaluation criterion
    criteria_dropdown = gr.Dropdown(
        label="Select Evaluation Criteria",
        choices=["Clarity", "Completeness", "Accuracy", "Relevance", "User-Friendliness", "Depth", "Creativity"],
        value="Clarity",
        type="value"
    )

    # Button to start the evaluation
    evaluate_button = gr.Button("Evaluate Responses")

    # Read-only text box for displaying the evaluation results
    evaluation_output = gr.Textbox(
        label="Evaluation Results",
        placeholder="The evaluation results will appear here...",
        lines=10,
        interactive=False
    )

    # Run the evaluation when the button is clicked
    evaluate_button.click(
        fn=evaluate_responses,
        inputs=[prompt_input, response_a_input, response_b_input, criteria_dropdown],
        outputs=[evaluation_output]
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()