import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Load the model
def load_model():
    repo_id = "KolumbusLindh/LoRA-4100"
    model_file = "unsloth.F16.gguf"
    local_path = hf_hub_download(repo_id=repo_id, filename=model_file)
    print(f"Loading model from: {local_path}")
    return Llama(model_path=local_path, n_ctx=2048, n_threads=8)

print("Starting model loading...")
model = load_model()
print("Model loaded successfully!")

# Function to evaluate two responses
def evaluate_responses(prompt, response_a, response_b, evaluation_criteria):
    # Format the evaluation prompt
    evaluation_prompt = [
        {"role": "system", "content": "You are an objective and thorough evaluator of instruction-based responses."},
        {"role": "user", "content": f"""
Prompt: {prompt}

Response A: {response_a}

Response B: {response_b}

Please evaluate both responses based on the following criteria: {evaluation_criteria}

For each criterion, provide a rating of the responses on a scale from 1 to 10, and explain why each response earned that rating. Then, declare a winner (or 'draw' if both are equal).
"""}
    ]

    # Generate the evaluation
    evaluation_response = model.create_chat_completion(
        messages=evaluation_prompt,
        max_tokens=512,
        temperature=0.5
    )
    evaluation_results = evaluation_response['choices'][0]['message']['content']
    return evaluation_results

# Gradio interface
with gr.Blocks(title="LLM as a Judge") as demo:
    gr.Markdown("## LLM as a Judge 🧐")

    # Input fields for the prompt, the two responses, and the evaluation criteria
    prompt_input = gr.Textbox(label="Enter the Prompt", placeholder="Enter the prompt here...", lines=3)
    response_a_input = gr.Textbox(label="Response A", placeholder="Enter Response A here...", lines=5)
    response_b_input = gr.Textbox(label="Response B", placeholder="Enter Response B here...", lines=5)

    # Dropdown for selecting the evaluation criterion
    criteria_dropdown = gr.Dropdown(
        label="Select Evaluation Criteria",
        choices=["Clarity", "Completeness", "Accuracy", "Relevance", "User-Friendliness", "Depth", "Creativity"],
        value="Clarity",
        type="value"
    )

    # Button to start the evaluation
    evaluate_button = gr.Button("Evaluate Responses")

    # Textbox for displaying the evaluation results
    evaluation_output = gr.Textbox(
        label="Evaluation Results",
        placeholder="The evaluation results will appear here...",
        lines=10,
        interactive=False
    )

    # Link the evaluation function to the button
    evaluate_button.click(
        fn=evaluate_responses,
        inputs=[prompt_input, response_a_input, response_b_input, criteria_dropdown],
        outputs=[evaluation_output]
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()
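# Usage sketch (not part of the original app; the example prompt and responses below are
# hypothetical): evaluate_responses can be called directly for a quick smoke test once the
# model has loaded, bypassing the Gradio UI:
#
#   result = evaluate_responses(
#       prompt="Explain how to boil an egg.",
#       response_a="Place the egg in boiling water and cook for about 7 minutes.",
#       response_b="Put the egg in the microwave until it is done.",
#       evaluation_criteria="Clarity",
#   )
#   print(result)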