Spaces:
Sleeping
Sleeping
File size: 4,421 Bytes
04d9cf4 5781d4e 8f23865 04d9cf4 8f23865 04d9cf4 66cb564 1de90bd 5781d4e 8f23865 66cb564 1de90bd 8f23865 66cb564 1de90bd 5781d4e 66cb564 ec08b2a 7841304 ec08b2a 7841304 66cb564 ec08b2a 04d9cf4 7841304 5781d4e 7841304 5781d4e 7841304 04d9cf4 5781d4e 8f23865 04d9cf4 5781d4e 8f23865 5781d4e 66cb564 7841304 ec08b2a 7841304 04d9cf4 8f23865 5781d4e 1de90bd 5781d4e 04d9cf4 66cb564 5781d4e 04d9cf4 66cb564 5781d4e ec08b2a 8f23865 ec08b2a 04d9cf4 5781d4e ec08b2a 5781d4e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Function to load a user-specified model from Hugging Face
def load_user_model(repo_id, model_file):
    """Download a model file from the Hugging Face Hub and load it.

    Args:
        repo_id: Hub repository name, e.g. ``"KolumbusLindh/LoRA-4100"``.
        model_file: Name of the model file inside that repository.

    Returns:
        A ``Llama`` instance created with ``n_ctx=2048`` and ``n_threads=8``.

    Raises:
        ValueError: If either argument is missing or blank.
    """
    # Both values are typed into free-text boxes in the UI, so trim stray
    # whitespace and reject blanks up front instead of failing later inside
    # the download with a less obvious error.
    repo_id = (repo_id or "").strip()
    model_file = (model_file or "").strip()
    if not repo_id:
        raise ValueError("A Hugging Face repository name is required.")
    if not model_file:
        raise ValueError("A model file name is required.")

    print(f"Downloading model {model_file} from repository {repo_id}...")
    local_path = hf_hub_download(repo_id=repo_id, filename=model_file)
    print(f"Model downloaded to: {local_path}")
    return Llama(model_path=local_path, n_ctx=2048, n_threads=8)
# Generate a response using the specified model and prompt
def generate_response(model, prompt):
    """Run ``prompt`` through ``model`` and return the first choice's text.

    The model is called with ``max_tokens=256`` and ``temperature=0.7``. Its
    result is indexed as ``result["choices"][0]["text"]``.
    """
    sampling_options = {"max_tokens": 256, "temperature": 0.7}
    completion = model(prompt, **sampling_options)
    first_choice = completion["choices"][0]
    return first_choice["text"]
# Evaluate responses generated by two models using the LoRA evaluation model
def evaluate_responses(prompt, repo_a, model_a, repo_b, model_b, criteria_list):
    """Generate a response from each model and have the judge compare them.

    Args:
        prompt: The prompt sent to both models.
        repo_a: Hugging Face repository name for model A.
        model_a: Model file name inside ``repo_a``.
        repo_b: Hugging Face repository name for model B.
        model_b: Model file name inside ``repo_b``.
        criteria_list: Names of the evaluation criteria selected by the user.
            A single string is treated as one criterion; ``None`` or an empty
            selection falls back to a generic criterion.

    Returns:
        A text report containing both responses followed by the judge
        model's evaluation.
    """
    # Load user-specified models
    model_a_instance = load_user_model(repo_a, model_a)
    model_b_instance = load_user_model(repo_b, model_b)

    # Generate responses
    response_a = generate_response(model_a_instance, prompt)
    response_b = generate_response(model_b_instance, prompt)
    print(f"Response A: {response_a}")
    print(f"Response B: {response_b}")

    # Render the criteria as plain comma-separated text. Interpolating the
    # list directly would put its Python repr (brackets and quotes, or "[]"
    # / "None" when nothing is selected) into the judge prompt.
    if isinstance(criteria_list, str):
        criteria_list = [criteria_list]
    if criteria_list:
        criteria_text = ", ".join(str(item) for item in criteria_list)
    else:
        criteria_text = "Overall quality"

    # Format the evaluation prompt for the LoRA model
    evaluation_prompt = (
        "\n"
        f"Prompt: {prompt}\n"
        f"Response A: {response_a}\n"
        f"Response B: {response_b}\n"
        f"Evaluation Criteria: {criteria_text}\n"
        "Please evaluate the responses based on the selected criteria. "
        "For each criterion, rate both responses on a scale from 1 to 10 "
        "and provide a justification. Finally, declare the winner "
        "(or 'draw' if they are equal).\n"
    )

    # Use the LoRA model to evaluate the responses
    evaluation_response = lora_model.create_completion(
        prompt=evaluation_prompt,
        max_tokens=512,
        temperature=0.5,
    )
    evaluation_results = evaluation_response["choices"][0]["text"]

    final_output = (
        "\n"
        f"Response A:\n{response_a}\n\n\n"
        f"Response B:\n{response_b}\n\n\n"
        f"Evaluation Results:\n{evaluation_results}\n"
    )
    return final_output
# Load the base LoRA evaluation model
def load_lora_model():
    """Download the fixed judge model from the Hugging Face Hub and load it.

    The repository and file name are hard-coded. Returns a ``Llama`` instance
    created with ``n_ctx=2048`` and ``n_threads=8``.
    """
    judge_repo, judge_file = "KolumbusLindh/LoRA-4100", "unsloth.F16.gguf"
    print(f"Downloading LoRA evaluation model from repository {judge_repo}...")
    weights_path = hf_hub_download(repo_id=judge_repo, filename=judge_file)
    print(f"LoRA evaluation model downloaded to: {weights_path}")
    return Llama(model_path=weights_path, n_ctx=2048, n_threads=8)
# Load the judge model once, at module import, and keep it in the module-level
# name `lora_model`, which evaluate_responses() reads when scoring responses.
lora_model = load_lora_model()
print("LoRA evaluation model loaded successfully!")
# Gradio interface
with gr.Blocks(title="LLM as a Judge") as demo:
    gr.Markdown("## LLM as a Judge 🧐")

    # Inputs for Model A repository and file
    repo_a_input = gr.Textbox(label="Model A Repository (e.g., KolumbusLindh/LoRA-4100)", placeholder="Enter the Hugging Face repo name for Model A...")
    model_a_input = gr.Textbox(label="Model A File Name (e.g., unsloth.F16.gguf)", placeholder="Enter the model filename for Model A...")

    # Inputs for Model B repository and file
    repo_b_input = gr.Textbox(label="Model B Repository (e.g., KolumbusLindh/LoRA-4100)", placeholder="Enter the Hugging Face repo name for Model B...")
    model_b_input = gr.Textbox(label="Model B File Name (e.g., unsloth.F16.gguf)", placeholder="Enter the model filename for Model B...")

    # Input for prompt and evaluation criteria
    prompt_input = gr.Textbox(label="Enter Prompt", placeholder="Enter the prompt here...", lines=3)
    # `max_choices` is an option of a multiselect Dropdown; CheckboxGroup does
    # not accept it, so the selection cap is enforced with a Dropdown instead.
    # The selected values still arrive as a list of strings.
    criteria_dropdown = gr.Dropdown(
        label="Select Up to 3 Evaluation Criteria",
        choices=["Clarity", "Completeness", "Accuracy", "Relevance", "User-Friendliness", "Depth", "Creativity"],
        value=["Clarity"],
        multiselect=True,
        max_choices=3,
    )

    # Button to evaluate responses
    evaluate_button = gr.Button("Evaluate Models")

    # Output for evaluation results
    evaluation_output = gr.Textbox(
        label="Evaluation Results",
        placeholder="The evaluation results will appear here...",
        lines=10,
        interactive=False,
    )

    # Link the evaluation function to the button; the input order must match
    # the parameter order of evaluate_responses.
    evaluate_button.click(
        fn=evaluate_responses,
        inputs=[prompt_input, repo_a_input, model_a_input, repo_b_input, model_b_input, criteria_dropdown],
        outputs=[evaluation_output],
    )
# Launch the Gradio app
if __name__ == "__main__":
    # Add share=True to create a public link
    demo.launch()
|