Spaces:
Sleeping
Sleeping
import gradio as gr | |
from llama_cpp import Llama | |
from huggingface_hub import hf_hub_download | |
# Load a user-specified model file from the Hugging Face Hub
def load_user_model(repo_id, model_file):
    """Download a model file from the Hugging Face Hub and load it with llama.cpp.

    Args:
        repo_id: Hub repository name, e.g. ``"user/repo"``.
        model_file: Name of the model file inside that repository.

    Returns:
        A ``Llama`` instance built from the downloaded file with
        ``n_ctx=2048`` and ``n_threads=8``.

    Raises:
        ValueError: If either argument is missing or blank.
    """
    # Both values arrive from free-text UI fields: tolerate stray whitespace
    # and reject blanks here instead of failing later inside the Hub client.
    repo_id = (repo_id or "").strip()
    model_file = (model_file or "").strip()
    if not repo_id:
        raise ValueError("A Hugging Face repository name is required.")
    if not model_file:
        raise ValueError("A model file name is required.")

    print(f"Downloading model {model_file} from repository {repo_id}...")
    local_path = hf_hub_download(repo_id=repo_id, filename=model_file)
    print(f"Model downloaded to: {local_path}")
    return Llama(model_path=local_path, n_ctx=2048, n_threads=8)
# Generate a response from the given model for the given prompt
def generate_response(model, prompt):
    """Run *prompt* through *model* and return the first choice's text.

    ``model`` is invoked as a callable with the prompt plus
    ``max_tokens=256`` and ``temperature=0.7``. Its result is indexed as
    ``result["choices"][0]["text"]``.
    """
    sampling = {"max_tokens": 256, "temperature": 0.7}
    completion = model(prompt, **sampling)
    first_choice = completion["choices"][0]
    return first_choice["text"]
# Generate responses with two models and have the LoRA evaluation model judge them
def evaluate_responses(prompt, repo_a, model_a, repo_b, model_b, evaluation_criteria):
    """Have two user-chosen models answer *prompt*, then ask the judge to compare them.

    Args:
        prompt: Text sent unchanged to both models.
        repo_a, model_a: Hub repository and file name for model A.
        repo_b, model_b: Hub repository and file name for model B.
        evaluation_criteria: Criterion text inserted into the judge prompt.

    Returns:
        The text of the first choice produced by the module-level
        ``lora_model`` for the assembled evaluation prompt.
    """
    # Load both contestants (A, then B) before generating anything.
    contestants = [
        load_user_model(repo, filename)
        for repo, filename in ((repo_a, model_a), (repo_b, model_b))
    ]

    # Answers are produced in the same A-then-B order.
    answer_a, answer_b = [generate_response(llm, prompt) for llm in contestants]
    print(f"Response A: {answer_a}")
    print(f"Response B: {answer_b}")

    # The literal's lines stay flush-left: their whitespace is part of the
    # text the judge model receives.
    judge_prompt = f"""
Prompt: {prompt}
Response A: {answer_a}
Response B: {answer_b}
Evaluation Criteria: {evaluation_criteria}
Please evaluate the responses based on the criteria above. Rate each response on a scale from 1 to 10 for each criterion and provide a detailed explanation. Finally, declare a winner or state 'draw' if they are equal.
"""

    verdict = lora_model.create_completion(
        prompt=judge_prompt,
        temperature=0.5,
        max_tokens=512,
    )
    return verdict["choices"][0]["text"]
# Load the LoRA evaluation (judge) model
def load_lora_model():
    """Download the fixed evaluation model and return it as a ``Llama`` instance.

    The file ``unsloth.F16.gguf`` is fetched from the
    ``KolumbusLindh/LoRA-4100`` repository with ``hf_hub_download`` and
    loaded with ``n_ctx=2048`` and ``n_threads=8``.
    """
    judge_repo, judge_file = "KolumbusLindh/LoRA-4100", "unsloth.F16.gguf"
    print(f"Downloading LoRA evaluation model from repository {judge_repo}...")
    gguf_path = hf_hub_download(filename=judge_file, repo_id=judge_repo)
    print(f"LoRA evaluation model downloaded to: {gguf_path}")
    return Llama(n_threads=8, n_ctx=2048, model_path=gguf_path)
# Load the judge once at import time; evaluate_responses reads this
# module-level name on every request.
lora_model = load_lora_model()
print("LoRA evaluation model loaded successfully!")
# Gradio interface
# Build the UI: two (repository, file) pairs, a prompt, one criterion, and a
# read-only box that receives the judge's verdict.
with gr.Blocks(title="LLM as a Judge") as demo:
    # The heading emoji had degraded to mojibake ("π§"). That is what the
    # UTF-8 bytes of U+1F9D0 look like when mis-decoded, so it is restored
    # here as an escape that cannot be re-mangled by a file re-encoding.
    # NOTE(review): U+1F9D0 is the best match for the garbled bytes; confirm.
    gr.Markdown("## LLM as a Judge \U0001F9D0")

    # Inputs for Model A repository and file
    repo_a_input = gr.Textbox(
        label="Model A Repository (e.g., KolumbusLindh/LoRA-4100)",
        placeholder="Enter the Hugging Face repo name for Model A...",
    )
    model_a_input = gr.Textbox(
        label="Model A File Name (e.g., unsloth.F16.gguf)",
        placeholder="Enter the model filename for Model A...",
    )

    # Inputs for Model B repository and file
    repo_b_input = gr.Textbox(
        label="Model B Repository (e.g., KolumbusLindh/LoRA-4100)",
        placeholder="Enter the Hugging Face repo name for Model B...",
    )
    model_b_input = gr.Textbox(
        label="Model B File Name (e.g., unsloth.F16.gguf)",
        placeholder="Enter the model filename for Model B...",
    )

    # Input for prompt and evaluation criteria
    prompt_input = gr.Textbox(
        label="Enter Prompt",
        placeholder="Enter the prompt here...",
        lines=3,
    )
    criteria_dropdown = gr.Dropdown(
        label="Select Evaluation Criteria",
        choices=[
            "Clarity",
            "Completeness",
            "Accuracy",
            "Relevance",
            "User-Friendliness",
            "Depth",
            "Creativity",
        ],
        value="Clarity",
        type="value",
    )

    # Button to evaluate responses
    evaluate_button = gr.Button("Evaluate Models")

    # Output for evaluation results
    evaluation_output = gr.Textbox(
        label="Evaluation Results",
        placeholder="The evaluation results will appear here...",
        lines=10,
        interactive=False,
    )

    # Link the evaluation function to the button; the input order must match
    # the parameter order of evaluate_responses.
    evaluate_button.click(
        fn=evaluate_responses,
        inputs=[
            prompt_input,
            repo_a_input,
            model_a_input,
            repo_b_input,
            model_b_input,
            criteria_dropdown,
        ],
        outputs=[evaluation_output],
    )
# Launch the Gradio app
if __name__ == "__main__":
    # Start the server only when this file is executed as a script; a plain
    # import does not launch it.
    demo.launch()  # Add share=True to create a public link