# LLM-as-a-judge / app.py
# Hosting-page metadata: Kolumbus Lindh · "all things" · 04d9cf4 · raw · history blame · 3.38 kB
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Load the model
def load_model(
    repo_id: str = "KolumbusLindh/LoRA-4100",
    model_file: str = "unsloth.F16.gguf",
    n_ctx: int = 2048,
    n_threads: int = 8,
) -> "Llama":
    """Fetch a GGUF model file from the Hugging Face Hub and load it with llama.cpp.

    All arguments default to the values this app has always used, so calling
    ``load_model()`` with no arguments behaves exactly as before.

    Args:
        repo_id: Hub repository that hosts the model file.
        model_file: Name of the model file inside ``repo_id``.
        n_ctx: Context size forwarded to ``Llama``.
        n_threads: Thread count forwarded to ``Llama``.

    Returns:
        The ``Llama`` instance built from the file path returned by
        ``hf_hub_download``.
    """
    local_path = hf_hub_download(repo_id=repo_id, filename=model_file)
    print(f"Loading model from: {local_path}")
    return Llama(model_path=local_path, n_ctx=n_ctx, n_threads=n_threads)
# Load the model once, at module import time. `generate_and_evaluate` reads
# this module-level `model` for both of its chat-completion calls.
# The two prints bracket the load so its start and finish show up on stdout.
print("Starting model loading...")
model = load_model()
print("Model loaded successfully!")
# Function to generate and evaluate content
def generate_and_evaluate(preconfigured_prompt):
    """Answer the prompt with the model, then ask the same model to grade that answer.

    Both calls go through the module-level ``model``.

    Args:
        preconfigured_prompt: Text sent as the single user message of the
            generation call; it is also quoted inside the evaluation request.

    Returns:
        A ``(generated_content, evaluation_results)`` tuple holding the message
        content of the first choice of each chat completion.
    """

    def first_message_text(completion):
        # Message content of the first choice in a chat-completion response.
        return completion['choices'][0]['message']['content']

    # Step 1: generation pass (256 tokens max, temperature 1.5).
    story_completion = model.create_chat_completion(
        messages=[{"role": "user", "content": preconfigured_prompt}],
        max_tokens=256,
        temperature=1.5,
    )
    generated_content = first_message_text(story_completion)

    # Step 2: evaluation pass (128 tokens max, temperature 0.2). The request
    # embeds both the prompt and the text produced in step 1.
    judge_system_text = "You are a strict language evaluator who provides binary assessments of texts."
    judge_user_text = f"""Carefully evaluate the generated story:
Prompt: {preconfigured_prompt}
Generated response: {generated_content}
Provide a clear evaluation as follows:
For each question, write the full question followed by your "Yes" or "No" answer.
Example format:
1. Is the story exactly 50 words? - Yes
2. Does the story contain the letter 'a'? - No
Now answer these questions:
1. Is the story exactly 50 words?
2. Does the story contain the letter 'a'?
3. Does the story contain the word "alabaster"?
4. Does the reader understand that the cat's name is Alabaster?
5. Is the story 100% in English?
6. Does the text rhyme?"""
    judge_completion = model.create_chat_completion(
        messages=[
            {"role": "system", "content": judge_system_text},
            {"role": "user", "content": judge_user_text},
        ],
        max_tokens=128,
        temperature=0.2,
    )

    return generated_content, first_message_text(judge_completion)
# Preconfigured prompt
# Fixed task given to the model: one sentence per literal, joined with single
# spaces into one line of text.
PRECONFIGURED_PROMPT = (
    "Write a story about the cat Alabaster. "
    "It should be exactly 50 words and you are not allowed to use the letter 'a'. "
    "The reader must understand that the cat's name is Alabaster. "
    "Only replacing the letter 'a' with something like \"_\" is not enough. "
    "The text should rhyme."
)
# Gradio interface
with gr.Blocks(title="LLM as a Judge") as demo:
    # Heading, then the single button that triggers a run.
    gr.Markdown(value="## LLM as a Judge 🧐")
    generate_evaluate_button = gr.Button(value="Judge the LLM!")

    # Show the fixed prompt on the page: a caption followed by the prompt text.
    gr.Label(value="Preconfigured prompt:")
    gr.Label(value=PRECONFIGURED_PROMPT)

    # Non-interactive text boxes that receive the two results.
    generated_output = gr.Textbox(
        interactive=False,
        lines=5,
        placeholder="The generated content will appear here...",
        label="Generated Content",
    )
    evaluation_output = gr.Textbox(
        interactive=False,
        lines=8,
        placeholder="The evaluation results will appear here...",
        label="Evaluation Results",
    )

    # The click handler's only input is the fixed prompt, carried in a State
    # component; its two return values fill the two text boxes in order.
    prompt_state = gr.State(PRECONFIGURED_PROMPT)
    result_boxes = [generated_output, evaluation_output]
    generate_evaluate_button.click(
        fn=generate_and_evaluate,
        inputs=[prompt_state],
        outputs=result_boxes,
    )
# Launch the app
# Server settings for the Gradio app: host 0.0.0.0, port 7860, share disabled.
launch_options = {
    "server_name": "0.0.0.0",
    "server_port": 7860,
    "share": False,
}
demo.launch(**launch_options)