Spaces:

KolumbusLindh
/

LLM-as-a-judge

Sleeping

LLM-as-a-judge / app.py

Kolumbus Lindh

all things

04d9cf4 7 months ago

3.38 kB

	import gradio as gr
	from llama_cpp import Llama
	from huggingface_hub import hf_hub_download

	# Load the model
	def load_model(
	    repo_id: str = "KolumbusLindh/LoRA-4100",
	    model_file: str = "unsloth.F16.gguf",
	    n_ctx: int = 2048,
	    n_threads: int = 8,
	) -> "Llama":
	    """Fetch a model file from the Hugging Face Hub and load it with llama.cpp.

	    Called without arguments this behaves exactly like the previous
	    hard-coded version; the parameters only make the repository, file and
	    runtime settings overridable.

	    Args:
	        repo_id: Hub repository passed to ``hf_hub_download``.
	        model_file: File name inside that repository.
	        n_ctx: Context size forwarded to ``Llama``.
	        n_threads: Thread count forwarded to ``Llama``.

	    Returns:
	        The ``Llama`` instance built from the downloaded file.
	    """
	    local_path = hf_hub_download(repo_id=repo_id, filename=model_file)
	    print(f"Loading model from: {local_path}")
	    return Llama(model_path=local_path, n_ctx=n_ctx, n_threads=n_threads)

	# Load the model once, at import time, into the module-level name `model`;
	# generate_and_evaluate() uses this single instance for both of its
	# completion calls. The two prints bracket the load so its start and end
	# are visible in the process output.
	print("Starting model loading...")
	model = load_model()
	print("Model loaded successfully!")

	# Function to generate and evaluate content
	def generate_and_evaluate(preconfigured_prompt):
	    """Generate a reply to the prompt, then ask the same model to grade it.

	    Args:
	        preconfigured_prompt: Text sent as the only user message of the
	            generation request and echoed into the evaluation request.

	    Returns:
	        A ``(generated_content, evaluation_results)`` tuple holding the
	        message content of the first choice of each completion.
	    """
	    # Pass 1: produce the story (up to 256 tokens, temperature 1.5).
	    draft = model.create_chat_completion(
	        messages=[{"role": "user", "content": preconfigured_prompt}],
	        max_tokens=256,
	        temperature=1.5,
	    )
	    generated_content = draft['choices'][0]['message']['content']

	    # Pass 2: build the judging request. The continuation lines of the
	    # f-string are part of the runtime text, so they are kept exactly as
	    # they appear in the file rather than re-indented with the code.
	    judge_request = f"""Carefully evaluate the generated story:
	Prompt: {preconfigured_prompt}
	Generated response: {generated_content}
	Provide a clear evaluation as follows:
	For each question, write the full question followed by your "Yes" or "No" answer.
	Example format:
	1. Is the story exactly 50 words? - Yes
	2. Does the story contain the letter 'a'? - No
	Now answer these questions:
	1. Is the story exactly 50 words?
	2. Does the story contain the letter 'a'?
	3. Does the story contain the word "alabaster"?
	4. Does the reader understand that the cat's name is Alabaster?
	5. Is the story 100% in English?
	6. Does the text rhyme?"""
	    verdict = model.create_chat_completion(
	        messages=[
	            {"role": "system", "content": "You are a strict language evaluator who provides binary assessments of texts."},
	            {"role": "user", "content": judge_request},
	        ],
	        max_tokens=128,
	        temperature=0.2,
	    )
	    evaluation_results = verdict['choices'][0]['message']['content']

	    return generated_content, evaluation_results

	# Preconfigured prompt
	# Fixed task given to the model; built from adjacent literals so each
	# requirement sits on its own line. The resulting string is unchanged.
	PRECONFIGURED_PROMPT = (
	    "Write a story about the cat Alabaster. "
	    "It should be exactly 50 words and you are not allowed to use the letter 'a'. "
	    "The reader must understand that the cat's name is Alabaster. "
	    'Only replacing the letter \'a\' with something like "_" is not enough. '
	    "The text should rhyme."
	)

	# Gradio interface
	with gr.Blocks(title="LLM as a Judge") as demo:
	    gr.Markdown("## LLM as a Judge 🧐")

	    generate_evaluate_button = gr.Button("Judge the LLM!")

	    # Show the fixed prompt in a read-only Textbox. gr.Label is meant for
	    # classification output, so it is the wrong component for a paragraph
	    # of static text; a Textbox also matches the two output boxes below.
	    gr.Textbox(
	        label="Preconfigured prompt",
	        value=PRECONFIGURED_PROMPT,
	        lines=3,
	        interactive=False,
	    )

	    generated_output = gr.Textbox(
	        label="Generated Content",
	        placeholder="The generated content will appear here...",
	        lines=5,
	        interactive=False,
	    )

	    evaluation_output = gr.Textbox(
	        label="Evaluation Results",
	        placeholder="The evaluation results will appear here...",
	        lines=8,
	        interactive=False,
	    )

	    # One click runs generation and evaluation; the prompt is supplied as
	    # a constant State value and the two results fill the output boxes.
	    generate_evaluate_button.click(
	        fn=generate_and_evaluate,
	        inputs=[gr.State(PRECONFIGURED_PROMPT)],
	        outputs=[generated_output, evaluation_output],
	    )

	# Launch the app
	# Serve on host 0.0.0.0, port 7860, with Gradio's share option turned off.
	demo.launch(server_name="0.0.0.0", server_port=7860, share=False)