Kolumbus Lindh committed
Commit 1de90bd · 1 Parent(s): ec08b2a
updates
app.py CHANGED
@@ -2,21 +2,35 @@ import gradio as gr
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 
-# Load
-def load_model():
+# Load LoRA-4100 model for evaluation
+def load_lora_model():
     repo_id = "KolumbusLindh/LoRA-4100"
     model_file = "unsloth.F16.gguf"
 
     local_path = hf_hub_download(repo_id=repo_id, filename=model_file)
-    print(f"Loading model from: {local_path}")
+    print(f"Loading LoRA model from: {local_path}")
     return Llama(model_path=local_path, n_ctx=2048, n_threads=8)
 
-
-model = load_model()
-print("Model loaded successfully!")
+lora_model = load_lora_model()
+print("LoRA model loaded successfully!")
 
-#
-def evaluate_responses(prompt, response_a, response_b, evaluation_criteria):
+# Load user-specified model
+def load_user_model(model_path):
+    print(f"Loading user model from: {model_path}")
+    return Llama(model_path=model_path, n_ctx=2048, n_threads=8)
+
+# Generate response using a specified model and prompt
+def generate_response(model_path, prompt):
+    user_model = load_user_model(model_path)
+    response = user_model(prompt, max_tokens=256, temperature=0.7)
+    return response["choices"][0]["text"]
+
+# Evaluate responses using the LoRA model
+def evaluate_responses(prompt, model_a_path, model_b_path, evaluation_criteria):
+    # Generate responses
+    response_a = generate_response(model_a_path, prompt)
+    response_b = generate_response(model_b_path, prompt)
+
     # Format the evaluation prompt
     evaluation_prompt = [
         {"role": "system", "content": "You are an objective and thorough evaluator of instruction-based responses."},
@@ -33,7 +47,7 @@ For each criterion, provide a rating of the responses on a scale from 1 to 10, a
     ]
 
     # Generate the evaluation
-    evaluation_response = model.create_chat_completion(
+    evaluation_response = lora_model.create_chat_completion(
         messages=evaluation_prompt,
         max_tokens=512,
         temperature=0.5
@@ -47,12 +61,12 @@ For each criterion, provide a rating of the responses on a scale from 1 to 10, a
 with gr.Blocks(title="LLM as a Judge") as demo:
     gr.Markdown("## LLM as a Judge 🧐")
 
-    #
+    # Inputs for model paths, prompt, and evaluation criteria
+    model_a_input = gr.Textbox(label="Model A Path or URL", placeholder="Enter the path or URL to Model A...")
+    model_b_input = gr.Textbox(label="Model B Path or URL", placeholder="Enter the path or URL to Model B...")
     prompt_input = gr.Textbox(label="Enter the Prompt", placeholder="Enter the prompt here...", lines=3)
-    response_a_input = gr.Textbox(label="Response A", placeholder="Enter Response A here...", lines=5)
-    response_b_input = gr.Textbox(label="Response B", placeholder="Enter Response B here...", lines=5)
 
-    # Dropdown for
+    # Dropdown for evaluation criteria
    criteria_dropdown = gr.Dropdown(
         label="Select Evaluation Criteria",
         choices=["Clarity", "Completeness", "Accuracy", "Relevance", "User-Friendliness", "Depth", "Creativity"],
@@ -60,10 +74,10 @@ with gr.Blocks(title="LLM as a Judge") as demo:
         type="value"
     )
 
-    # Button to
-    evaluate_button = gr.Button("Evaluate Responses")
+    # Button to evaluate responses
+    evaluate_button = gr.Button("Evaluate Models")
 
-    #
+    # Output for evaluation results
     evaluation_output = gr.Textbox(
         label="Evaluation Results",
         placeholder="The evaluation results will appear here...",
@@ -74,7 +88,7 @@ with gr.Blocks(title="LLM as a Judge") as demo:
     # Link evaluation function to the button
     evaluate_button.click(
         fn=evaluate_responses,
-        inputs=[prompt_input, response_a_input, response_b_input, criteria_dropdown],
+        inputs=[prompt_input, model_a_input, model_b_input, criteria_dropdown],
         outputs=[evaluation_output]
     )
 
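For anyone trying the updated flow outside the Space, a minimal usage sketch follows. It is not part of the commit: the GGUF paths are hypothetical placeholders, importing app runs the module-level load_lora_model() (which downloads the judge model from the Hub), and the sketch assumes evaluate_responses returns the evaluation text, as its wiring to the evaluation_output Textbox suggests.

# Minimal usage sketch (assumptions noted above; not part of the commit).
from app import evaluate_responses  # importing app also downloads and loads the LoRA judge

result = evaluate_responses(
    prompt="Explain what a context window is in one paragraph.",
    model_a_path="/models/model-a.gguf",  # hypothetical local GGUF path
    model_b_path="/models/model-b.gguf",  # hypothetical local GGUF path
    evaluation_criteria="Clarity",
)
print(result)

Note that generate_response reloads the user model on every call, so each evaluation pays the full model-load cost twice; caching the Llama instances per path would be the obvious follow-up optimization.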