Kolumbus Lindh committed · Commit 66cb564 · Parent(s): 1de90bd

updates
app.py CHANGED
@@ -2,11 +2,10 @@ import gradio as gr
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 
-# Load
+# Load the base LoRA evaluation model
 def load_lora_model():
     repo_id = "KolumbusLindh/LoRA-4100"
     model_file = "unsloth.F16.gguf"
-
     local_path = hf_hub_download(repo_id=repo_id, filename=model_file)
     print(f"Loading LoRA model from: {local_path}")
     return Llama(model_path=local_path, n_ctx=2048, n_threads=8)
@@ -14,84 +13,77 @@ def load_lora_model():
 lora_model = load_lora_model()
 print("LoRA model loaded successfully!")
 
-#
+# Function to load a user-specified model
 def load_user_model(model_path):
     print(f"Loading user model from: {model_path}")
     return Llama(model_path=model_path, n_ctx=2048, n_threads=8)
 
-# Generate response using
-def generate_response(
-
-    response = user_model(prompt, max_tokens=256, temperature=0.7)
+# Generate a response using the specified model and prompt
+def generate_response(model, prompt):
+    response = model(prompt, max_tokens=256, temperature=0.7)
     return response["choices"][0]["text"]
 
-# Evaluate responses using the LoRA model
+# Evaluate responses generated by two models using the LoRA model
 def evaluate_responses(prompt, model_a_path, model_b_path, evaluation_criteria):
+    # Load user-specified models
+    model_a = load_user_model(model_a_path)
+    model_b = load_user_model(model_b_path)
+
     # Generate responses
-    response_a = generate_response(
-    response_b = generate_response(
+    response_a = generate_response(model_a, prompt)
+    response_b = generate_response(model_b, prompt)
+
+    print(f"Response A: {response_a}")
+    print(f"Response B: {response_b}")
 
-    # Format the evaluation prompt
-    evaluation_prompt =
-        {"role": "system", "content": "You are an objective and thorough evaluator of instruction-based responses."},
-        {"role": "user", "content": f"""
+    # Format the evaluation prompt for the LoRA model
+    evaluation_prompt = f"""
 Prompt: {prompt}
 
 Response A: {response_a}
 Response B: {response_b}
 
-
+Evaluation Criteria: {evaluation_criteria}
 
-
-"""
-
-
-
+Please evaluate the responses based on the criteria above. Rate each response on a scale from 1 to 10 for each criterion and provide a detailed explanation. Finally, declare a winner or state 'draw' if they are equal.
+"""
+    # Use the LoRA model to evaluate the responses
-    evaluation_response = lora_model.create_chat_completion(
-        messages=evaluation_prompt,
+    evaluation_response = lora_model.create_completion(
+        prompt=evaluation_prompt,
         max_tokens=512,
         temperature=0.5
     )
-
-    evaluation_results = evaluation_response['choices'][0]['message']['content']
-
-    return evaluation_results
+    return evaluation_response["choices"][0]["text"]
 
 # Gradio interface
 with gr.Blocks(title="LLM as a Judge") as demo:
     gr.Markdown("## LLM as a Judge 🧐")
 
-    #
-    model_a_input = gr.Textbox(label="Model A Path or URL", placeholder="Enter the path or URL
-    model_b_input = gr.Textbox(label="Model B Path or URL", placeholder="Enter the path or URL
-    prompt_input = gr.Textbox(label="Enter
-
-    # Dropdown for evaluation criteria
+    # User inputs for models, prompt, and evaluation criteria
+    model_a_input = gr.Textbox(label="Model A Path or URL", placeholder="Enter the path or URL for Model A...")
+    model_b_input = gr.Textbox(label="Model B Path or URL", placeholder="Enter the path or URL for Model B...")
+    prompt_input = gr.Textbox(label="Enter Prompt", placeholder="Enter the prompt here...", lines=3)
     criteria_dropdown = gr.Dropdown(
         label="Select Evaluation Criteria",
         choices=["Clarity", "Completeness", "Accuracy", "Relevance", "User-Friendliness", "Depth", "Creativity"],
         value="Clarity",
         type="value"
     )
-
-    # Button to evaluate responses
     evaluate_button = gr.Button("Evaluate Models")
-
-    # Output for evaluation results
     evaluation_output = gr.Textbox(
-        label="Evaluation Results",
-        placeholder="The evaluation results will appear here...",
-        lines=10,
+        label="Evaluation Results",
+        placeholder="The evaluation results will appear here...",
+        lines=10,
         interactive=False
     )
-
-    # Link evaluation function to the button
+
+    # Link the evaluation function to the button
     evaluate_button.click(
         fn=evaluate_responses,
         inputs=[prompt_input, model_a_input, model_b_input, criteria_dropdown],
         outputs=[evaluation_output]
     )
 
-# Launch the app
+# Launch the Gradio app
 if __name__ == "__main__":
     demo.launch()
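For reference, a minimal sketch of how the updated evaluation flow could be exercised outside the Gradio UI, assuming the file above is saved as app.py and the two GGUF paths below point at real llama.cpp-compatible model files; both paths and the example prompt are hypothetical placeholders, not part of the commit. Note that importing app triggers the module-level download and load of the LoRA judge model.

# Minimal usage sketch (hypothetical paths; not part of the commit).
# Importing app runs load_lora_model() at module level, which downloads
# unsloth.F16.gguf from KolumbusLindh/LoRA-4100 via hf_hub_download.
from app import evaluate_responses

result = evaluate_responses(
    prompt="Explain the difference between a list and a tuple in Python.",
    model_a_path="models/model-a.Q4_K_M.gguf",  # hypothetical local path
    model_b_path="models/model-b.Q4_K_M.gguf",  # hypothetical local path
    evaluation_criteria="Clarity",
)
print(result)  # the judge's per-criterion ratings, explanation, and winner/draw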