Kolumbus Lindh committed on
Commit 1de90bd · 1 Parent(s): ec08b2a
Files changed (1)
  1. app.py +31 -17
app.py CHANGED
@@ -2,21 +2,35 @@ import gradio as gr
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 
-# Load the model
-def load_model():
+# Load LoRA-4100 model for evaluation
+def load_lora_model():
     repo_id = "KolumbusLindh/LoRA-4100"
     model_file = "unsloth.F16.gguf"
 
     local_path = hf_hub_download(repo_id=repo_id, filename=model_file)
-    print(f"Loading model from: {local_path}")
+    print(f"Loading LoRA model from: {local_path}")
     return Llama(model_path=local_path, n_ctx=2048, n_threads=8)
 
-print("Starting model loading...")
-model = load_model()
-print("Model loaded successfully!")
+lora_model = load_lora_model()
+print("LoRA model loaded successfully!")
 
-# Function to evaluate two responses
-def evaluate_responses(prompt, response_a, response_b, evaluation_criteria):
+# Load user-specified model
+def load_user_model(model_path):
+    print(f"Loading user model from: {model_path}")
+    return Llama(model_path=model_path, n_ctx=2048, n_threads=8)
+
+# Generate response using a specified model and prompt
+def generate_response(model_path, prompt):
+    user_model = load_user_model(model_path)
+    response = user_model(prompt, max_tokens=256, temperature=0.7)
+    return response["choices"][0]["text"]
+
+# Evaluate responses using the LoRA model
+def evaluate_responses(prompt, model_a_path, model_b_path, evaluation_criteria):
+    # Generate responses
+    response_a = generate_response(model_a_path, prompt)
+    response_b = generate_response(model_b_path, prompt)
+
     # Format the evaluation prompt
     evaluation_prompt = [
         {"role": "system", "content": "You are an objective and thorough evaluator of instruction-based responses."},
@@ -33,7 +47,7 @@ For each criterion, provide a rating of the responses on a scale from 1 to 10, a
     ]
 
     # Generate the evaluation
-    evaluation_response = model.create_chat_completion(
+    evaluation_response = lora_model.create_chat_completion(
         messages=evaluation_prompt,
         max_tokens=512,
         temperature=0.5
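Taken together, the change turns the app from judging two pasted responses into judging two models: each click now loads both user-specified GGUF files, generates one completion apiece, and asks the LoRA-4100 judge to compare them. A minimal sketch of driving that flow directly, outside Gradio; the model paths and prompt are hypothetical placeholders, and it assumes evaluate_responses returns the judge's text, as its wiring to evaluation_output below suggests:

    # Sketch only: both .gguf paths are hypothetical placeholders.
    verdict = evaluate_responses(
        prompt="Explain the difference between a list and a tuple in Python.",
        model_a_path="/models/candidate-a.gguf",
        model_b_path="/models/candidate-b.gguf",
        evaluation_criteria="Clarity",
    )
    print(verdict)

Note that generate_response calls load_user_model on every invocation, so each evaluation reloads both GGUF files from disk; caching the Llama instances by path would avoid the repeated load cost.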
 
@@ -47,12 +61,12 @@ For each criterion, provide a rating of the responses on a scale from 1 to 10, a
 with gr.Blocks(title="LLM as a Judge") as demo:
     gr.Markdown("## LLM as a Judge 🧐")
 
-    # Input fields for the prompt, two responses, and selection of criteria
+    # Inputs for model paths, prompt, and evaluation criteria
+    model_a_input = gr.Textbox(label="Model A Path or URL", placeholder="Enter the path or URL to Model A...")
+    model_b_input = gr.Textbox(label="Model B Path or URL", placeholder="Enter the path or URL to Model B...")
     prompt_input = gr.Textbox(label="Enter the Prompt", placeholder="Enter the prompt here...", lines=3)
-    response_a_input = gr.Textbox(label="Response A", placeholder="Enter Response A here...", lines=5)
-    response_b_input = gr.Textbox(label="Response B", placeholder="Enter Response B here...", lines=5)
 
-    # Dropdown for selecting evaluation criteria
+    # Dropdown for evaluation criteria
     criteria_dropdown = gr.Dropdown(
         label="Select Evaluation Criteria",
         choices=["Clarity", "Completeness", "Accuracy", "Relevance", "User-Friendliness", "Depth", "Creativity"],
@@ -60,10 +74,10 @@ with gr.Blocks(title="LLM as a Judge") as demo:
         type="value"
     )
 
-    # Button to start the evaluation
-    evaluate_button = gr.Button("Evaluate Responses")
+    # Button to evaluate responses
+    evaluate_button = gr.Button("Evaluate Models")
 
-    # Label for displaying the evaluation results
+    # Output for evaluation results
    evaluation_output = gr.Textbox(
         label="Evaluation Results",
         placeholder="The evaluation results will appear here...",
@@ -74,7 +88,7 @@ with gr.Blocks(title="LLM as a Judge") as demo:
     # Link evaluation function to the button
     evaluate_button.click(
         fn=evaluate_responses,
-        inputs=[prompt_input, response_a_input, response_b_input, criteria_dropdown],
+        inputs=[prompt_input, model_a_input, model_b_input, criteria_dropdown],
         outputs=[evaluation_output]
     )
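The diff context ends before the bottom of the file. Assuming the app follows the standard Gradio pattern, the unshown closing lines would simply launch the Blocks interface:

    # Hypothetical closing lines, not part of the shown diff:
    if __name__ == "__main__":
        demo.launch()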