Kolumbus Lindh committed on
Commit 027f91a · 1 Parent(s): 5781d4e
Files changed (1)
  1. app.py +29 -29
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 
-# Function to load a user-specified model from Hugging Face
+# Load a user-specified model
 def load_user_model(repo_id, model_file):
     print(f"Downloading model {model_file} from repository {repo_id}...")
     local_path = hf_hub_download(repo_id=repo_id, filename=model_file)
@@ -14,9 +14,12 @@ def generate_response(model, prompt):
     response = model(prompt, max_tokens=256, temperature=0.7)
     return response["choices"][0]["text"]
 
-# Evaluate responses generated by two models using the LoRA evaluation model
-def evaluate_responses(prompt, repo_a, model_a, repo_b, model_b, criteria_list):
-    # Load user-specified models
+# Evaluate responses using the LoRA evaluation model
+def evaluate_responses(prompt, repo_a, model_a, repo_b, model_b, evaluation_criteria):
+    if len(evaluation_criteria) > 3:
+        return "Error: Please select up to 3 evaluation criteria only."
+
+    # Load models
     model_a_instance = load_user_model(repo_a, model_a)
     model_b_instance = load_user_model(repo_b, model_b)
 
@@ -24,10 +27,12 @@ def evaluate_responses(prompt, repo_a, model_a, repo_b, model_b, criteria_list):
     response_a = generate_response(model_a_instance, prompt)
     response_b = generate_response(model_b_instance, prompt)
 
+    # Display generated responses
     print(f"Response A: {response_a}")
     print(f"Response B: {response_b}")
 
-    # Format the evaluation prompt for the LoRA model
+    # Format the evaluation prompt
+    criteria_list = ", ".join(evaluation_criteria)
     evaluation_prompt = f"""
 Prompt: {prompt}
 
@@ -45,15 +50,16 @@ Please evaluate the responses based on the selected criteria. For each criterion
         temperature=0.5
     )
     evaluation_results = evaluation_response["choices"][0]["text"]
-
+
+    # Combine results for display
     final_output = f"""
-Response A:\n{response_a}\n\n
-Response B:\n{response_b}\n\n
-Evaluation Results:\n{evaluation_results}
-"""
+Response A:\n{response_a}\n\n
+Response B:\n{response_b}\n\n
+Evaluation Results:\n{evaluation_results}
+"""
     return final_output
 
-# Load the base LoRA evaluation model
+# Load the LoRA evaluation model
 def load_lora_model():
     repo_id = "KolumbusLindh/LoRA-4100"
     model_file = "unsloth.F16.gguf"
@@ -69,41 +75,35 @@ print("LoRA evaluation model loaded successfully!")
 with gr.Blocks(title="LLM as a Judge") as demo:
     gr.Markdown("## LLM as a Judge 🧐")
 
-    # Inputs for Model A repository and file
-    repo_a_input = gr.Textbox(label="Model A Repository (e.g., KolumbusLindh/LoRA-4100)", placeholder="Enter the Hugging Face repo name for Model A...")
-    model_a_input = gr.Textbox(label="Model A File Name (e.g., unsloth.F16.gguf)", placeholder="Enter the model filename for Model A...")
-
-    # Inputs for Model B repository and file
-    repo_b_input = gr.Textbox(label="Model B Repository (e.g., KolumbusLindh/LoRA-4100)", placeholder="Enter the Hugging Face repo name for Model B...")
-    model_b_input = gr.Textbox(label="Model B File Name (e.g., unsloth.F16.gguf)", placeholder="Enter the model filename for Model B...")
+    # Model inputs
+    repo_a_input = gr.Textbox(label="Model A Repository", placeholder="Enter the Hugging Face repo name for Model A...")
+    model_a_input = gr.Textbox(label="Model A File Name", placeholder="Enter the model filename for Model A...")
+    repo_b_input = gr.Textbox(label="Model B Repository", placeholder="Enter the Hugging Face repo name for Model B...")
+    model_b_input = gr.Textbox(label="Model B File Name", placeholder="Enter the model filename for Model B...")
 
-    # Input for prompt and evaluation criteria
+    # Prompt and criteria inputs
     prompt_input = gr.Textbox(label="Enter Prompt", placeholder="Enter the prompt here...", lines=3)
     criteria_dropdown = gr.CheckboxGroup(
         label="Select Up to 3 Evaluation Criteria",
-        choices=["Clarity", "Completeness", "Accuracy", "Relevance", "User-Friendliness", "Depth", "Creativity"],
-        value=["Clarity"],
-        max_choices=3
+        choices=["Clarity", "Completeness", "Accuracy", "Relevance", "User-Friendliness", "Depth", "Creativity"]
     )
 
-    # Button to evaluate responses
+    # Button and outputs
     evaluate_button = gr.Button("Evaluate Models")
-
-    # Output for evaluation results
     evaluation_output = gr.Textbox(
         label="Evaluation Results",
         placeholder="The evaluation results will appear here...",
-        lines=10,
+        lines=20,
         interactive=False
     )
 
-    # Link the evaluation function to the button
+    # Link evaluation function
     evaluate_button.click(
         fn=evaluate_responses,
         inputs=[prompt_input, repo_a_input, model_a_input, repo_b_input, model_b_input, criteria_dropdown],
         outputs=[evaluation_output]
     )
 
-# Launch the Gradio app
+# Launch app
 if __name__ == "__main__":
-    demo.launch() # Add share=True to create a public link
+    demo.launch()
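
A minimal sketch of the criteria handling this commit adds to evaluate_responses, assuming the Gradio CheckboxGroup passes its selections as a plain Python list; the selections below are illustrative only, not part of the commit.

# Illustrative selections (hypothetical), as a CheckboxGroup would pass them
selected_criteria = ["Clarity", "Accuracy", "Depth", "Creativity"]  # 4 picks

if len(selected_criteria) > 3:
    # Mirrors the guard added at the top of evaluate_responses
    result = "Error: Please select up to 3 evaluation criteria only."
else:
    # Mirrors the joining step used to build the evaluation prompt
    result = ", ".join(selected_criteria)

print(result)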