Kolumbus Lindh committed
Commit 5781d4e · 1 Parent(s): 7841304
Files changed (1)
  1. app.py +26 -25
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 
-# Load a user-specified model
+# Function to load a user-specified model from Hugging Face
 def load_user_model(repo_id, model_file):
     print(f"Downloading model {model_file} from repository {repo_id}...")
     local_path = hf_hub_download(repo_id=repo_id, filename=model_file)
@@ -14,9 +14,9 @@ def generate_response(model, prompt):
     response = model(prompt, max_tokens=256, temperature=0.7)
     return response["choices"][0]["text"]
 
-# Evaluate responses using the LoRA evaluation model
-def evaluate_responses(prompt, repo_a, model_a, repo_b, model_b, evaluation_criteria):
-    # Load models
+# Evaluate responses generated by two models using the LoRA evaluation model
+def evaluate_responses(prompt, repo_a, model_a, repo_b, model_b, criteria_list):
+    # Load user-specified models
     model_a_instance = load_user_model(repo_a, model_a)
     model_b_instance = load_user_model(repo_b, model_b)
 
@@ -24,12 +24,10 @@ def evaluate_responses(prompt, repo_a, model_a, repo_b, model_b, evaluation_criteria):
     response_a = generate_response(model_a_instance, prompt)
     response_b = generate_response(model_b_instance, prompt)
 
-    # Display generated responses
     print(f"Response A: {response_a}")
     print(f"Response B: {response_b}")
 
-    # Format the evaluation prompt
-    criteria_list = ", ".join(evaluation_criteria)
+    # Format the evaluation prompt for the LoRA model
     evaluation_prompt = f"""
 Prompt: {prompt}
 
@@ -47,16 +45,15 @@ Please evaluate the responses based on the selected criteria. For each criterion
         temperature=0.5
     )
     evaluation_results = evaluation_response["choices"][0]["text"]
-
-    # Combine results for display
+
     final_output = f"""
-Response A:\n{response_a}\n\n
-Response B:\n{response_b}\n\n
-Evaluation Results:\n{evaluation_results}
-"""
+Response A:\n{response_a}\n\n
+Response B:\n{response_b}\n\n
+Evaluation Results:\n{evaluation_results}
+"""
     return final_output
 
-# Load the LoRA evaluation model
+# Load the base LoRA evaluation model
 def load_lora_model():
     repo_id = "KolumbusLindh/LoRA-4100"
     model_file = "unsloth.F16.gguf"
@@ -72,13 +69,15 @@ print("LoRA evaluation model loaded successfully!")
 with gr.Blocks(title="LLM as a Judge") as demo:
     gr.Markdown("## LLM as a Judge 🧐")
 
-    # Model inputs
-    repo_a_input = gr.Textbox(label="Model A Repository", placeholder="Enter the Hugging Face repo name for Model A...")
-    model_a_input = gr.Textbox(label="Model A File Name", placeholder="Enter the model filename for Model A...")
-    repo_b_input = gr.Textbox(label="Model B Repository", placeholder="Enter the Hugging Face repo name for Model B...")
-    model_b_input = gr.Textbox(label="Model B File Name", placeholder="Enter the model filename for Model B...")
+    # Inputs for Model A repository and file
+    repo_a_input = gr.Textbox(label="Model A Repository (e.g., KolumbusLindh/LoRA-4100)", placeholder="Enter the Hugging Face repo name for Model A...")
+    model_a_input = gr.Textbox(label="Model A File Name (e.g., unsloth.F16.gguf)", placeholder="Enter the model filename for Model A...")
 
-    # Prompt and criteria inputs
+    # Inputs for Model B repository and file
+    repo_b_input = gr.Textbox(label="Model B Repository (e.g., KolumbusLindh/LoRA-4100)", placeholder="Enter the Hugging Face repo name for Model B...")
+    model_b_input = gr.Textbox(label="Model B File Name (e.g., unsloth.F16.gguf)", placeholder="Enter the model filename for Model B...")
+
+    # Input for prompt and evaluation criteria
     prompt_input = gr.Textbox(label="Enter Prompt", placeholder="Enter the prompt here...", lines=3)
     criteria_dropdown = gr.CheckboxGroup(
         label="Select Up to 3 Evaluation Criteria",
@@ -87,22 +86,24 @@ with gr.Blocks(title="LLM as a Judge") as demo:
         max_choices=3
     )
 
-    # Button and outputs
+    # Button to evaluate responses
     evaluate_button = gr.Button("Evaluate Models")
+
+    # Output for evaluation results
     evaluation_output = gr.Textbox(
         label="Evaluation Results",
         placeholder="The evaluation results will appear here...",
-        lines=20,
+        lines=10,
         interactive=False
     )
 
-    # Link evaluation function
+    # Link the evaluation function to the button
     evaluate_button.click(
         fn=evaluate_responses,
         inputs=[prompt_input, repo_a_input, model_a_input, repo_b_input, model_b_input, criteria_dropdown],
         outputs=[evaluation_output]
     )
 
-# Launch app
+# Launch the Gradio app
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch() # Add share=True to create a public link
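A caveat on the evaluate_responses change: the parameter is now named criteria_list and receives the CheckboxGroup selection directly, which Gradio passes as a Python list, while the old ", ".join(evaluation_criteria) step has been removed. If the elided portion of evaluation_prompt interpolates {criteria_list}, it will now render Python list syntax rather than a comma-separated string. A minimal sketch of the join that would restore the old formatting (the values below are placeholders, not the app's actual choices):

# Hypothetical fix-up inside evaluate_responses: rebuild the
# comma-separated string before formatting evaluation_prompt.
criteria_list = ["Criterion 1", "Criterion 2"]  # placeholder for the CheckboxGroup selection
criteria_text = ", ".join(criteria_list)
print(criteria_text)  # prints: Criterion 1, Criterion 2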
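For reference, a minimal standalone sketch of the download-and-generate path that load_user_model and generate_response implement; the Llama(...) construction is an assumption, since the diff elides the body of load_user_model after the hf_hub_download call:

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Same repository and file that load_lora_model() uses above.
local_path = hf_hub_download(repo_id="KolumbusLindh/LoRA-4100",
                             filename="unsloth.F16.gguf")

# Assumed construction; the diff does not show how the Llama instance is created.
model = Llama(model_path=local_path)

# Mirrors generate_response(model, prompt) in app.py; the prompt is illustrative.
response = model("Briefly explain what an LLM judge does.", max_tokens=256, temperature=0.7)
print(response["choices"][0]["text"])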