Filip committed
Commit 7ab2ec9 · 1 Parent(s): 7d909ce
Files changed (3)
  1. .gitattributes +35 -0
  2. app.py +126 -0
  3. requirements.txt +3 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,126 @@
+ import gradio as gr
+ from llama_cpp import Llama
+ from huggingface_hub import hf_hub_download
+
+ # Download a user-specified GGUF model from the Hugging Face Hub and load it
+ def load_user_model(repo_id, model_file):
+     print(f"Downloading model {model_file} from repository {repo_id}...")
+     local_path = hf_hub_download(repo_id=repo_id, filename=model_file)
+     print(f"Model downloaded to: {local_path}")
+     return Llama(model_path=local_path, n_ctx=2048, n_threads=8)
+
+ # Generate a response using the specified model and prompt
+ def generate_response(model, prompt):
+     response = model(prompt, max_tokens=512, temperature=0.5)
+     return response["choices"][0]["text"]
+
+ # Evaluate responses using the LoRA evaluation model
+ def evaluate_responses(prompt, repo_a, model_a, repo_b, model_b, evaluation_criteria):
+     if len(evaluation_criteria) > 3:
+         # Return three values so the error message fits the three Gradio outputs
+         return "Error: Please select up to 3 evaluation criteria only.", "", ""
+
+     # Load models
+     model_a_instance = load_user_model(repo_a, model_a)
+     model_b_instance = load_user_model(repo_b, model_b)
+
+     # Generate responses
+     response_a = generate_response(model_a_instance, prompt)
+     response_b = generate_response(model_b_instance, prompt)
+
+     # Display generated responses
+     print(f"Response A: {response_a}")
+     print(f"Response B: {response_b}")
+
+     # Format the evaluation prompt
+     criteria_list = ", ".join(evaluation_criteria)
+     evaluation_prompt = f"""
+ Prompt: {prompt}
+
+ Response A: {response_a}
+ Response B: {response_b}
+
+ Evaluation Criteria: {criteria_list}
+
+ Please evaluate the responses based on the selected criteria. For each criterion, rate both responses on a scale from 1 to 4 and provide a justification. Finally, declare the winner (or 'draw' if they are equal).
+ """
+     # Use the LoRA model to evaluate the responses
+     evaluation_response = lora_model.create_completion(
+         prompt=evaluation_prompt,
+         max_tokens=512,
+         temperature=0.5
+     )
+     evaluation_results = evaluation_response["choices"][0]["text"]
+
+     # Combine results for display
+     final_output = f"""
+ Evaluation Results:\n{evaluation_results}
+ """
+     return final_output, response_a, response_b
+
+ # Load the LoRA evaluation model
+ def load_lora_model():
+     repo_id = "KolumbusLindh/LoRA-4100"
+     model_file = "unsloth.F16.gguf"
+     print(f"Downloading LoRA evaluation model from repository {repo_id}...")
+     local_path = hf_hub_download(repo_id=repo_id, filename=model_file)
+     print(f"LoRA evaluation model downloaded to: {local_path}")
+     return Llama(model_path=local_path, n_ctx=2048, n_threads=8)
+
+ # Loaded once at module level and reused for every evaluation
+ lora_model = load_lora_model()
+ print("LoRA evaluation model loaded successfully!")
+
+ # Gradio interface
+ with gr.Blocks(title="LLM as a Judge") as demo:
+     gr.Markdown("## LLM as a Judge 🧐")
+     gr.Markdown("Welcome to the LLM as a Judge demo! Provide a prompt and two GGUF models, and select up to 3 evaluation criteria. The app generates a response from each model, then uses the LoRA evaluation model to judge both responses against your criteria and declare a winner.")
+
+     # Model inputs
+     repo_a_input = gr.Textbox(label="Model A Repository", placeholder="Enter the Hugging Face repo name for Model A...", value="forestav/gguf_lora_model")
+     model_a_input = gr.Textbox(label="Model A File Name", placeholder="Enter the model filename for Model A...", value="unsloth.F16.gguf")
+     repo_b_input = gr.Textbox(label="Model B Repository", placeholder="Enter the Hugging Face repo name for Model B...", value="KolumbusLindh/LoRA-4100")
+     model_b_input = gr.Textbox(label="Model B File Name", placeholder="Enter the model filename for Model B...", value="unsloth.F16.gguf")
+
+     # Prompt and criteria inputs
+     prompt_input = gr.Textbox(label="Enter Prompt", placeholder="Enter the prompt here...", lines=3)
+     criteria_dropdown = gr.CheckboxGroup(
+         label="Select Up to 3 Evaluation Criteria",
+         choices=["Clarity", "Completeness", "Accuracy", "Relevance", "User-Friendliness", "Depth", "Creativity"]
+     )
+
+     # Button and outputs
+     evaluate_button = gr.Button("Evaluate Models")
+
+     with gr.Row():
+         with gr.Column():
+             response_a = gr.Textbox(
+                 label="Response A",
+                 placeholder="The response for Model A will appear here...",
+                 lines=20,
+                 interactive=False
+             )
+
+         with gr.Column():
+             response_b = gr.Textbox(
+                 label="Response B",
+                 placeholder="The response for Model B will appear here...",
+                 lines=20,
+                 interactive=False
+             )
+
+     evaluation_output = gr.Textbox(
+         label="Evaluation Results",
+         placeholder="The evaluation results will appear here...",
+         lines=20,
+         interactive=False
+     )
+
+     # Link evaluation function
+     evaluate_button.click(
+         fn=evaluate_responses,
+         inputs=[prompt_input, repo_a_input, model_a_input, repo_b_input, model_b_input, criteria_dropdown],
+         outputs=[evaluation_output, response_a, response_b]
+     )
+
+ # Launch app
+ if __name__ == "__main__":
+     demo.launch()
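For reference, here is a minimal sketch of exercising the evaluation flow without the Gradio UI. It assumes app.py is importable as `app` from the same directory, that the default repositories above are reachable, and that the machine has enough RAM for three GGUF models; the driver script itself is not part of this commit.

# sketch.py -- hypothetical driver, not part of this commit
from app import evaluate_responses

# Uses the same default repos/files as the UI; the first run downloads
# several GB via hf_hub_download.
results, resp_a, resp_b = evaluate_responses(
    prompt="Explain Git LFS in two sentences.",
    repo_a="forestav/gguf_lora_model",
    model_a="unsloth.F16.gguf",
    repo_b="KolumbusLindh/LoRA-4100",
    model_b="unsloth.F16.gguf",
    evaluation_criteria=["Clarity", "Accuracy"],
)
print(results)

Note that importing app runs load_lora_model() at module level, so the judge model is downloaded as a side effect of the import.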
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ huggingface_hub==0.25.2
+ gradio
+ llama-cpp-python
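Only huggingface_hub is pinned; gradio and llama-cpp-python are left floating, so a rebuild of the Space can silently pick up breaking releases. A fully pinned variant might look like the following, where the two added version numbers are illustrative assumptions rather than versions tested against this Space:

huggingface_hub==0.25.2
gradio==4.44.0        # assumed pin, adjust to the version the Space was built with
llama-cpp-python==0.2.90  # assumed pin, adjust likewise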