Increasing LoRA rank to 32 and updating loss function
Browse files
- fine-tuned-model/{checkpoint-295 → checkpoint-1121}/README.md +0 -0
- fine-tuned-model/{checkpoint-354 → checkpoint-1121}/adapter_config.json +0 -0
- fine-tuned-model/{checkpoint-295 → checkpoint-1121}/adapter_model.safetensors +1 -1
- fine-tuned-model/{checkpoint-354 → checkpoint-1121}/optimizer.pt +1 -1
- fine-tuned-model/{checkpoint-354 → checkpoint-1121}/rng_state.pth +1 -1
- fine-tuned-model/{checkpoint-354 → checkpoint-1121}/scaler.pt +1 -1
- fine-tuned-model/{checkpoint-295 → checkpoint-1121}/scheduler.pt +1 -1
- fine-tuned-model/{checkpoint-295 → checkpoint-1121}/special_tokens_map.json +0 -0
- fine-tuned-model/{checkpoint-295 → checkpoint-1121}/tokenizer.json +0 -0
- fine-tuned-model/{checkpoint-295 → checkpoint-1121}/tokenizer_config.json +0 -0
- fine-tuned-model/checkpoint-1121/trainer_state.json +349 -0
- fine-tuned-model/{checkpoint-354 → checkpoint-1121}/training_args.bin +1 -1
- fine-tuned-model/checkpoint-295/trainer_state.json +0 -109
- fine-tuned-model/checkpoint-354/trainer_state.json +0 -107
- fine-tuned-model/{checkpoint-354 → checkpoint-590}/README.md +0 -0
- fine-tuned-model/{checkpoint-295 → checkpoint-590}/adapter_config.json +2 -2
- fine-tuned-model/{checkpoint-354 → checkpoint-590}/adapter_model.safetensors +2 -2
- fine-tuned-model/{checkpoint-295 → checkpoint-590}/optimizer.pt +2 -2
- fine-tuned-model/{checkpoint-295 → checkpoint-590}/rng_state.pth +1 -1
- fine-tuned-model/{checkpoint-295 → checkpoint-590}/scaler.pt +1 -1
- fine-tuned-model/{checkpoint-354 → checkpoint-590}/scheduler.pt +1 -1
- fine-tuned-model/{checkpoint-354 → checkpoint-590}/special_tokens_map.json +0 -0
- fine-tuned-model/{checkpoint-354 → checkpoint-590}/tokenizer.json +0 -0
- fine-tuned-model/{checkpoint-354 → checkpoint-590}/tokenizer_config.json +0 -0
- fine-tuned-model/checkpoint-590/trainer_state.json +200 -0
- fine-tuned-model/{checkpoint-295 → checkpoint-590}/training_args.bin +1 -1
- fine-tuned-model/model.safetensors +1 -1
- fine-tuned-model/runs/Apr03_21-04-00_DESKTOP-SMJC97K/events.out.tfevents.1743739440.DESKTOP-SMJC97K.13648.0 +3 -0
- fine-tuned-model/runs/Apr04_09-11-28_DESKTOP-SMJC97K/events.out.tfevents.1743783088.DESKTOP-SMJC97K.12624.0 +3 -0
- fine-tuned-model/runs/Apr06_12-22-00_DESKTOP-SMJC97K/events.out.tfevents.1743967320.DESKTOP-SMJC97K.20424.0 +3 -0
- finetune_model.ipynb +99 -47
- test_finetuned.ipynb +39 -77
fine-tuned-model/{checkpoint-295 → checkpoint-1121}/README.md
RENAMED
File without changes

fine-tuned-model/{checkpoint-354 → checkpoint-1121}/adapter_config.json
RENAMED
File without changes

fine-tuned-model/{checkpoint-295 → checkpoint-1121}/adapter_model.safetensors
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b1f1fd45ed5bb6da6a7e92eeac091e7318b04c5ab44a21746698e6055407db26
 size 25191536

fine-tuned-model/{checkpoint-354 → checkpoint-1121}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a590d95a55c582b9e2e80da5b7f846b4bc5b100bf9d4776020022d35932666e3
 size 50492858

fine-tuned-model/{checkpoint-354 → checkpoint-1121}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:28366cd32aadd3d8ca0d87af02b96e313773bfd18ed31387c0328dae31820b84
 size 14244

fine-tuned-model/{checkpoint-354 → checkpoint-1121}/scaler.pt
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b2b0170234c1d1dfefc47a409256c774d9bf2fd95dc87f6cf439883a650de5bd
 size 988

fine-tuned-model/{checkpoint-295 → checkpoint-1121}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:144d877c8e0681417ebedd2a8d6df1d85acea4b51efcda810a2976cf0dd26747
 size 1064

fine-tuned-model/{checkpoint-295 → checkpoint-1121}/special_tokens_map.json
RENAMED
File without changes

fine-tuned-model/{checkpoint-295 → checkpoint-1121}/tokenizer.json
RENAMED
File without changes

fine-tuned-model/{checkpoint-295 → checkpoint-1121}/tokenizer_config.json
RENAMED
File without changes
fine-tuned-model/checkpoint-1121/trainer_state.json
ADDED
@@ -0,0 +1,349 @@
{
  "best_global_step": 885,
  "best_metric": 1.009942650794983,
  "best_model_checkpoint": "./fine-tuned-model\\checkpoint-885",
  "epoch": 19.0,
  "eval_steps": 500,
  "global_step": 1121,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.847457627118644, "grad_norm": 0.939425528049469, "learning_rate": 5.8983050847457634e-05, "loss": 7.3825, "step": 50},
    {"epoch": 1.0, "eval_loss": 1.3358267545700073, "eval_runtime": 7.2445, "eval_samples_per_second": 14.494, "eval_steps_per_second": 0.966, "step": 59},
    {"epoch": 1.694915254237288, "grad_norm": 2.5237913131713867, "learning_rate": 5.796610169491525e-05, "loss": 1.6245, "step": 100},
    {"epoch": 2.0, "eval_loss": 1.1412484645843506, "eval_runtime": 7.1915, "eval_samples_per_second": 14.601, "eval_steps_per_second": 0.973, "step": 118},
    {"epoch": 2.542372881355932, "grad_norm": 7.8459625244140625, "learning_rate": 5.6949152542372884e-05, "loss": 1.4469, "step": 150},
    {"epoch": 3.0, "eval_loss": 1.0957719087600708, "eval_runtime": 7.3705, "eval_samples_per_second": 14.246, "eval_steps_per_second": 0.95, "step": 177},
    {"epoch": 3.389830508474576, "grad_norm": 1.5296450853347778, "learning_rate": 5.593220338983051e-05, "loss": 1.3912, "step": 200},
    {"epoch": 4.0, "eval_loss": 1.072253704071045, "eval_runtime": 7.3695, "eval_samples_per_second": 14.248, "eval_steps_per_second": 0.95, "step": 236},
    {"epoch": 4.237288135593221, "grad_norm": 1.9591976404190063, "learning_rate": 5.4915254237288135e-05, "loss": 1.2829, "step": 250},
    {"epoch": 5.0, "eval_loss": 1.1338605880737305, "eval_runtime": 7.1975, "eval_samples_per_second": 14.588, "eval_steps_per_second": 0.973, "step": 295},
    {"epoch": 5.084745762711864, "grad_norm": 1.4545025825500488, "learning_rate": 5.389830508474577e-05, "loss": 1.3583, "step": 300},
    {"epoch": 5.932203389830509, "grad_norm": 1.4759844541549683, "learning_rate": 5.288135593220339e-05, "loss": 1.295, "step": 350},
    {"epoch": 6.0, "eval_loss": 1.043823480606079, "eval_runtime": 7.226, "eval_samples_per_second": 14.531, "eval_steps_per_second": 0.969, "step": 354},
    {"epoch": 6.779661016949152, "grad_norm": 1.5640958547592163, "learning_rate": 5.186440677966102e-05, "loss": 1.2581, "step": 400},
    {"epoch": 7.0, "eval_loss": 1.0363339185714722, "eval_runtime": 7.3055, "eval_samples_per_second": 14.373, "eval_steps_per_second": 0.958, "step": 413},
    {"epoch": 7.627118644067797, "grad_norm": 2.0497965812683105, "learning_rate": 5.0847457627118643e-05, "loss": 1.2544, "step": 450},
    {"epoch": 8.0, "eval_loss": 1.067766785621643, "eval_runtime": 7.3717, "eval_samples_per_second": 14.244, "eval_steps_per_second": 0.95, "step": 472},
    {"epoch": 8.474576271186441, "grad_norm": 1.2606173753738403, "learning_rate": 4.9830508474576276e-05, "loss": 1.3097, "step": 500},
    {"epoch": 9.0, "eval_loss": 1.02413809299469, "eval_runtime": 7.1818, "eval_samples_per_second": 14.62, "eval_steps_per_second": 0.975, "step": 531},
    {"epoch": 9.322033898305085, "grad_norm": 2.3118815422058105, "learning_rate": 4.88135593220339e-05, "loss": 1.2787, "step": 550},
    {"epoch": 10.0, "eval_loss": 1.013655424118042, "eval_runtime": 7.1577, "eval_samples_per_second": 14.67, "eval_steps_per_second": 0.978, "step": 590},
    {"epoch": 10.169491525423728, "grad_norm": 1.2859658002853394, "learning_rate": 4.7796610169491526e-05, "loss": 1.2354, "step": 600},
    {"epoch": 11.0, "eval_loss": 1.0547661781311035, "eval_runtime": 7.1926, "eval_samples_per_second": 14.598, "eval_steps_per_second": 0.973, "step": 649},
    {"epoch": 11.016949152542374, "grad_norm": 2.121445417404175, "learning_rate": 4.677966101694916e-05, "loss": 1.2596, "step": 650},
    {"epoch": 11.864406779661017, "grad_norm": 3.0464370250701904, "learning_rate": 4.576271186440678e-05, "loss": 1.2646, "step": 700},
    {"epoch": 12.0, "eval_loss": 1.0133599042892456, "eval_runtime": 10.6666, "eval_samples_per_second": 9.844, "eval_steps_per_second": 0.656, "step": 708},
    {"epoch": 12.711864406779661, "grad_norm": 1.1342540979385376, "learning_rate": 4.474576271186441e-05, "loss": 1.2068, "step": 750},
    {"epoch": 13.0, "eval_loss": 1.0467838048934937, "eval_runtime": 11.4351, "eval_samples_per_second": 9.182, "eval_steps_per_second": 0.612, "step": 767},
    {"epoch": 13.559322033898304, "grad_norm": 2.094381093978882, "learning_rate": 4.3728813559322035e-05, "loss": 1.2955, "step": 800},
    {"epoch": 14.0, "eval_loss": 1.0249124765396118, "eval_runtime": 13.2701, "eval_samples_per_second": 7.913, "eval_steps_per_second": 0.528, "step": 826},
    {"epoch": 14.40677966101695, "grad_norm": 1.0174381732940674, "learning_rate": 4.271186440677966e-05, "loss": 1.2215, "step": 850},
    {"epoch": 15.0, "eval_loss": 1.009942650794983, "eval_runtime": 10.124, "eval_samples_per_second": 10.371, "eval_steps_per_second": 0.691, "step": 885},
    {"epoch": 15.254237288135593, "grad_norm": 1.1202493906021118, "learning_rate": 4.169491525423729e-05, "loss": 1.2365, "step": 900},
    {"epoch": 16.0, "eval_loss": 1.0121246576309204, "eval_runtime": 9.974, "eval_samples_per_second": 10.527, "eval_steps_per_second": 0.702, "step": 944},
    {"epoch": 16.10169491525424, "grad_norm": 1.1021959781646729, "learning_rate": 4.067796610169492e-05, "loss": 1.2412, "step": 950},
    {"epoch": 16.949152542372882, "grad_norm": 0.9624550938606262, "learning_rate": 3.966101694915254e-05, "loss": 1.2348, "step": 1000},
    {"epoch": 17.0, "eval_loss": 1.0155479907989502, "eval_runtime": 8.9635, "eval_samples_per_second": 11.714, "eval_steps_per_second": 0.781, "step": 1003},
    {"epoch": 17.796610169491526, "grad_norm": 0.9586867094039917, "learning_rate": 3.864406779661017e-05, "loss": 1.2455, "step": 1050},
    {"epoch": 18.0, "eval_loss": 1.0335369110107422, "eval_runtime": 9.0555, "eval_samples_per_second": 11.595, "eval_steps_per_second": 0.773, "step": 1062},
    {"epoch": 18.64406779661017, "grad_norm": 1.7303390502929688, "learning_rate": 3.76271186440678e-05, "loss": 1.2238, "step": 1100},
    {"epoch": 19.0, "eval_loss": 1.020735263824463, "eval_runtime": 9.6479, "eval_samples_per_second": 10.883, "eval_steps_per_second": 0.726, "step": 1121}
  ],
  "logging_steps": 50,
  "max_steps": 2950,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {"early_stopping_patience": 4, "early_stopping_threshold": 0.0},
      "attributes": {"early_stopping_patience_counter": 4}
    },
    "TrainerControl": {
      "args": {"should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true},
      "attributes": {}
    }
  },
  "total_flos": 3.5260523640520704e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
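The trainer state above records which checkpoint was selected as best (step 885, eval_loss ≈ 1.0099) together with the full per-epoch eval-loss curve. A minimal inspection sketch, not part of this commit, that reads those fields back out of the file added here:

    import json

    # Hypothetical helper script; the path is the file added in this commit.
    with open("fine-tuned-model/checkpoint-1121/trainer_state.json") as f:
        state = json.load(f)

    # Checkpoint chosen by load_best_model_at_end / EarlyStoppingCallback.
    print(state["best_model_checkpoint"], state["best_metric"])

    # Per-epoch eval losses logged by the Trainer.
    for entry in state["log_history"]:
        if "eval_loss" in entry:
            print(f"epoch {entry['epoch']:>4.0f}  eval_loss {entry['eval_loss']:.4f}")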
fine-tuned-model/{checkpoint-354 → checkpoint-1121}/training_args.bin
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:eb126e0e8412d31a50774b555d6da314217e8af8f8f466b0f189bdae98050751
 size 5368
fine-tuned-model/checkpoint-295/trainer_state.json
DELETED
@@ -1,109 +0,0 @@
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 295,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.847457627118644, "grad_norm": 3.6365857124328613, "learning_rate": 4.152542372881356e-05, "loss": 8.5103, "step": 50},
    {"epoch": 1.0, "eval_loss": 1.4866021871566772, "eval_runtime": 5.4305, "eval_samples_per_second": 19.335, "eval_steps_per_second": 1.289, "step": 59},
    {"epoch": 1.694915254237288, "grad_norm": 3.137465715408325, "learning_rate": 3.305084745762712e-05, "loss": 1.7098, "step": 100},
    {"epoch": 2.0, "eval_loss": 1.2273037433624268, "eval_runtime": 5.362, "eval_samples_per_second": 19.582, "eval_steps_per_second": 1.305, "step": 118},
    {"epoch": 2.542372881355932, "grad_norm": 1.6243258714675903, "learning_rate": 2.457627118644068e-05, "loss": 1.5421, "step": 150},
    {"epoch": 3.0, "eval_loss": 1.1611202955245972, "eval_runtime": 5.348, "eval_samples_per_second": 19.634, "eval_steps_per_second": 1.309, "step": 177},
    {"epoch": 3.389830508474576, "grad_norm": 1.7812302112579346, "learning_rate": 1.6101694915254237e-05, "loss": 1.4875, "step": 200},
    {"epoch": 4.0, "eval_loss": 1.153254508972168, "eval_runtime": 5.347, "eval_samples_per_second": 19.637, "eval_steps_per_second": 1.309, "step": 236},
    {"epoch": 4.237288135593221, "grad_norm": 2.1582489013671875, "learning_rate": 7.627118644067798e-06, "loss": 1.3883, "step": 250},
    {"epoch": 5.0, "eval_loss": 1.1216797828674316, "eval_runtime": 5.3095, "eval_samples_per_second": 19.776, "eval_steps_per_second": 1.318, "step": 295}
  ],
  "logging_steps": 50,
  "max_steps": 295,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {"should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true},
      "attributes": {}
    }
  },
  "total_flos": 9279085168558080.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
fine-tuned-model/checkpoint-354/trainer_state.json
DELETED
@@ -1,107 +0,0 @@
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 354,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.423728813559322, "grad_norm": 1.3252296447753906, "learning_rate": 0.0004307909604519774, "loss": 3.0243, "step": 50},
    {"epoch": 0.847457627118644, "grad_norm": 1.302614450454712, "learning_rate": 0.00036016949152542374, "loss": 1.2356, "step": 100},
    {"epoch": 1.0, "eval_loss": 0.9709166288375854, "eval_runtime": 5.194, "eval_samples_per_second": 20.216, "eval_steps_per_second": 2.695, "step": 118},
    {"epoch": 1.271186440677966, "grad_norm": 0.6076271533966064, "learning_rate": 0.0002895480225988701, "loss": 1.2005, "step": 150},
    {"epoch": 1.694915254237288, "grad_norm": 1.1516226530075073, "learning_rate": 0.0002189265536723164, "loss": 1.2331, "step": 200},
    {"epoch": 2.0, "eval_loss": 0.9370157718658447, "eval_runtime": 5.1425, "eval_samples_per_second": 20.418, "eval_steps_per_second": 2.722, "step": 236},
    {"epoch": 2.1186440677966103, "grad_norm": 0.5812012553215027, "learning_rate": 0.0001483050847457627, "loss": 1.1483, "step": 250},
    {"epoch": 2.542372881355932, "grad_norm": 2.018043279647827, "learning_rate": 7.768361581920904e-05, "loss": 1.1873, "step": 300},
    {"epoch": 2.9661016949152543, "grad_norm": 0.5886570811271667, "learning_rate": 7.062146892655367e-06, "loss": 1.1576, "step": 350},
    {"epoch": 3.0, "eval_loss": 0.9401432871818542, "eval_runtime": 5.1425, "eval_samples_per_second": 20.418, "eval_steps_per_second": 2.722, "step": 354}
  ],
  "logging_steps": 50,
  "max_steps": 354,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {"should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true},
      "attributes": {}
    }
  },
  "total_flos": 5567451101134848.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
fine-tuned-model/{checkpoint-354 → checkpoint-590}/README.md
RENAMED
File without changes

fine-tuned-model/{checkpoint-295 → checkpoint-590}/adapter_config.json
RENAMED
@@ -20,12 +20,12 @@
 "megatron_core": "megatron.core",
 "modules_to_save": null,
 "peft_type": "LORA",
-"r":
+"r": 32,
 "rank_pattern": {},
 "revision": null,
 "target_modules": [
-"k_proj",
 "q_proj",
+"k_proj",
 "o_proj",
 "v_proj"
 ],

fine-tuned-model/{checkpoint-354 → checkpoint-590}/adapter_model.safetensors
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:6bd78c8222b91c5a43eef81fb14dfb052607a1e986c931f28dbf005c1c762963
+size 50357440

fine-tuned-model/{checkpoint-295 → checkpoint-590}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:378f24a6f93dde2886215460fba55025509bed73deec840e856c2fc0ae7f20dd
+size 100825274

fine-tuned-model/{checkpoint-295 → checkpoint-590}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5e1486d84435f60895ed0edc99f537f9a3ec350361fbd8798e501803966814c2
 size 14244

fine-tuned-model/{checkpoint-295 → checkpoint-590}/scaler.pt
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ef038376ed0ccb6d992bae6f264fc61e513f2c11e7ddccb8f3b500fe3976c969
 size 988

fine-tuned-model/{checkpoint-354 → checkpoint-590}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6a773639ece71e52af3d21940488177b6e201c3d8fb49851b0d6319abe8767df
 size 1064

fine-tuned-model/{checkpoint-354 → checkpoint-590}/special_tokens_map.json
RENAMED
File without changes

fine-tuned-model/{checkpoint-354 → checkpoint-590}/tokenizer.json
RENAMED
File without changes

fine-tuned-model/{checkpoint-354 → checkpoint-590}/tokenizer_config.json
RENAMED
File without changes
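Each checkpoint directory above carries both the LoRA adapter (`adapter_config.json` with `r=32` and the `adapter_model.safetensors` weights) and the tokenizer files, so a checkpoint can be reloaded on its own. A minimal loading sketch; the base model identifier is an assumption, since it is not named anywhere in this diff:

    import torch
    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer

    BASE_MODEL = "<base-model-id>"  # assumption: the base model the notebook fine-tunes
    ADAPTER_DIR = "fine-tuned-model/checkpoint-590"

    tokenizer = AutoTokenizer.from_pretrained(ADAPTER_DIR)  # tokenizer files are saved per checkpoint
    base = AutoModelForCausalLM.from_pretrained(BASE_MODEL, torch_dtype=torch.float16)
    model = PeftModel.from_pretrained(base, ADAPTER_DIR)    # applies the r=32 LoRA weights
    model.eval()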
fine-tuned-model/checkpoint-590/trainer_state.json
ADDED
@@ -0,0 +1,200 @@
{
  "best_global_step": 590,
  "best_metric": 0.9585100412368774,
  "best_model_checkpoint": "./fine-tuned-model\\checkpoint-590",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 590,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.847457627118644, "grad_norm": 2.3572731018066406, "learning_rate": 3.932203389830509e-05, "loss": 9.5716, "step": 50},
    {"epoch": 1.0, "eval_loss": 1.5832620859146118, "eval_runtime": 5.4855, "eval_samples_per_second": 19.141, "eval_steps_per_second": 1.276, "step": 59},
    {"epoch": 1.694915254237288, "grad_norm": 3.8656413555145264, "learning_rate": 3.8644067796610175e-05, "loss": 1.7466, "step": 100},
    {"epoch": 2.0, "eval_loss": 1.1679714918136597, "eval_runtime": 5.364, "eval_samples_per_second": 19.575, "eval_steps_per_second": 1.305, "step": 118},
    {"epoch": 2.542372881355932, "grad_norm": 2.582818031311035, "learning_rate": 3.796610169491526e-05, "loss": 1.5173, "step": 150},
    {"epoch": 3.0, "eval_loss": 1.0937272310256958, "eval_runtime": 5.4735, "eval_samples_per_second": 19.183, "eval_steps_per_second": 1.279, "step": 177},
    {"epoch": 3.389830508474576, "grad_norm": 0.9783422350883484, "learning_rate": 3.728813559322034e-05, "loss": 1.4233, "step": 200},
    {"epoch": 4.0, "eval_loss": 1.0387905836105347, "eval_runtime": 5.354, "eval_samples_per_second": 19.612, "eval_steps_per_second": 1.307, "step": 236},
    {"epoch": 4.237288135593221, "grad_norm": 0.6458675265312195, "learning_rate": 3.6610169491525426e-05, "loss": 1.3044, "step": 250},
    {"epoch": 5.0, "eval_loss": 1.0661542415618896, "eval_runtime": 5.4235, "eval_samples_per_second": 19.36, "eval_steps_per_second": 1.291, "step": 295},
    {"epoch": 5.084745762711864, "grad_norm": 1.3425668478012085, "learning_rate": 3.593220338983051e-05, "loss": 1.3608, "step": 300},
    {"epoch": 5.932203389830509, "grad_norm": 1.331030011177063, "learning_rate": 3.52542372881356e-05, "loss": 1.2839, "step": 350},
    {"epoch": 6.0, "eval_loss": 0.9894506335258484, "eval_runtime": 5.406, "eval_samples_per_second": 19.423, "eval_steps_per_second": 1.295, "step": 354},
    {"epoch": 6.779661016949152, "grad_norm": 1.02914297580719, "learning_rate": 3.457627118644068e-05, "loss": 1.2485, "step": 400},
    {"epoch": 7.0, "eval_loss": 0.9816469550132751, "eval_runtime": 5.4105, "eval_samples_per_second": 19.407, "eval_steps_per_second": 1.294, "step": 413},
    {"epoch": 7.627118644067797, "grad_norm": 1.8862000703811646, "learning_rate": 3.389830508474576e-05, "loss": 1.2426, "step": 450},
    {"epoch": 8.0, "eval_loss": 1.0074799060821533, "eval_runtime": 5.423, "eval_samples_per_second": 19.362, "eval_steps_per_second": 1.291, "step": 472},
    {"epoch": 8.474576271186441, "grad_norm": 0.9509351849555969, "learning_rate": 3.322033898305085e-05, "loss": 1.2903, "step": 500},
    {"epoch": 9.0, "eval_loss": 0.9700178503990173, "eval_runtime": 5.388, "eval_samples_per_second": 19.488, "eval_steps_per_second": 1.299, "step": 531},
    {"epoch": 9.322033898305085, "grad_norm": 1.861725926399231, "learning_rate": 3.2542372881355934e-05, "loss": 1.2588, "step": 550},
    {"epoch": 10.0, "eval_loss": 0.9585100412368774, "eval_runtime": 5.409, "eval_samples_per_second": 19.412, "eval_steps_per_second": 1.294, "step": 590}
  ],
  "logging_steps": 50,
  "max_steps": 2950,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {"early_stopping_patience": 2, "early_stopping_threshold": 0.0},
      "attributes": {"early_stopping_patience_counter": 0}
    },
    "TrainerControl": {
      "args": {"should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false},
      "attributes": {}
    }
  },
  "total_flos": 1.86489122586624e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
fine-tuned-model/{checkpoint-295 → checkpoint-590}/training_args.bin
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:dbc31a968883ba1c37ac7256a236ef457c3a308b208d06d810a9cc1c7385f86a
 size 5368

fine-tuned-model/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:95cc3d2d2f4148921f13714c2b852562d2616722a7178361623f64e4339f0051
 size 1480793144

fine-tuned-model/runs/Apr03_21-04-00_DESKTOP-SMJC97K/events.out.tfevents.1743739440.DESKTOP-SMJC97K.13648.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1ac21d76b4b1a98d5bd7cd3227cbd39d36f6159fbfd9788d75abf80fc773513
+size 12206

fine-tuned-model/runs/Apr04_09-11-28_DESKTOP-SMJC97K/events.out.tfevents.1743783088.DESKTOP-SMJC97K.12624.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4b3ce33677563c06e296678e5116701cd3be77c97d996cf838591c38fe6185d
+size 15791

fine-tuned-model/runs/Apr06_12-22-00_DESKTOP-SMJC97K/events.out.tfevents.1743967320.DESKTOP-SMJC97K.20424.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05abf9b879ca9f8506f60328b453852dc688840e099650a234aeef0e0080671d
+size 12206
finetune_model.ipynb
CHANGED
@@ -175,9 +175,7 @@
 "Request:\n",
 "\"What is the most points the Los Angeles Lakers have ever scored at home?\"\n",
 "SQLite:\n",
-"SELECT MAX(pts_home) \n",
-"FROM game \n",
-"WHERE team_name_home = 'Los Angeles Lakers';\n",
+"SELECT MAX(pts_home) FROM game WHERE team_name_home = 'Los Angeles Lakers';\n",
 "\n",
 "Request:\n",
 "\"Which teams are located in the state of California?\"\n",
@@ -197,9 +195,7 @@
 "Request:\n",
 "\"Find the Boston Celtics largest home victory margin in the 2008 season.\"\n",
 "SQLite:\n",
-"SELECT MAX(pts_home - pts_away) AS biggest_win \n",
-"FROM game\n",
-"WHERE team_name_home = 'Boston Celtics' AND season_id = '22008';\n",
+"SELECT MAX(pts_home - pts_away) AS biggest_win FROM game WHERE team_name_home = 'Boston Celtics' AND season_id = '22008';\n",
 "\n",
 "Generate only the SQLite query prefaced by SQLite: and no other text, do not output an explanation of the query. Now generate an SQLite query for the following user request. Request:\n",
 "\"\"\""
@@ -230,7 +226,21 @@
 "output_type": "stream",
 "text": [
 "WARNING:tensorflow:From c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\tf_keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n",
-"\n"
+"\n"
+]
+},
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"C:\\Users\\Dean\\AppData\\Local\\Temp\\ipykernel_20424\\3615904657.py:13: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n",
+" df = df.applymap(lambda x: re.sub(r'\\s+', ' ', x) if isinstance(x, str) else x)\n"
+]
+},
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
 "Total dataset examples: 1044\n",
 " natural_query \\\n",
 "0 Which NBA teams were established after the yea... \n",
@@ -241,10 +251,10 @@
 "\n",
 " sql_query result \n",
 "0 SELECT full_name FROM team WHERE year_founded ... New Orleans Pelicans \n",
-"1 SELECT MAX(pts_home) FROM game
+"1 SELECT MAX(pts_home) FROM game WHERE team_name... 162 \n",
 "2 SELECT pts_home FROM game WHERE team_name_home... 156 \n",
-"3 SELECT COUNT(*)
-"4 SELECT AVG(ast_home)
+"3 SELECT COUNT(*) FROM game WHERE team_abbreviat... 29 \n",
+"4 SELECT AVG(ast_home) FROM game WHERE team_abbr... 26.51355662 \n"
 ]
 },
 {
@@ -413,9 +423,7 @@
 "Request:\n",
 "\"What is the most points the Los Angeles Lakers have ever scored at home?\"\n",
 "SQLite:\n",
-"SELECT MAX(pts_home) \n",
-"FROM game \n",
-"WHERE team_name_home = 'Los Angeles Lakers';\n",
+"SELECT MAX(pts_home) FROM game WHERE team_name_home = 'Los Angeles Lakers';\n",
 "\n",
 "Request:\n",
 "\"Which teams are located in the state of California?\"\n",
@@ -435,9 +443,7 @@
 "Request:\n",
 "\"Find the Boston Celtics largest home victory margin in the 2008 season.\"\n",
 "SQLite:\n",
-"SELECT MAX(pts_home - pts_away) AS biggest_win \n",
-"FROM game\n",
-"WHERE team_name_home = 'Boston Celtics' AND season_id = '22008';\n",
+"SELECT MAX(pts_home - pts_away) AS biggest_win FROM game WHERE team_name_home = 'Boston Celtics' AND season_id = '22008';\n",
 "\n",
 "Generate only the SQLite query prefaced by SQLite: and no other text, do not output an explanation of the query. Now generate an SQLite query for the following user request. Request:\n",
 "Which NBA teams were established after the year 2000? List their names and founding years, sorted from newest to oldest\n",
@@ -449,7 +455,7 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"Map: 100%|██████████| 1044/1044 [00:01<00:00,
+"Map: 100%|██████████| 1044/1044 [00:01<00:00, 546.45 examples/s]"
 ]
 },
 {
@@ -611,9 +617,7 @@
 "Request:\n",
 "\"What is the most points the Los Angeles Lakers have ever scored at home?\"\n",
 "SQLite:\n",
-"SELECT MAX(pts_home) \n",
-"FROM game \n",
-"WHERE team_name_home = 'Los Angeles Lakers';\n",
+"SELECT MAX(pts_home) FROM game WHERE team_name_home = 'Los Angeles Lakers';\n",
 "\n",
 "Request:\n",
 "\"Which teams are located in the state of California?\"\n",
@@ -633,14 +637,12 @@
 "Request:\n",
 "\"Find the Boston Celtics largest home victory margin in the 2008 season.\"\n",
 "SQLite:\n",
-"SELECT MAX(pts_home - pts_away) AS biggest_win \n",
-"FROM game\n",
-"WHERE team_name_home = 'Boston Celtics' AND season_id = '22008';\n",
+"SELECT MAX(pts_home - pts_away) AS biggest_win FROM game WHERE team_name_home = 'Boston Celtics' AND season_id = '22008';\n",
 "\n",
 "Generate only the SQLite query prefaced by SQLite: and no other text, do not output an explanation of the query. Now generate an SQLite query for the following user request. Request:\n",
 "How many points did the Golden State Warriors score in their first game of the 2005 season?\n",
 "SQLite: \n",
-"SELECT pts_home
+"SELECT pts_home FROM game WHERE team_abbreviation_home = 'GSW' AND season_id = '22005' ORDER BY game_date ASC LIMIT 1;\n",
 "939\n",
 "105\n"
 ]
@@ -657,14 +659,17 @@
 "import pandas as pd\n",
 "import torch\n",
 "from datasets import Dataset\n",
-"from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig\n",
+"from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig, EarlyStoppingCallback\n",
 "from torch.utils.data import DataLoader\n",
 "from peft import LoraConfig, get_peft_model, TaskType\n",
 "import os\n",
+"import re\n",
 "\n",
 "# Load dataset\n",
 "df = pd.read_csv(\"./train-data/sql_train.tsv\", sep='\\t')\n",
 "\n",
+"df = df.applymap(lambda x: re.sub(r'\\s+', ' ', x) if isinstance(x, str) else x)\n",
+"\n",
 "# Display dataset info\n",
 "print(f\"Total dataset examples: {len(df)}\")\n",
 "print(df.head())\n",
@@ -721,7 +726,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"trainable params:
+"trainable params: 12,582,912 || all params: 1,359,054,848 || trainable%: 0.9259\n"
 ]
 }
 ],
@@ -745,7 +750,7 @@
 "\n",
 "# Define LoRA configuration\n",
 "lora_config = LoraConfig(\n",
-" r=
+" r=32, # Rank of LoRA matrices (adjust for memory vs. accuracy)\n",
 " lora_alpha=32, # Scaling factor\n",
 " lora_dropout=0.1, # Dropout for regularization\n",
 " bias=\"none\",\n",
@@ -782,7 +787,7 @@
 "text": [
 "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\training_args.py:1611: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
 " warnings.warn(\n",
-"C:\\Users\\Dean\\AppData\\Local\\Temp\\
+"C:\\Users\\Dean\\AppData\\Local\\Temp\\ipykernel_20424\\92099500.py:20: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n",
 " trainer = Trainer(\n",
 "No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n"
 ]
@@ -795,13 +800,16 @@
 " save_strategy=\"epoch\", # Save model every epoch\n",
 " per_device_train_batch_size=16, # LoRA allows higher batch size\n",
 " per_device_eval_batch_size=16,\n",
-" num_train_epochs=
-" learning_rate=
+" num_train_epochs=50, # Increase if needed\n",
+" learning_rate=4e-5, # Higher LR since we're only training LoRA layers\n",
 " weight_decay=0.01,\n",
 " logging_steps=50, # Print loss every 50 steps\n",
-" save_total_limit=2, # Keep last
+" save_total_limit=2, # Keep last 4 checkpoints\n",
 " fp16=True if torch.cuda.is_available() else False,\n",
-" push_to_hub=False
+" push_to_hub=False,\n",
+" load_best_model_at_end=True,\n",
+" metric_for_best_model=\"eval_loss\",\n",
+" greater_is_better=False\n",
 ")\n",
 "\n",
 "# Trainer setup\n",
@@ -810,7 +818,8 @@
 " args=training_args,\n",
 " train_dataset=train_dataset,\n",
 " eval_dataset=val_dataset,\n",
-" tokenizer=tokenizer
+" tokenizer=tokenizer,\n",
+" callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]\n",
 ")"
 ]
 },
@@ -840,8 +849,8 @@
 "\n",
 " <div>\n",
 " \n",
-" <progress value='
-" [
+" <progress value='708' max='2950' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+" [ 708/2950 2:25:12 < 7:41:08, 0.08 it/s, Epoch 12/50]\n",
 " </div>\n",
 " <table border=\"1\" class=\"dataframe\">\n",
 " <thead>\n",
@@ -854,28 +863,63 @@
 " <tbody>\n",
 " <tr>\n",
 " <td>1</td>\n",
-" <td>
-" <td>1.
+" <td>9.571600</td>\n",
+" <td>1.583262</td>\n",
 " </tr>\n",
 " <tr>\n",
 " <td>2</td>\n",
-" <td>1.
-" <td>1.
+" <td>1.746600</td>\n",
+" <td>1.167971</td>\n",
 " </tr>\n",
 " <tr>\n",
 " <td>3</td>\n",
-" <td>1.
-" <td>1.
+" <td>1.517300</td>\n",
+" <td>1.093727</td>\n",
 " </tr>\n",
 " <tr>\n",
 " <td>4</td>\n",
-" <td>1.
-" <td>1.
+" <td>1.423300</td>\n",
+" <td>1.038791</td>\n",
 " </tr>\n",
 " <tr>\n",
 " <td>5</td>\n",
-" <td>1.
-" <td>1.
+" <td>1.304400</td>\n",
+" <td>1.066154</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <td>6</td>\n",
+" <td>1.283900</td>\n",
+" <td>0.989451</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <td>7</td>\n",
+" <td>1.248500</td>\n",
+" <td>0.981647</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <td>8</td>\n",
+" <td>1.242600</td>\n",
+" <td>1.007480</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <td>9</td>\n",
+" <td>1.290300</td>\n",
+" <td>0.970018</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <td>10</td>\n",
+" <td>1.258800</td>\n",
+" <td>0.958510</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <td>11</td>\n",
+" <td>1.217200</td>\n",
+" <td>1.017668</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <td>12</td>\n",
+" <td>1.242000</td>\n",
+" <td>0.961481</td>\n",
 " </tr>\n",
 " </tbody>\n",
 "</table><p>"
@@ -927,9 +971,17 @@
 },
 {
 "cell_type": "code",
-"execution_count":
+"execution_count": 6,
 "metadata": {},
 "outputs": [
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\autograd\\_functions.py:315: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization\n",
+" warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n"
+]
+},
 {
 "name": "stdout",
 "output_type": "stream",
@@ -949,7 +1001,7 @@
 "inputs = tokenizer.apply_chat_template(message, add_generation_prompt=True, return_tensors=\"pt\").to(model.device)\n",
 "\n",
 "# Generate SQL query\n",
-"outputs = model.generate(inputs, max_new_tokens=
+"outputs = model.generate(inputs, max_new_tokens=256, do_sample=False)\n",
 "query_output = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)\n",
 "\n",
 "print(\"Generated SQL:\", query_output)"
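For reference, the training setup that the scattered notebook hunks above add up to can be sketched in one place. This is a reconstruction from the diff, not a copy of a single notebook cell; `model` (the PEFT-wrapped base model), `tokenizer`, `train_dataset`, and `val_dataset` are assumed to be prepared as in the earlier, unchanged notebook cells, and the eval strategy is inferred from the per-epoch eval logs.

    import re
    import pandas as pd
    from peft import LoraConfig, TaskType
    from transformers import EarlyStoppingCallback, Trainer, TrainingArguments

    # Collapse multi-line SQL targets to single lines, as the updated notebook does.
    df = pd.read_csv("./train-data/sql_train.tsv", sep="\t")
    df = df.applymap(lambda x: re.sub(r"\s+", " ", x) if isinstance(x, str) else x)

    lora_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        r=32,                  # rank raised to 32 in this commit
        lora_alpha=32,
        lora_dropout=0.1,
        bias="none",
        target_modules=["q_proj", "k_proj", "o_proj", "v_proj"],
    )

    training_args = TrainingArguments(
        output_dir="./fine-tuned-model",
        evaluation_strategy="epoch",       # implied by the per-epoch eval_loss logs
        save_strategy="epoch",
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=50,
        learning_rate=4e-5,
        weight_decay=0.01,
        logging_steps=50,
        save_total_limit=2,
        fp16=True,
        push_to_hub=False,
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
    )

    # model, tokenizer, train_dataset, val_dataset come from the earlier notebook cells.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],
    )

With `load_best_model_at_end` plus the early-stopping callback, training stops once eval loss fails to improve for the configured patience, which is exactly what the checkpoint-590 and checkpoint-1121 trainer states above record.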
test_finetuned.ipynb
CHANGED
@@ -16,7 +16,7 @@
|
|
16 |
},
|
17 |
{
|
18 |
"cell_type": "code",
|
19 |
-
"execution_count":
|
20 |
"metadata": {},
|
21 |
"outputs": [
|
22 |
{
|
@@ -26,9 +26,9 @@
|
|
26 |
"Total dataset examples: 1044\n",
|
27 |
"\n",
|
28 |
"\n",
|
29 |
-
"
|
30 |
-
"SELECT
|
31 |
-
"
|
32 |
]
|
33 |
}
|
34 |
],
|
@@ -58,7 +58,7 @@
|
|
58 |
},
|
59 |
{
|
60 |
"cell_type": "code",
|
61 |
-
"execution_count":
|
62 |
"metadata": {},
|
63 |
"outputs": [
|
64 |
{
|
@@ -92,11 +92,11 @@
|
|
92 |
},
|
93 |
{
|
94 |
"cell_type": "code",
|
95 |
-
"execution_count":
|
96 |
"metadata": {},
|
97 |
"outputs": [],
|
98 |
"source": [
|
99 |
-
"input_text = \"\"\"You are an AI assistant that converts natural language queries into valid SQLite queries.\n",
|
100 |
"Database Schema and Explanations\n",
|
101 |
"\n",
|
102 |
"team Table\n",
|
@@ -251,9 +251,7 @@
|
|
251 |
"Request:\n",
|
252 |
"\"What is the most points the Los Angeles Lakers have ever scored at home?\"\n",
|
253 |
"SQLite:\n",
|
254 |
-
"SELECT MAX(pts_home)
|
255 |
-
"FROM game \n",
|
256 |
-
"WHERE team_name_home = 'Los Angeles Lakers';\n",
|
257 |
"\n",
|
258 |
"Request:\n",
|
259 |
"\"Which teams are located in the state of California?\"\n",
|
@@ -273,9 +271,7 @@
|
|
273 |
"Request:\n",
|
274 |
"\"Find the Boston Celtics largest home victory margin in the 2008 season.\"\n",
|
275 |
"SQLite:\n",
|
276 |
-
"SELECT MAX(pts_home - pts_away) AS biggest_win
|
277 |
-
"FROM game\n",
|
278 |
-
"WHERE team_name_home = 'Boston Celtics' AND season_id = '22008';\n",
|
279 |
"\n",
|
280 |
"Generate only the SQLite query prefaced by SQLite: and no other text, do not output an explanation of the query. Now generate an SQLite query for the following user request. Request:\n",
|
281 |
"\"\"\""
|
@@ -290,14 +286,14 @@
|
|
290 |
},
|
291 |
{
|
292 |
"cell_type": "code",
|
293 |
-
"execution_count":
|
294 |
"metadata": {},
|
295 |
"outputs": [
|
296 |
{
|
297 |
"name": "stdout",
|
298 |
"output_type": "stream",
|
299 |
"text": [
|
300 |
-
"SQLite: SELECT
|
301 |
"\n"
|
302 |
]
|
303 |
}
|
@@ -322,15 +318,14 @@
|
|
322 |
},
|
323 |
{
|
324 |
"cell_type": "code",
|
325 |
-
"execution_count":
|
326 |
"metadata": {},
|
327 |
"outputs": [
|
328 |
{
|
329 |
"name": "stdout",
|
330 |
"output_type": "stream",
|
331 |
"text": [
|
332 |
-
"SELECT
|
333 |
-
"('12022',)\n"
|
334 |
]
|
335 |
}
|
336 |
],
|
@@ -374,21 +369,21 @@
|
|
374 |
},
|
375 |
{
|
376 |
"cell_type": "code",
|
377 |
-
"execution_count":
|
378 |
"metadata": {},
|
379 |
"outputs": [
|
380 |
{
|
381 |
"name": "stdout",
|
382 |
"output_type": "stream",
|
383 |
"text": [
|
384 |
-
"
|
385 |
-
"SELECT
|
386 |
-
"
|
387 |
-
"SQLite:
|
388 |
"\n",
|
389 |
-
"Statement valid?
|
390 |
"SQLite matched? False\n",
|
391 |
-
"Result matched?
|
392 |
]
|
393 |
}
|
394 |
],
|
@@ -397,10 +392,18 @@
|
|
397 |
"\n",
|
398 |
"def compare_result(sample_query, sample_result, query_output):\n",
|
399 |
" # Clean model output to only have the query output\n",
|
400 |
-
" if query_output[0:8] == \"SQLite
|
401 |
" query = query_output[8:]\n",
|
402 |
" elif query_output[0:5] == \"SQL: \":\n",
|
403 |
" query = query_output[5:]\n",
|
404 |
" else:\n",
|
405 |
" query = query_output\n",
|
406 |
"\n",
|
@@ -448,7 +451,7 @@
|
|
448 |
" if math.isclose(float(r), float(res), abs_tol=0.5):\n",
|
449 |
" return True, query_match, True\n",
|
450 |
" except:\n",
|
451 |
-
" if r in res or res in r:\n",
|
452 |
" return True, query_match, True\n",
|
453 |
" \n",
|
454 |
" # Check if the model returned a sum of examples as opposed to the whole thing\n",
|
@@ -494,7 +497,8 @@
|
|
494 |
" return False, False, False\n",
|
495 |
"\n",
|
496 |
"# Obtain sample\n",
|
497 |
-
"
|
|
|
498 |
"print(sample[\"natural_query\"].values[0])\n",
|
499 |
"print(sample[\"sql_query\"].values[0])\n",
|
500 |
"print(sample[\"result\"].values[0])\n",
|
@@ -523,7 +527,7 @@
|
|
523 |
},
|
524 |
{
|
525 |
"cell_type": "code",
|
526 |
-
"execution_count":
|
527 |
"metadata": {},
|
528 |
"outputs": [],
|
529 |
"source": [
|
@@ -571,7 +575,7 @@
|
|
571 |
},
|
572 |
{
|
573 |
"cell_type": "code",
|
574 |
-
"execution_count":
|
575 |
"metadata": {},
|
576 |
"outputs": [
|
577 |
{
|
@@ -584,9 +588,9 @@
|
|
584 |
"Completed 200\n",
|
585 |
"\n",
|
586 |
"Less than 90 results:\n",
|
587 |
-
"Percent valid: 0.
|
588 |
-
"Percent SQLite matched: 0.
|
589 |
-
"Percent result matched: 0.
|
590 |
"Dataset length: 245\n"
|
591 |
]
|
592 |
}
|
@@ -606,51 +610,9 @@
|
|
606 |
},
|
607 |
{
|
608 |
"cell_type": "code",
|
609 |
-
"execution_count":
|
610 |
"metadata": {},
|
611 |
-
"outputs": [
|
612 |
-
{
|
613 |
-
"ename": "KeyboardInterrupt",
|
614 |
-
"evalue": "",
|
615 |
-
"output_type": "error",
|
616 |
-
"traceback": [
|
617 |
-
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
618 |
-
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
619 |
-
"Cell \u001b[1;32mIn[9], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m game_queries \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./train-data/queries_from_game.tsv\u001b[39m\u001b[38;5;124m\"\u001b[39m, sep\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m----> 2\u001b[0m \u001b[43mrun_evaluation\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgame_queries\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mQueries from game\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDataset length: \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28mlen\u001b[39m(game_queries)))\n",
|
620 |
-
"Cell \u001b[1;32mIn[7], line 10\u001b[0m, in \u001b[0;36mrun_evaluation\u001b[1;34m(nba_df, title)\u001b[0m\n\u001b[0;32m 8\u001b[0m message\u001b[38;5;241m=\u001b[39m[{ \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrole\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124muser\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcontent\u001b[39m\u001b[38;5;124m'\u001b[39m: input_text \u001b[38;5;241m+\u001b[39m row[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnatural_query\u001b[39m\u001b[38;5;124m\"\u001b[39m]}]\n\u001b[0;32m 9\u001b[0m inputs \u001b[38;5;241m=\u001b[39m tokenizer\u001b[38;5;241m.\u001b[39mapply_chat_template(message, add_generation_prompt\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, return_tensors\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpt\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mto(model\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[1;32m---> 10\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_new_tokens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m128\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdo_sample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtop_k\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m50\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtop_p\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.95\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_return_sequences\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43meos_token_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43meos_token_id\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# Obtain output\u001b[39;00m\n\u001b[0;32m 13\u001b[0m query_output \u001b[38;5;241m=\u001b[39m tokenizer\u001b[38;5;241m.\u001b[39mdecode(outputs[\u001b[38;5;241m0\u001b[39m][\u001b[38;5;28mlen\u001b[39m(inputs[\u001b[38;5;241m0\u001b[39m]):], skip_special_tokens\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
|
621 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\utils\\_contextlib.py:116\u001b[0m, in \u001b[0;36mcontext_decorator.<locals>.decorate_context\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 113\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[0;32m 114\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m 115\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[1;32m--> 116\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
622 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\generation\\utils.py:2326\u001b[0m, in \u001b[0;36mGenerationMixin.generate\u001b[1;34m(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, use_model_defaults, **kwargs)\u001b[0m\n\u001b[0;32m 2318\u001b[0m input_ids, model_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_expand_inputs_for_generation(\n\u001b[0;32m 2319\u001b[0m input_ids\u001b[38;5;241m=\u001b[39minput_ids,\n\u001b[0;32m 2320\u001b[0m expand_size\u001b[38;5;241m=\u001b[39mgeneration_config\u001b[38;5;241m.\u001b[39mnum_return_sequences,\n\u001b[0;32m 2321\u001b[0m is_encoder_decoder\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mis_encoder_decoder,\n\u001b[0;32m 2322\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodel_kwargs,\n\u001b[0;32m 2323\u001b[0m )\n\u001b[0;32m 2325\u001b[0m \u001b[38;5;66;03m# 12. run sample (it degenerates to greedy search when `generation_config.do_sample=False`)\u001b[39;00m\n\u001b[1;32m-> 2326\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sample\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 2327\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2328\u001b[0m \u001b[43m \u001b[49m\u001b[43mlogits_processor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprepared_logits_processor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2329\u001b[0m \u001b[43m \u001b[49m\u001b[43mstopping_criteria\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprepared_stopping_criteria\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2330\u001b[0m \u001b[43m \u001b[49m\u001b[43mgeneration_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgeneration_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2331\u001b[0m \u001b[43m \u001b[49m\u001b[43msynced_gpus\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msynced_gpus\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2332\u001b[0m \u001b[43m \u001b[49m\u001b[43mstreamer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstreamer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2333\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2334\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2336\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m generation_mode \u001b[38;5;129;01min\u001b[39;00m (GenerationMode\u001b[38;5;241m.\u001b[39mBEAM_SAMPLE, GenerationMode\u001b[38;5;241m.\u001b[39mBEAM_SEARCH):\n\u001b[0;32m 2337\u001b[0m \u001b[38;5;66;03m# 11. 
interleave input_ids with `num_beams` additional sequences per batch\u001b[39;00m\n\u001b[0;32m 2338\u001b[0m input_ids, model_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_expand_inputs_for_generation(\n\u001b[0;32m 2339\u001b[0m input_ids\u001b[38;5;241m=\u001b[39minput_ids,\n\u001b[0;32m 2340\u001b[0m expand_size\u001b[38;5;241m=\u001b[39mgeneration_config\u001b[38;5;241m.\u001b[39mnum_beams,\n\u001b[0;32m 2341\u001b[0m is_encoder_decoder\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mis_encoder_decoder,\n\u001b[0;32m 2342\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodel_kwargs,\n\u001b[0;32m 2343\u001b[0m )\n",
|
623 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\generation\\utils.py:3289\u001b[0m, in \u001b[0;36mGenerationMixin._sample\u001b[1;34m(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, **model_kwargs)\u001b[0m\n\u001b[0;32m 3287\u001b[0m is_prefill \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m 3288\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 3289\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_inputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[0;32m 3291\u001b[0m \u001b[38;5;66;03m# synced_gpus: don't waste resources running the code we don't need; kwargs must be updated before skipping\u001b[39;00m\n\u001b[0;32m 3292\u001b[0m model_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_model_kwargs_for_generation(\n\u001b[0;32m 3293\u001b[0m outputs,\n\u001b[0;32m 3294\u001b[0m model_kwargs,\n\u001b[0;32m 3295\u001b[0m is_encoder_decoder\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mis_encoder_decoder,\n\u001b[0;32m 3296\u001b[0m )\n",
|
624 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
625 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
|
626 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\accelerate\\hooks.py:170\u001b[0m, in \u001b[0;36madd_hook_to_module.<locals>.new_forward\u001b[1;34m(module, *args, **kwargs)\u001b[0m\n\u001b[0;32m 168\u001b[0m output \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39m_old_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 169\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 170\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_old_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 171\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m module\u001b[38;5;241m.\u001b[39m_hf_hook\u001b[38;5;241m.\u001b[39mpost_forward(module, output)\n",
|
627 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\utils\\deprecation.py:172\u001b[0m, in \u001b[0;36mdeprecate_kwarg.<locals>.wrapper.<locals>.wrapped_func\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 168\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m minimum_action \u001b[38;5;129;01min\u001b[39;00m (Action\u001b[38;5;241m.\u001b[39mNOTIFY, Action\u001b[38;5;241m.\u001b[39mNOTIFY_ALWAYS) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torchdynamo_compiling():\n\u001b[0;32m 169\u001b[0m \u001b[38;5;66;03m# DeprecationWarning is ignored by default, so we use FutureWarning instead\u001b[39;00m\n\u001b[0;32m 170\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(message, \u001b[38;5;167;01mFutureWarning\u001b[39;00m, stacklevel\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2\u001b[39m)\n\u001b[1;32m--> 172\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
628 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\models\\llama\\modeling_llama.py:853\u001b[0m, in \u001b[0;36mLlamaForCausalLM.forward\u001b[1;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, cache_position, logits_to_keep, **kwargs)\u001b[0m\n\u001b[0;32m 850\u001b[0m return_dict \u001b[38;5;241m=\u001b[39m return_dict \u001b[38;5;28;01mif\u001b[39;00m return_dict \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39muse_return_dict\n\u001b[0;32m 852\u001b[0m \u001b[38;5;66;03m# decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)\u001b[39;00m\n\u001b[1;32m--> 853\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 854\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 855\u001b[0m \u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 856\u001b[0m \u001b[43m \u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mposition_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 857\u001b[0m \u001b[43m \u001b[49m\u001b[43mpast_key_values\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpast_key_values\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 858\u001b[0m \u001b[43m \u001b[49m\u001b[43minputs_embeds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs_embeds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 859\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 860\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 861\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 862\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 863\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_position\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_position\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 864\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 865\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 867\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m outputs[\u001b[38;5;241m0\u001b[39m]\n\u001b[0;32m 868\u001b[0m \u001b[38;5;66;03m# Only compute necessary logits, and do not upcast them to float if we are not computing the loss\u001b[39;00m\n",
|
629 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
630 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
|
631 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\accelerate\\hooks.py:170\u001b[0m, in \u001b[0;36madd_hook_to_module.<locals>.new_forward\u001b[1;34m(module, *args, **kwargs)\u001b[0m\n\u001b[0;32m 168\u001b[0m output \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39m_old_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 169\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 170\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_old_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 171\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m module\u001b[38;5;241m.\u001b[39m_hf_hook\u001b[38;5;241m.\u001b[39mpost_forward(module, output)\n",
|
632 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\models\\llama\\modeling_llama.py:601\u001b[0m, in \u001b[0;36mLlamaModel.forward\u001b[1;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, cache_position, **flash_attn_kwargs)\u001b[0m\n\u001b[0;32m 589\u001b[0m layer_outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_gradient_checkpointing_func(\n\u001b[0;32m 590\u001b[0m decoder_layer\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__call__\u001b[39m,\n\u001b[0;32m 591\u001b[0m hidden_states,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 598\u001b[0m position_embeddings,\n\u001b[0;32m 599\u001b[0m )\n\u001b[0;32m 600\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 601\u001b[0m layer_outputs \u001b[38;5;241m=\u001b[39m \u001b[43mdecoder_layer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 602\u001b[0m \u001b[43m \u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 603\u001b[0m \u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcausal_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 604\u001b[0m \u001b[43m \u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mposition_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 605\u001b[0m \u001b[43m \u001b[49m\u001b[43mpast_key_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpast_key_values\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 606\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 607\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 608\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_position\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_position\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 609\u001b[0m \u001b[43m \u001b[49m\u001b[43mposition_embeddings\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mposition_embeddings\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 610\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mflash_attn_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 611\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 613\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m layer_outputs[\u001b[38;5;241m0\u001b[39m]\n\u001b[0;32m 615\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m output_attentions:\n",
|
633 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
634 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
|
635 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\accelerate\\hooks.py:170\u001b[0m, in \u001b[0;36madd_hook_to_module.<locals>.new_forward\u001b[1;34m(module, *args, **kwargs)\u001b[0m\n\u001b[0;32m 168\u001b[0m output \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39m_old_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 169\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 170\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_old_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 171\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m module\u001b[38;5;241m.\u001b[39m_hf_hook\u001b[38;5;241m.\u001b[39mpost_forward(module, output)\n",
|
636 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\models\\llama\\modeling_llama.py:343\u001b[0m, in \u001b[0;36mLlamaDecoderLayer.forward\u001b[1;34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, cache_position, position_embeddings, **kwargs)\u001b[0m\n\u001b[0;32m 340\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minput_layernorm(hidden_states)\n\u001b[0;32m 342\u001b[0m \u001b[38;5;66;03m# Self Attention\u001b[39;00m\n\u001b[1;32m--> 343\u001b[0m hidden_states, self_attn_weights \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mself_attn\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 344\u001b[0m \u001b[43m \u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 345\u001b[0m \u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 346\u001b[0m \u001b[43m \u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mposition_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 347\u001b[0m \u001b[43m \u001b[49m\u001b[43mpast_key_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpast_key_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 348\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 349\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 350\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_position\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_position\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 351\u001b[0m \u001b[43m \u001b[49m\u001b[43mposition_embeddings\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mposition_embeddings\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 352\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 353\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 354\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m residual \u001b[38;5;241m+\u001b[39m hidden_states\n\u001b[0;32m 356\u001b[0m \u001b[38;5;66;03m# Fully Connected\u001b[39;00m\n",
|
637 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
638 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
|
639 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\accelerate\\hooks.py:170\u001b[0m, in \u001b[0;36madd_hook_to_module.<locals>.new_forward\u001b[1;34m(module, *args, **kwargs)\u001b[0m\n\u001b[0;32m 168\u001b[0m output \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39m_old_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 169\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 170\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_old_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 171\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m module\u001b[38;5;241m.\u001b[39m_hf_hook\u001b[38;5;241m.\u001b[39mpost_forward(module, output)\n",
|
640 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\models\\llama\\modeling_llama.py:277\u001b[0m, in \u001b[0;36mLlamaAttention.forward\u001b[1;34m(self, hidden_states, position_embeddings, attention_mask, past_key_value, cache_position, **kwargs)\u001b[0m\n\u001b[0;32m 274\u001b[0m input_shape \u001b[38;5;241m=\u001b[39m hidden_states\u001b[38;5;241m.\u001b[39mshape[:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m 275\u001b[0m hidden_shape \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m*\u001b[39minput_shape, \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhead_dim)\n\u001b[1;32m--> 277\u001b[0m query_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mq_proj\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mview(hidden_shape)\u001b[38;5;241m.\u001b[39mtranspose(\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m)\n\u001b[0;32m 278\u001b[0m key_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mk_proj(hidden_states)\u001b[38;5;241m.\u001b[39mview(hidden_shape)\u001b[38;5;241m.\u001b[39mtranspose(\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m)\n\u001b[0;32m 279\u001b[0m value_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mv_proj(hidden_states)\u001b[38;5;241m.\u001b[39mview(hidden_shape)\u001b[38;5;241m.\u001b[39mtranspose(\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m)\n",
|
641 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
642 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
|
643 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\accelerate\\hooks.py:170\u001b[0m, in \u001b[0;36madd_hook_to_module.<locals>.new_forward\u001b[1;34m(module, *args, **kwargs)\u001b[0m\n\u001b[0;32m 168\u001b[0m output \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39m_old_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 169\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 170\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_old_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 171\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m module\u001b[38;5;241m.\u001b[39m_hf_hook\u001b[38;5;241m.\u001b[39mpost_forward(module, output)\n",
|
644 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\nn\\modules.py:990\u001b[0m, in \u001b[0;36mLinear8bitLt.forward\u001b[1;34m(self, x)\u001b[0m\n\u001b[0;32m 987\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbias \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbias\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m!=\u001b[39m x\u001b[38;5;241m.\u001b[39mdtype:\n\u001b[0;32m 988\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbias\u001b[38;5;241m.\u001b[39mdata \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbias\u001b[38;5;241m.\u001b[39mdata\u001b[38;5;241m.\u001b[39mto(x\u001b[38;5;241m.\u001b[39mdtype)\n\u001b[1;32m--> 990\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mbnb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmatmul\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbias\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstate\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 992\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mhas_fp16_weights \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mCB \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 993\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mweight\u001b[38;5;241m.\u001b[39mdata \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mCB\n",
|
645 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\autograd\\_functions.py:509\u001b[0m, in \u001b[0;36mmatmul\u001b[1;34m(A, B, out, state, threshold, bias)\u001b[0m\n\u001b[0;32m 507\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m threshold \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0.0\u001b[39m:\n\u001b[0;32m 508\u001b[0m state\u001b[38;5;241m.\u001b[39mthreshold \u001b[38;5;241m=\u001b[39m threshold\n\u001b[1;32m--> 509\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mMatMul8bitLt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mA\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mB\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m)\u001b[49m\n",
|
646 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\autograd\\function.py:574\u001b[0m, in \u001b[0;36mFunction.apply\u001b[1;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[0;32m 571\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_C\u001b[38;5;241m.\u001b[39m_are_functorch_transforms_active():\n\u001b[0;32m 572\u001b[0m \u001b[38;5;66;03m# See NOTE: [functorch vjp and autograd interaction]\u001b[39;00m\n\u001b[0;32m 573\u001b[0m args \u001b[38;5;241m=\u001b[39m _functorch\u001b[38;5;241m.\u001b[39mutils\u001b[38;5;241m.\u001b[39munwrap_dead_wrappers(args)\n\u001b[1;32m--> 574\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 576\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_setup_ctx_defined:\n\u001b[0;32m 577\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[0;32m 578\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIn order to use an autograd.Function with functorch transforms \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 579\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(vmap, grad, jvp, jacrev, ...), it must override the setup_context \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 580\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstaticmethod. For more details, please see \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 581\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttps://pytorch.org/docs/main/notes/extending.func.html\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 582\u001b[0m )\n",
|
647 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\autograd\\_functions.py:326\u001b[0m, in \u001b[0;36mMatMul8bitLt.forward\u001b[1;34m(ctx, A, B, out, bias, state)\u001b[0m\n\u001b[0;32m 323\u001b[0m CA, CAt, SCA, SCAt, outlier_cols \u001b[38;5;241m=\u001b[39m F\u001b[38;5;241m.\u001b[39mint8_double_quant(A\u001b[38;5;241m.\u001b[39mto(torch\u001b[38;5;241m.\u001b[39mfloat16), threshold\u001b[38;5;241m=\u001b[39mstate\u001b[38;5;241m.\u001b[39mthreshold)\n\u001b[0;32m 324\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 325\u001b[0m \u001b[38;5;66;03m# Fast path\u001b[39;00m\n\u001b[1;32m--> 326\u001b[0m CA, SCA, outlier_cols \u001b[38;5;241m=\u001b[39m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mint8_vectorwise_quant\u001b[49m\u001b[43m(\u001b[49m\u001b[43mA\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloat16\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mthreshold\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mthreshold\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 327\u001b[0m CAt \u001b[38;5;241m=\u001b[39m SCAt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 329\u001b[0m has_grad \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n",
|
648 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\functional.py:2789\u001b[0m, in \u001b[0;36mint8_vectorwise_quant\u001b[1;34m(A, threshold)\u001b[0m\n\u001b[0;32m 2786\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m outliers\u001b[38;5;241m.\u001b[39many():\n\u001b[0;32m 2787\u001b[0m outlier_cols \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39margwhere(outliers\u001b[38;5;241m.\u001b[39many(dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m))\u001b[38;5;241m.\u001b[39mview(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m-> 2789\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43m_cuda_device_of\u001b[49m\u001b[43m(\u001b[49m\u001b[43mA\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[0;32m 2790\u001b[0m lib\u001b[38;5;241m.\u001b[39mcint8_vector_quant(\n\u001b[0;32m 2791\u001b[0m get_ptr(A),\n\u001b[0;32m 2792\u001b[0m get_ptr(out_row),\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 2797\u001b[0m _get_tensor_stream(A),\n\u001b[0;32m 2798\u001b[0m )\n\u001b[0;32m 2800\u001b[0m \u001b[38;5;66;03m# Zero out values from outlier columns across all rows.\u001b[39;00m\n\u001b[0;32m 2801\u001b[0m \u001b[38;5;66;03m# The kernel will handle this for outliers themselves, so we can optimize for rows=1.\u001b[39;00m\n",
|
649 |
-
"File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\functional.py:205\u001b[0m, in \u001b[0;36m_cuda_device_of\u001b[1;34m(a)\u001b[0m\n\u001b[0;32m 202\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 203\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcontextlib\u001b[39;00m\n\u001b[1;32m--> 205\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_cuda_device_of\u001b[39m(a: torch\u001b[38;5;241m.\u001b[39mTensor):\n\u001b[0;32m 206\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m contextlib\u001b[38;5;241m.\u001b[39mnullcontext()\n\u001b[0;32m 209\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_paged\u001b[39m(\u001b[38;5;241m*\u001b[39mshape, dtype\u001b[38;5;241m=\u001b[39mtorch\u001b[38;5;241m.\u001b[39mfloat32, device\u001b[38;5;241m=\u001b[39mFIRST_CUDA_DEVICE):\n",
|
650 |
-
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
|
651 |
-
]
|
652 |
-
}
|
653 |
-
],
|
654 |
"source": [
|
655 |
"game_queries = pd.read_csv(\"./train-data/queries_from_game.tsv\", sep='\\t')\n",
|
656 |
"run_evaluation(game_queries, \"Queries from game\")\n",
|
|
|
16 |
},
|
17 |
{
|
18 |
"cell_type": "code",
|
19 |
+
"execution_count": 10,
|
20 |
"metadata": {},
|
21 |
"outputs": [
|
22 |
{
|
|
|
26 |
"Total dataset examples: 1044\n",
|
27 |
"\n",
|
28 |
"\n",
|
29 |
+
"What is the average number of points in the paint allowed by the Chicago Bulls when playing at home in the 2001 season in games with more than 15 lead changes?\n",
|
30 |
+
"SELECT AVG(o.pts_paint_away) FROM game g JOIN other_stats o ON g.game_id = o.game_id WHERE g.team_abbreviation_home = 'CHI' AND g.season_id = '22001' AND o.lead_changes > 15;\n",
|
31 |
+
"31.333333333333332\n"
|
32 |
]
|
33 |
}
|
34 |
],
|
|
|
58 |
},
|
59 |
{
|
60 |
"cell_type": "code",
|
61 |
+
"execution_count": 11,
|
62 |
"metadata": {},
|
63 |
"outputs": [
|
64 |
{
|
|
|
92 |
},
|
93 |
{
|
94 |
"cell_type": "code",
|
95 |
+
"execution_count": 12,
|
96 |
"metadata": {},
|
97 |
"outputs": [],
|
98 |
"source": [
|
99 |
+
"input_text = input_prompt = \"\"\"You are an AI assistant that converts natural language queries into valid SQLite queries.\n",
|
100 |
"Database Schema and Explanations\n",
|
101 |
"\n",
|
102 |
"team Table\n",
|
|
|
251 |
"Request:\n",
|
252 |
"\"What is the most points the Los Angeles Lakers have ever scored at home?\"\n",
|
253 |
"SQLite:\n",
|
254 |
+
"SELECT MAX(pts_home) FROM game WHERE team_name_home = 'Los Angeles Lakers';\n",
|
|
|
|
|
255 |
"\n",
|
256 |
"Request:\n",
|
257 |
"\"Which teams are located in the state of California?\"\n",
|
|
|
271 |
"Request:\n",
|
272 |
"\"Find the Boston Celtics largest home victory margin in the 2008 season.\"\n",
|
273 |
"SQLite:\n",
|
274 |
+
"SELECT MAX(pts_home - pts_away) AS biggest_win FROM game WHERE team_name_home = 'Boston Celtics' AND season_id = '22008';\n",
|
|
|
|
|
275 |
"\n",
|
276 |
"Generate only the SQLite query prefaced by SQLite: and no other text, do not output an explanation of the query. Now generate an SQLite query for the following user request. Request:\n",
|
277 |
"\"\"\""
|
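In the evaluation loop (visible in the removed traceback earlier in this diff), this prompt string is concatenated with each natural-language request and wrapped in a single chat message. A sketch of that step, written as a helper with the model, tokenizer, and prompt passed in as assumptions:

    def generate_sql(model, tokenizer, input_text, natural_query, max_new_tokens=128):
        # Combine the schema prompt with one request and greedily decode the answer.
        message = [{"role": "user", "content": input_text + natural_query}]
        inputs = tokenizer.apply_chat_template(
            message, add_generation_prompt=True, return_tensors="pt"
        ).to(model.device)
        outputs = model.generate(inputs, max_new_tokens=max_new_tokens, do_sample=False)
        return tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)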
|
|
286 |
},
|
287 |
{
|
288 |
"cell_type": "code",
|
289 |
+
"execution_count": 13,
|
290 |
"metadata": {},
|
291 |
"outputs": [
|
292 |
{
|
293 |
"name": "stdout",
|
294 |
"output_type": "stream",
|
295 |
"text": [
|
296 |
+
"SQLite: SELECT AVG(pts_paint_home) FROM other_stats WHERE team_name_home = 'Chicago Bulls' AND season_id = '22001' AND lead_changes > 15;\n",
|
297 |
"\n"
|
298 |
]
|
299 |
}
|
|
|
318 |
},
|
319 |
{
|
320 |
"cell_type": "code",
|
321 |
+
"execution_count": 14,
|
322 |
"metadata": {},
|
323 |
"outputs": [
|
324 |
{
|
325 |
"name": "stdout",
|
326 |
"output_type": "stream",
|
327 |
"text": [
|
328 |
+
"SELECT AVG(pts_paint_home) FROM other_stats WHERE team_name_home = 'Chicago Bulls' AND season_id = '22001' AND lead_changes > 15;\n"
|
|
|
329 |
]
|
330 |
}
|
331 |
],
|
|
|
369 |
},
|
370 |
{
|
371 |
"cell_type": "code",
|
372 |
+
"execution_count": 15,
|
373 |
"metadata": {},
|
374 |
"outputs": [
|
375 |
{
|
376 |
"name": "stdout",
|
377 |
"output_type": "stream",
|
378 |
"text": [
|
379 |
+
"What is the average number of fg_pct in home games by the Los Angeles Lakers?\n",
|
380 |
+
"SELECT AVG(fg_pct_home) FROM game WHERE team_name_home = 'Los Angeles Lakers';\n",
|
381 |
+
"0.4782432016418667\n",
|
382 |
+
"SQLite: AVG(fg_pct_home) FROM game WHERE team_name_home = 'Los Angeles Lakers';\n",
|
383 |
"\n",
|
384 |
+
"Statement valid? False\n",
|
385 |
"SQLite matched? False\n",
|
386 |
+
"Result matched? False\n"
|
387 |
]
|
388 |
}
|
389 |
],
|
|
|
392 |
"\n",
|
393 |
"def compare_result(sample_query, sample_result, query_output):\n",
|
394 |
" # Clean model output to only have the query output\n",
|
395 |
+
" if query_output[0:8] == \"SQLite:\\n\":\n",
|
396 |
" query = query_output[8:]\n",
|
397 |
+
" elif query_output[0:8] == \"SQLite: \":\n",
|
398 |
+
" query = query_output[8:]\n",
|
399 |
+
" elif query_output[0:7] == \"SQLite:\":\n",
|
400 |
+
" query = query_output[7:]\n",
|
401 |
+
" elif query_output[0:5] == \"SQL:\\n\":\n",
|
402 |
+
" query = query_output[5:]\n",
|
403 |
" elif query_output[0:5] == \"SQL: \":\n",
|
404 |
" query = query_output[5:]\n",
|
405 |
+
" elif query_output[0:4] == \"SQL:\":\n",
|
406 |
+
" query = query_output[4:]\n",
|
407 |
" else:\n",
|
408 |
" query = query_output\n",
|
409 |
"\n",
|
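The expanded elif chain above strips a leading "SQLite:" or "SQL:" label whether it is followed by a newline, a space, or nothing. An equivalent, more compact formulation (illustration only, not part of the commit):

    import re

    def strip_sql_label(query_output: str) -> str:
        # Drop a leading "SQLite:" or "SQL:" label plus one optional
        # trailing space or newline, mirroring the elif chain above.
        return re.sub(r"^(SQLite:|SQL:)[ \n]?", "", query_output)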
|
|
451 |
" if math.isclose(float(r), float(res), abs_tol=0.5):\n",
|
452 |
" return True, query_match, True\n",
|
453 |
" except:\n",
|
454 |
+
" if str(r) in res or res in str(r):\n",
|
455 |
" return True, query_match, True\n",
|
456 |
" \n",
|
457 |
" # Check if the model returned a sum of examples as opposed to the whole thing\n",
|
|
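The updated comparison first tries a tolerant numeric match and, when the values cannot be parsed as floats, falls back to a substring check on the string forms. A self-contained sketch of that logic (the real compare_result function also handles several other cases):

    import math

    def values_match(r, res) -> bool:
        # Tolerant numeric comparison first, substring fallback second.
        try:
            return math.isclose(float(r), float(res), abs_tol=0.5)
        except (TypeError, ValueError):
            return str(r) in str(res) or str(res) in str(r)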
|
497 |
" return False, False, False\n",
|
498 |
"\n",
|
499 |
"# Obtain sample\n",
|
500 |
+
"less_than_90_df = pd.read_csv(\"./train-data/less_than_90.tsv\", sep='\\t')\n",
|
501 |
+
"sample = less_than_90_df.sample(n=1)\n",
|
502 |
"print(sample[\"natural_query\"].values[0])\n",
|
503 |
"print(sample[\"sql_query\"].values[0])\n",
|
504 |
"print(sample[\"result\"].values[0])\n",
|
|
|
527 |
},
|
528 |
{
|
529 |
"cell_type": "code",
|
530 |
+
"execution_count": 16,
|
531 |
"metadata": {},
|
532 |
"outputs": [],
|
533 |
"source": [
|
|
|
575 |
},
|
576 |
{
|
577 |
"cell_type": "code",
|
578 |
+
"execution_count": 17,
|
579 |
"metadata": {},
|
580 |
"outputs": [
|
581 |
{
|
|
|
588 |
"Completed 200\n",
|
589 |
"\n",
|
590 |
"Less than 90 results:\n",
|
591 |
+
"Percent valid: 0.49795918367346936\n",
|
592 |
+
"Percent SQLite matched: 0.27346938775510204\n",
|
593 |
+
"Percent result matched: 0.4122448979591837\n",
|
594 |
"Dataset length: 245\n"
|
595 |
]
|
596 |
}
|
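The percentages printed in this hunk are plain ratios over the evaluated rows. Below is a sketch of how such a summary could be tallied; the actual run_evaluation function is defined earlier in the notebook and may differ in detail.

    def summarize(results, title):
        # results: one (valid, query_match, result_match) tuple per evaluated row.
        n = len(results)
        print(title + " results:")
        print("Percent valid:", sum(v for v, _, _ in results) / n)
        print("Percent SQLite matched:", sum(q for _, q, _ in results) / n)
        print("Percent result matched:", sum(m for _, _, m in results) / n)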
|
|
610 |
},
|
611 |
{
|
612 |
"cell_type": "code",
|
613 |
+
"execution_count": null,
|
614 |
"metadata": {},
|
615 |
+
"outputs": [],
|
616 |
"source": [
|
617 |
"game_queries = pd.read_csv(\"./train-data/queries_from_game.tsv\", sep='\\t')\n",
|
618 |
"run_evaluation(game_queries, \"Queries from game\")\n",
|