Ffftdtd5dtft committed: Update app.py

app.py CHANGED
@@ -196,73 +196,34 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
 ```
 cd llama.cpp && LLAMA_CURL=1 make
 ```
-Step 3:
-```
-
-```
-"""
-)
-        if
-
-        else:
-            api.upload_file(
-                path_or_fileobj=quantized_gguf_path,
-                path_in_repo=quantized_gguf_name,
-                repo_id=new_repo_id,
-            )
-            card.push_to_hub(repo_id=new_repo_id, token=oauth_token.token)
-            print("Quantized model uploaded and model card created successfully!")
-            return f"Quantized model uploaded to: {new_repo_url}"
-
-    except Exception as e:
-        print(
-        raise
-
-
-hf_token_input = gr.Textbox(label="HF Token", type="password", value=HF_TOKEN, visible=False, interactive=False)
-hf_token = gr.oauth.OAuth(hf_token_input)
-
-model_id = HuggingfaceHubSearch(label="Select a model from HuggingFace Hub")
-
-quantization_method = gr.Dropdown(
-    ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"], label="Select quantization method")
-imatrix_quantization_method = gr.Dropdown(
-    ["IQ3_M", "IQ3_XXS", "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS", "Q5_K_M", "Q5_K_S"], label="Select imatrix quantization method", visible=False)
-use_imatrix_checkbox = gr.Checkbox(label="Use imatrix")
-private_repo_checkbox = gr.Checkbox(label="Create a private repo")
-train_data_upload = gr.File(label="Upload train data for imatrix (optional)", visible=False)
-split_model_checkbox = gr.Checkbox(label="Split model", visible=False)
-split_max_tensors = gr.Number(label="Split Max Tensors", visible=False)
-split_max_size = gr.Number(label="Split Max Size (MB)", visible=False)
-
-quantized_model_output = gr.Textbox(label="Output")
-
-use_imatrix_checkbox.change(fn=lambda x: [
-    imatrix_quantization_method.update(visible=x),
-    train_data_upload.update(visible=x),
-    split_model_checkbox.update(visible=x),
-    split_max_tensors.update(visible=x),
-    split_max_size.update(visible=x)
-], inputs=use_imatrix_checkbox, outputs=[imatrix_quantization_method, train_data_upload, split_model_checkbox, split_max_tensors, split_max_size])
-
-process_button = gr.Button(label="Quantize and Upload")
-
-process_button.click(
-    process_model,
-    inputs=[
-        model_id,
-        quantization_method,
-        use_imatrix_checkbox,
-        imatrix_quantization_method,
-        private_repo_checkbox,
-        train_data_upload,
-        split_model_checkbox,
-        split_max_tensors,
-        split_max_size,
-        hf_token
-    ],
-    outputs=[quantized_model_output],
-)
-
-if __name__ == "__main__":
-    demo.launch()
+Step 3: Fetch model weights from HF using curl command and use them with the above `llama_cli` or `llama_server`.
+```
+curl -L {new_repo_id} > .gguf/{quantized_gguf_name}
+```
+"""
+)
+        if use_imatrix:
+            card.text += "\nNote: This model was quantized using imatrix."
+
+        card.push_to_hub(repo_id=new_repo_id, token=oauth_token.token)
+        api.upload_file(
+            path_or_fileobj=quantized_gguf_path,
+            path_in_repo=quantized_gguf_name,
+            repo_id=new_repo_id,
+            token=oauth_token.token,
+        )
+        print("Pushed model to the hub!")
+        if split_model:
+            split_upload_model(quantized_gguf_name, new_repo_id, oauth_token, split_max_tensors=split_max_tensors, split_max_size=split_max_size)
+    except Exception as e:
+        print("Error in process_model:", e)
+        raise e
+    finally:
+        os.makedirs("model_cache", exist_ok=True)
+        shutil.move(model_name, f"model_cache/{model_name}")
+        shutil.move(fp16, f"model_cache/{fp16}")
+        shutil.move(quantized_gguf_path, f"model_cache/{quantized_gguf_path}")
+        print("Moved model files to model_cache.")
+
+    print("Process completed successfully!")
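A note on the new "Step 3" text above: it tells users to pull the quantized weights with `curl -L {new_repo_id}` and feed them to `llama_cli` / `llama_server`. As a rough equivalent in Python, here is a minimal download sketch using `huggingface_hub` (the repo and file names are hypothetical placeholders, not values produced by this commit):

```python
from huggingface_hub import hf_hub_download

# Hypothetical repo and file names; in practice the README template fills
# these in from new_repo_id and quantized_gguf_name.
repo_id = "someuser/SomeModel-Q4_K_M-GGUF"
filename = "somemodel-q4_k_m.gguf"

# Downloads the GGUF from the Hub (following redirects, like `curl -L`)
# and returns the local cache path, ready to pass to llama.cpp's CLI/server.
local_path = hf_hub_download(repo_id=repo_id, filename=filename)
print(f"GGUF available at: {local_path}")
```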
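The added upload path in `process_model` pushes the model card first and then the quantized GGUF via `api.upload_file(...)`. A self-contained sketch of that same sequence with `huggingface_hub`, assuming placeholder names and an already-created target repo:

```python
from huggingface_hub import HfApi, ModelCard

# Placeholder values; in app.py these come from the quantization step and the
# user's OAuth token.
new_repo_id = "someuser/SomeModel-Q4_K_M-GGUF"
quantized_gguf_path = "somemodel-q4_k_m.gguf"
quantized_gguf_name = "somemodel-q4_k_m.gguf"
token = "hf_xxx"  # hypothetical token

# Build a minimal card, append the imatrix note, and push it to the repo.
card = ModelCard("# SomeModel-Q4_K_M-GGUF")
card.text += "\nNote: This model was quantized using imatrix."
card.push_to_hub(repo_id=new_repo_id, token=token)

# Upload the quantized file into the same repo.
api = HfApi(token=token)
api.upload_file(
    path_or_fileobj=quantized_gguf_path,
    path_in_repo=quantized_gguf_name,
    repo_id=new_repo_id,
    token=token,
)
print("Pushed model to the hub!")
```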
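`split_upload_model(...)`, called from the new `if split_model:` branch, is not shown in this hunk. One plausible building block for it, sketched here under the assumption that the Space shells out to llama.cpp's `gguf-split` tool (the binary path, flag names, and file names are assumptions, not taken from this commit; check your llama.cpp build):

```python
import subprocess

def split_gguf(gguf_path: str, out_prefix: str, max_tensors: int = 256) -> None:
    """Hypothetical sketch: shard a large GGUF before uploading the pieces."""
    subprocess.run(
        [
            "./llama.cpp/gguf-split",                 # assumed tool location
            "--split",
            "--split-max-tensors", str(max_tensors),  # or "--split-max-size", e.g. "5G"
            gguf_path,                                # input GGUF
            out_prefix,                               # prefix for the generated shards
        ],
        check=True,  # raise if gguf-split exits with an error
    )

split_gguf("somemodel-q4_k_m.gguf", "somemodel-q4_k_m")
```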
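On the removed UI wiring: `imatrix_quantization_method.update(visible=x)` and friends use the old Gradio `Component.update()` pattern, which later Gradio releases dropped in favour of returning `gr.update(...)` from the event handler. A minimal sketch of an equivalent visibility toggle in current Gradio (component set trimmed for brevity):

```python
import gradio as gr

with gr.Blocks() as demo:
    use_imatrix_checkbox = gr.Checkbox(label="Use imatrix")
    imatrix_quantization_method = gr.Dropdown(
        ["IQ3_M", "IQ3_XXS", "Q4_K_M", "Q4_K_S"],
        label="Select imatrix quantization method",
        visible=False,
    )
    train_data_upload = gr.File(label="Upload train data for imatrix (optional)", visible=False)

    # Return one gr.update(...) per output component to toggle its visibility.
    use_imatrix_checkbox.change(
        fn=lambda x: [gr.update(visible=x), gr.update(visible=x)],
        inputs=use_imatrix_checkbox,
        outputs=[imatrix_quantization_method, train_data_upload],
    )

if __name__ == "__main__":
    demo.launch()
```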