Ffftdtd5dtft committed: Update app.py

app.py CHANGED
@@ -3,17 +3,18 @@ import shutil
 import subprocess
 import signal
 import gradio as gr
-from huggingface_hub import create_repo, HfApi, whoami, ModelCard
+from huggingface_hub import create_repo, HfApi, snapshot_download, whoami, ModelCard
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from apscheduler.schedulers.background import BackgroundScheduler
 from textwrap import dedent
 
+# Ensure the token is set from the environment, if not prompt user
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
 def ensure_valid_token(oauth_token):
-    if oauth_token
+    if not oauth_token or not oauth_token.strip():
         raise ValueError("You must be logged in.")
-    return oauth_token.
+    return oauth_token.strip()
 
 def generate_importance_matrix(model_path, train_data_path):
     imatrix_command = f"./llama-imatrix -m ../{model_path} -f {train_data_path} -ngl 99 --output-frequency 10"
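A quick check of the patched validator, as a standalone sketch (the function body is copied from the hunk above; the sample tokens are made up):

# sketch: the patched ensure_valid_token, exercised in isolation
def ensure_valid_token(oauth_token):
    if not oauth_token or not oauth_token.strip():
        raise ValueError("You must be logged in.")
    return oauth_token.strip()

assert ensure_valid_token("  hf_example  ") == "hf_example"  # whitespace stripped
for bad in (None, "", "   "):
    try:
        ensure_valid_token(bad)
    except ValueError:
        pass  # empty or missing tokens are rejected, as intended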
@@ -37,14 +38,16 @@ def generate_importance_matrix(model_path, train_data_path):
     try:
         process.wait(timeout=5)
     except subprocess.TimeoutExpired:
-        print("Imatrix
+        print("Imatrix proc still didn't terminate. Forcefully terminating process...")
         process.kill()
 
     os.chdir("..")
+
     print("Importance matrix generation completed.")
 
 def split_upload_model(model_path, repo_id, oauth_token, split_max_tensors=256, split_max_size=None):
-
+    if not oauth_token or not oauth_token.strip():
+        raise ValueError("You have to be logged in.")
 
     split_cmd = f"llama.cpp/llama-gguf-split --split --split-max-tensors {split_max_tensors}"
     if split_max_size:
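For reference, the split command assembled here expands along these lines (a sketch; the tail arguments and file names are assumptions, since the hunk ends before they appear):

# sketch: assembling the llama-gguf-split invocation; paths are hypothetical
split_max_tensors = 256
split_max_size = None  # or e.g. "2G" to cap shard size instead
model_path = "model.gguf"  # hypothetical quantized model

split_cmd = f"llama.cpp/llama-gguf-split --split --split-max-tensors {split_max_tensors}"
if split_max_size:
    split_cmd += f" --split-max-size {split_max_size}"
split_cmd += f" {model_path} {model_path.split('.')[0]}"  # assumed input/output tail
print(split_cmd)
# llama.cpp/llama-gguf-split --split --split-max-tensors 256 model.gguf model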
@@ -60,11 +63,11 @@ def split_upload_model(model_path, repo_id, oauth_token, split_max_tensors=256, split_max_size=None):
     if result.returncode != 0:
         raise Exception(f"Error splitting the model: {result.stderr}")
     print("Model split successfully!")
-
+
     sharded_model_files = [f for f in os.listdir('.') if f.startswith(model_path.split('.')[0])]
     if sharded_model_files:
         print(f"Sharded model files: {sharded_model_files}")
-        api = HfApi(token=
+        api = HfApi(token=oauth_token)
         for file in sharded_model_files:
             file_path = os.path.join('.', file)
             print(f"Uploading file: {file_path}")
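With the token now passed through, each shard is uploaded individually; a minimal sketch of one such upload (repo, shard, and token values are hypothetical, and upload_file is one plausible huggingface_hub call for this step):

# sketch: per-shard upload with huggingface_hub; identifiers are made up
from huggingface_hub import HfApi

api = HfApi(token="hf_example_token")  # hypothetical token
api.upload_file(
    path_or_fileobj="./model-00001-of-00002.gguf",  # hypothetical shard
    path_in_repo="model-00001-of-00002.gguf",
    repo_id="user/model-GGUF",  # hypothetical target repo
)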
@@ -201,7 +204,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
     card.save(f"README.md")
 
     if split_model:
-        split_upload_model(quantized_gguf_path, new_repo_id,
+        split_upload_model(quantized_gguf_path, new_repo_id, token, split_max_tensors, split_max_size)
     else:
         try:
             print(f"Uploading quantized model: {quantized_gguf_path}")
@@ -241,123 +244,32 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
     shutil.rmtree(model_name, ignore_errors=True)
     print("Folder cleaned up successfully!")
 
-
-.
-
-        label="
-
-        imatrix_q_method = gr.Dropdown(
-            [
-            label="Imatrix Quantization Method",
-            info="GGML imatrix quants type",
-            value="IQ4_NL",
-            filterable=False,
-            visible=False
-        )
-
-        use_imatrix = gr.Checkbox(
-            value=False,
-            label="Use Imatrix Quantization",
-            info="Use importance matrix for quantization."
-        )
-
-        private_repo = gr.Checkbox(
-            value=False,
-            label="Private Repo",
-            info="Create a private repo under your username."
-        )
-
-        train_data_file = gr.File(
-            label="Training Data File",
-            file_types=["txt"],
-            visible=False
-        )
-
-        split_model = gr.Checkbox(
-            value=False,
-            label="Split Model",
-            info="Shard the model using gguf-split."
-        )
-
-        split_max_tensors = gr.Number(
-            value=256,
-            label="Max Tensors per File",
-            info="Maximum number of tensors per file when splitting model.",
-            visible=False
-        )
-
-        split_max_size = gr.Textbox(
-            label="Max File Size",
-            info="Maximum file size when splitting model (--split-max-size). May leave empty to use the default.",
-            visible=False
-        )
-
-    def update_visibility(use_imatrix):
-        return gr.update(visible=not use_imatrix), gr.update(visible=use_imatrix), gr.update(visible=use_imatrix)
-
-    use_imatrix.change(
-        fn=update_visibility,
-        inputs=use_imatrix,
-        outputs=[q_method, imatrix_q_method, train_data_file]
-    )
-
-    iface = gr.Interface(
-        fn=process_model,
-        inputs=[
-            model_id,
-            q_method,
-            use_imatrix,
-            imatrix_q_method,
-            private_repo,
-            train_data_file,
-            split_model,
-            split_max_tensors,
-            split_max_size,
-            gr.Textbox(value=HF_TOKEN, label="Hugging Face Token", type="password") # Correct token input field
-        ],
-        outputs=[
-            gr.Markdown(label="output"),
-            gr.Image(show_label=False),
-        ],
-        title="Create your own GGUF Quants, blazingly fast ⚡!",
-        description="The space takes an HF repo as an input, quantizes it and creates a Public repo containing the selected quant under your HF user namespace.",
-        api_name=False
-    )
-
-    def update_split_visibility(split_model):
-        return gr.update(visible=split_model), gr.update(visible=split_model)
-
-    split_model.change(
-        fn=update_split_visibility,
-        inputs=split_model,
-        outputs=[split_max_tensors, split_max_size]
-    )
-
-def restart_space():
-    HfApi().restart_space(repo_id="ggml-org/gguf-my-repo", token=HF_TOKEN, factory_reboot=True)
-
-scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=21600)
-scheduler.start()
-
-# Launch the interface
-demo.queue(default_concurrency_limit=1, max_size=5).launch(debug=True, show_api=False)
+with gr.Blocks() as app:
+    gr.Markdown("# Model Processing")
+
+    # Input fields for model processing
+    with gr.Row():
+        model_id = gr.Textbox(label="Model ID", placeholder="e.g., user/model_name")
+        q_method = gr.Dropdown(["method1", "method2"], label="Quantization Method")
+        use_imatrix = gr.Checkbox(label="Use Importance Matrix")
+        imatrix_q_method = gr.Dropdown(["methodA", "methodB"], label="Importance Matrix Method")
+        private_repo = gr.Checkbox(label="Private Repository")
+        train_data_file = gr.File(label="Training Data File", type="file")
+        split_model = gr.Checkbox(label="Split Model")
+        split_max_tensors = gr.Number(label="Max Tensors (for splitting)", value=256)
+        split_max_size = gr.Number(label="Max Size (for splitting)", value=None)
+        oauth_token = gr.Textbox(label="Hugging Face Token", type="password")
+
+    # Output fields
+    result = gr.HTML()
+    img = gr.Image()
+
+    # Process button
+    process_button = gr.Button("Process Model")
+    process_button.click(
+        process_model,
+        inputs=[model_id, q_method, use_imatrix, imatrix_q_method, private_repo, train_data_file, split_model, split_max_tensors, split_max_size, oauth_token],
+        outputs=[result, img]
+    )
+
+app.launch()
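The new UI swaps gr.Interface (along with the visibility handlers and the scheduled restart_space job, all removed above) for an explicit gr.Blocks layout. The wiring pattern, reduced to a self-contained sketch with illustrative components:

# sketch: the Blocks + Button.click pattern the new UI is built on
import gradio as gr

def process_model(model_id):
    return f"Processed {model_id}"  # stand-in for the real pipeline

with gr.Blocks() as app:
    model_id = gr.Textbox(label="Model ID")
    result = gr.HTML()
    process_button = gr.Button("Process Model")
    process_button.click(process_model, inputs=[model_id], outputs=[result])

if __name__ == "__main__":
    app.launch()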