Ffftdtd5dtft committed
Commit a421ab7 · verified · 1 Parent(s): dc7d4b3

Update app.py

Files changed (1):
  app.py +38 -126
app.py CHANGED
@@ -3,17 +3,18 @@ import shutil
 import subprocess
 import signal
 import gradio as gr
-from huggingface_hub import create_repo, HfApi, whoami, ModelCard
+from huggingface_hub import create_repo, HfApi, snapshot_download, whoami, ModelCard
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from apscheduler.schedulers.background import BackgroundScheduler
 from textwrap import dedent
 
+# Ensure the token is set from the environment, if not prompt user
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
 def ensure_valid_token(oauth_token):
-    if oauth_token is None or oauth_token.token is None:
+    if not oauth_token or not oauth_token.strip():
         raise ValueError("You must be logged in.")
-    return oauth_token.token
+    return oauth_token.strip()
 
 def generate_importance_matrix(model_path, train_data_path):
     imatrix_command = f"./llama-imatrix -m ../{model_path} -f {train_data_path} -ngl 99 --output-frequency 10"
@@ -37,14 +38,16 @@ def generate_importance_matrix(model_path, train_data_path):
         try:
             process.wait(timeout=5)
         except subprocess.TimeoutExpired:
-            print("Imatrix process still didn't terminate. Forcefully terminating process...")
+            print("Imatrix proc still didn't terminate. Forcefully terminating process...")
             process.kill()
 
     os.chdir("..")
+
     print("Importance matrix generation completed.")
 
 def split_upload_model(model_path, repo_id, oauth_token, split_max_tensors=256, split_max_size=None):
-    token = ensure_valid_token(oauth_token)
+    if not oauth_token or not oauth_token.strip():
+        raise ValueError("You have to be logged in.")
 
     split_cmd = f"llama.cpp/llama-gguf-split --split --split-max-tensors {split_max_tensors}"
     if split_max_size:
@@ -60,11 +63,11 @@ def split_upload_model(model_path, repo_id, oauth_token, split_max_tensors=256,
     if result.returncode != 0:
         raise Exception(f"Error splitting the model: {result.stderr}")
     print("Model split successfully!")
-
+
     sharded_model_files = [f for f in os.listdir('.') if f.startswith(model_path.split('.')[0])]
     if sharded_model_files:
         print(f"Sharded model files: {sharded_model_files}")
-        api = HfApi(token=token)
+        api = HfApi(token=oauth_token)
         for file in sharded_model_files:
             file_path = os.path.join('.', file)
             print(f"Uploading file: {file_path}")
@@ -201,7 +204,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
     card.save(f"README.md")
 
     if split_model:
-        split_upload_model(quantized_gguf_path, new_repo_id, oauth_token, split_max_tensors, split_max_size)
+        split_upload_model(quantized_gguf_path, new_repo_id, token, split_max_tensors, split_max_size)
     else:
         try:
             print(f"Uploading quantized model: {quantized_gguf_path}")
@@ -241,123 +244,32 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
     shutil.rmtree(model_name, ignore_errors=True)
     print("Folder cleaned up successfully!")
 
-css = """/* Custom CSS to allow scrolling */
-.gradio-container {overflow-y: auto;}
-"""
-
-# Create Gradio interface
-with gr.Blocks(css=css) as demo:
-    gr.Markdown("You must be logged in to use GGUF-my-repo.")
-    gr.LoginButton(min_width=250)
-
-    model_id = HuggingfaceHubSearch(
-        label="Hub Model ID",
-        placeholder="Search for model id on Huggingface",
-        search_type="model",
-    )
-
-    q_method = gr.Dropdown(
-        ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
-        label="Quantization Method",
-        info="GGML quantization type",
-        value="Q4_K_M",
-        filterable=False,
-        visible=True
-    )
-
-    imatrix_q_method = gr.Dropdown(
-        ["IQ3_M", "IQ3_XXS", "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS", "Q5_K_M", "Q5_K_S"],
-        label="Imatrix Quantization Method",
-        info="GGML imatrix quants type",
-        value="IQ4_NL",
-        filterable=False,
-        visible=False
-    )
-
-    use_imatrix = gr.Checkbox(
-        value=False,
-        label="Use Imatrix Quantization",
-        info="Use importance matrix for quantization."
-    )
-
-    private_repo = gr.Checkbox(
-        value=False,
-        label="Private Repo",
-        info="Create a private repo under your username."
-    )
-
-    train_data_file = gr.File(
-        label="Training Data File",
-        file_types=["txt"],
-        visible=False
+with gr.Blocks() as app:
+    gr.Markdown("# Model Processing")
+
+    # Input fields for model processing
+    with gr.Row():
+        model_id = gr.Textbox(label="Model ID", placeholder="e.g., user/model_name")
+        q_method = gr.Dropdown(["method1", "method2"], label="Quantization Method")
+        use_imatrix = gr.Checkbox(label="Use Importance Matrix")
+        imatrix_q_method = gr.Dropdown(["methodA", "methodB"], label="Importance Matrix Method")
+        private_repo = gr.Checkbox(label="Private Repository")
+        train_data_file = gr.File(label="Training Data File", type="file")
+        split_model = gr.Checkbox(label="Split Model")
+        split_max_tensors = gr.Number(label="Max Tensors (for splitting)", value=256)
+        split_max_size = gr.Number(label="Max Size (for splitting)", value=None)
+        oauth_token = gr.Textbox(label="Hugging Face Token", type="password")
+
+    # Output fields
+    result = gr.HTML()
+    img = gr.Image()
+
+    # Process button
+    process_button = gr.Button("Process Model")
+    process_button.click(
+        process_model,
+        inputs=[model_id, q_method, use_imatrix, imatrix_q_method, private_repo, train_data_file, split_model, split_max_tensors, split_max_size, oauth_token],
+        outputs=[result, img]
    )
 
-    split_model = gr.Checkbox(
-        value=False,
-        label="Split Model",
-        info="Shard the model using gguf-split."
-    )
-
-    split_max_tensors = gr.Number(
-        value=256,
-        label="Max Tensors per File",
-        info="Maximum number of tensors per file when splitting model.",
-        visible=False
-    )
-
-    split_max_size = gr.Textbox(
-        label="Max File Size",
-        info="Maximum file size when splitting model (--split-max-size). May leave empty to use the default.",
-        visible=False
-    )
-
-    def update_visibility(use_imatrix):
-        return gr.update(visible=not use_imatrix), gr.update(visible=use_imatrix), gr.update(visible=use_imatrix)
-
-    use_imatrix.change(
-        fn=update_visibility,
-        inputs=use_imatrix,
-        outputs=[q_method, imatrix_q_method, train_data_file]
-    )
-
-    iface = gr.Interface(
-        fn=process_model,
-        inputs=[
-            model_id,
-            q_method,
-            use_imatrix,
-            imatrix_q_method,
-            private_repo,
-            train_data_file,
-            split_model,
-            split_max_tensors,
-            split_max_size,
-            gr.Textbox(value=HF_TOKEN, label="Hugging Face Token", type="password") # Correct token input field
-        ],
-        outputs=[
-            gr.Markdown(label="output"),
-            gr.Image(show_label=False),
-        ],
-        title="Create your own GGUF Quants, blazingly fast ⚡!",
-        description="The space takes an HF repo as an input, quantizes it and creates a Public repo containing the selected quant under your HF user namespace.",
-        api_name=False
-    )
-
-    def update_split_visibility(split_model):
-        return gr.update(visible=split_model), gr.update(visible=split_model)
-
-    split_model.change(
-        fn=update_split_visibility,
-        inputs=split_model,
-        outputs=[split_max_tensors, split_max_size]
-    )
-
-def restart_space():
-    HfApi().restart_space(repo_id="ggml-org/gguf-my-repo", token=HF_TOKEN, factory_reboot=True)
-
-scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=21600)
-scheduler.start()
-
-# Launch the interface
-demo.queue(default_concurrency_limit=1, max_size=5).launch(debug=True, show_api=False)
+app.launch()
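
Note: the central behavioral change in this commit is that the Hugging Face token is now handled as a plain string taken from the new password gr.Textbox, rather than as a Gradio OAuth token object. A minimal sketch of how the reworked validation behaves, reusing the ensure_valid_token definition from the diff above (the example token value and the standalone usage are hypothetical, not part of the commit):

    # Sketch only: mirrors the validation added in this commit.
    def ensure_valid_token(oauth_token):
        # The token arrives as a plain string, so an empty or whitespace-only
        # value is rejected and surrounding whitespace is stripped before use.
        if not oauth_token or not oauth_token.strip():
            raise ValueError("You must be logged in.")
        return oauth_token.strip()

    print(ensure_valid_token("  hf_xxxxxxxx  "))  # -> "hf_xxxxxxxx" (hypothetical value)
    try:
        ensure_valid_token("   ")                 # whitespace-only -> rejected
    except ValueError as err:
        print(err)                                # "You must be logged in."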