Ffftdtd5dtft committed on
Commit
626cc4a
1 Parent(s): 28cefe2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -21
app.py CHANGED
@@ -142,17 +142,10 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
142
  username = whoami(oauth_token.token)["name"]
143
  quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
144
  quantized_gguf_path = quantized_gguf_name
145
-
146
- # Agregar opciones de cuantización k0 y q0
147
- if q_method == "k0":
148
- quantise_ggml = f"./llama.cpp/llama-quantize {fp16} {quantized_gguf_path} --k 0"
149
- elif q_method == "q0":
150
- quantise_ggml = f"./llama.cpp/llama-quantize {fp16} {quantized_gguf_path} --q 0"
151
- elif use_imatrix:
152
  quantise_ggml = f"./llama.cpp/llama-quantize --imatrix {imatrix_path} {fp16} {quantized_gguf_path} {imatrix_q_method}"
153
  else:
154
  quantise_ggml = f"./llama.cpp/llama-quantize {fp16} {quantized_gguf_path} {q_method}"
155
-
156
  result = subprocess.run(quantise_ggml, shell=True, capture_output=True)
157
  if result.returncode != 0:
158
  raise Exception(f"Error quantizing: {result.stderr}")
@@ -165,12 +158,12 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
165
  if card.data.tags is None:
166
  card.data.tags = []
167
  card.data.tags.append("llama-cpp")
168
- card.data.tags.append("Ffftdtd5dtft/gguf-my-repo")
169
  card.data.base_model = model_id
170
  card.text = dedent(
171
  f"""
172
  # {new_repo_id}
173
- This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp via the ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/Ffftdtd5dtft/gguf-my-repo) space.
174
  Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
175
 
176
  ## Use with llama.cpp
@@ -239,16 +232,7 @@ with gr.Blocks(css=css) as demo:
239
  gr.Markdown("You must be logged in to use GGUF-my-repo.")
240
  gr.LoginButton(min_width=250)
241
  model_id = HuggingfaceHubSearch(label="Hub Model ID", placeholder="Search for model id on Huggingface", search_type="model")
242
-
243
- # Agregar opciones k0 y q0 al dropdown de cuantización
244
- q_method = gr.Dropdown(
245
- ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0", "k0", "q0"],
246
- label="Quantization Method",
247
- info="GGML quantization type",
248
- value="Q4_K_M",
249
- filterable=False,
250
- visible=True
251
- )
252
  imatrix_q_method = gr.Dropdown(["IQ3_M", "IQ3_XXS", "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS", "Q5_K_M", "Q5_K_S"], label="Imatrix Quantization Method", info="GGML imatrix quants type", value="IQ4_NL", filterable=False, visible=False)
253
  use_imatrix = gr.Checkbox(value=False, label="Use Imatrix Quantization", info="Use importance matrix for quantization.")
254
  private_repo = gr.Checkbox(value=False, label="Private Repo", info="Create a private repo under your username.")
@@ -286,7 +270,7 @@ with gr.Blocks(css=css) as demo:
286
  )
287
 
288
  def restart_space():
289
- HfApi().restart_space(repo_id="Ffftdtd5dtft/gguf-my-repo", token=HF_TOKEN, factory_reboot=True)
290
 
291
  scheduler = BackgroundScheduler()
292
  scheduler.add_job(restart_space, "interval", seconds=21600)
 
142
  username = whoami(oauth_token.token)["name"]
143
  quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
144
  quantized_gguf_path = quantized_gguf_name
145
+ if use_imatrix:
 
 
 
 
 
 
146
  quantise_ggml = f"./llama.cpp/llama-quantize --imatrix {imatrix_path} {fp16} {quantized_gguf_path} {imatrix_q_method}"
147
  else:
148
  quantise_ggml = f"./llama.cpp/llama-quantize {fp16} {quantized_gguf_path} {q_method}"
 
149
  result = subprocess.run(quantise_ggml, shell=True, capture_output=True)
150
  if result.returncode != 0:
151
  raise Exception(f"Error quantizing: {result.stderr}")
 
158
  if card.data.tags is None:
159
  card.data.tags = []
160
  card.data.tags.append("llama-cpp")
161
+ card.data.tags.append("gguf-my-repo")
162
  card.data.base_model = model_id
163
  card.text = dedent(
164
  f"""
165
  # {new_repo_id}
166
+ This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp via the ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/ggml-org/gguf-my-repo) space.
167
  Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
168
 
169
  ## Use with llama.cpp
 
232
  gr.Markdown("You must be logged in to use GGUF-my-repo.")
233
  gr.LoginButton(min_width=250)
234
  model_id = HuggingfaceHubSearch(label="Hub Model ID", placeholder="Search for model id on Huggingface", search_type="model")
235
+ q_method = gr.Dropdown(["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"], label="Quantization Method", info="GGML quantization type", value="Q4_K_M", filterable=False, visible=True)
 
 
 
 
 
 
 
 
 
236
  imatrix_q_method = gr.Dropdown(["IQ3_M", "IQ3_XXS", "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS", "Q5_K_M", "Q5_K_S"], label="Imatrix Quantization Method", info="GGML imatrix quants type", value="IQ4_NL", filterable=False, visible=False)
237
  use_imatrix = gr.Checkbox(value=False, label="Use Imatrix Quantization", info="Use importance matrix for quantization.")
238
  private_repo = gr.Checkbox(value=False, label="Private Repo", info="Create a private repo under your username.")
 
270
  )
271
 
272
  def restart_space():
273
+ HfApi().restart_space(repo_id="ggml-org/gguf-my-repo", token=HF_TOKEN, factory_reboot=True)
274
 
275
  scheduler = BackgroundScheduler()
276
  scheduler.add_job(restart_space, "interval", seconds=21600)