gguf-my-repo

Running

App Files Community

Ffftdtd5dtft committed on Sep 3, 2024

Commit

626cc4a

•

1 Parent(s): 28cefe2

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -21

app.py CHANGED Viewed

@@ -142,17 +142,10 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
         username = whoami(oauth_token.token)["name"]
         quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
         quantized_gguf_path = quantized_gguf_name
-        # Agregar opciones de cuantización k0 y q0
-        if q_method == "k0":
-            quantise_ggml = f"./llama.cpp/llama-quantize {fp16} {quantized_gguf_path} --k 0"
-        elif q_method == "q0":
-            quantise_ggml = f"./llama.cpp/llama-quantize {fp16} {quantized_gguf_path} --q 0"
-        elif use_imatrix:
             quantise_ggml = f"./llama.cpp/llama-quantize --imatrix {imatrix_path} {fp16} {quantized_gguf_path} {imatrix_q_method}"
         else:
             quantise_ggml = f"./llama.cpp/llama-quantize {fp16} {quantized_gguf_path} {q_method}"
         result = subprocess.run(quantise_ggml, shell=True, capture_output=True)
         if result.returncode != 0:
             raise Exception(f"Error quantizing: {result.stderr}")
@@ -165,12 +158,12 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
         if card.data.tags is None:
             card.data.tags = []
         card.data.tags.append("llama-cpp")
-        card.data.tags.append("Ffftdtd5dtft/gguf-my-repo")
         card.data.base_model = model_id
         card.text = dedent(
             f"""
             # {new_repo_id}
-            This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp via the ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/Ffftdtd5dtft/gguf-my-repo) space.
             Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
             ## Use with llama.cpp
@@ -239,16 +232,7 @@ with gr.Blocks(css=css) as demo:
     gr.Markdown("You must be logged in to use GGUF-my-repo.")
     gr.LoginButton(min_width=250)
     model_id = HuggingfaceHubSearch(label="Hub Model ID", placeholder="Search for model id on Huggingface", search_type="model")
-    # Agregar opciones k0 y q0 al dropdown de cuantización
-    q_method = gr.Dropdown(
-        ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0", "k0", "q0"],
-        label="Quantization Method",
-        info="GGML quantization type",
-        value="Q4_K_M",
-        filterable=False,
-        visible=True
-    )
     imatrix_q_method = gr.Dropdown(["IQ3_M", "IQ3_XXS", "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS", "Q5_K_M", "Q5_K_S"], label="Imatrix Quantization Method", info="GGML imatrix quants type", value="IQ4_NL", filterable=False, visible=False)
     use_imatrix = gr.Checkbox(value=False, label="Use Imatrix Quantization", info="Use importance matrix for quantization.")
     private_repo = gr.Checkbox(value=False, label="Private Repo", info="Create a private repo under your username.")
@@ -286,7 +270,7 @@ with gr.Blocks(css=css) as demo:
     )
 def restart_space():
-    HfApi().restart_space(repo_id="Ffftdtd5dtft/gguf-my-repo", token=HF_TOKEN, factory_reboot=True)
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=21600)

         username = whoami(oauth_token.token)["name"]
         quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
         quantized_gguf_path = quantized_gguf_name
+        if use_imatrix:
             quantise_ggml = f"./llama.cpp/llama-quantize --imatrix {imatrix_path} {fp16} {quantized_gguf_path} {imatrix_q_method}"
         else:
             quantise_ggml = f"./llama.cpp/llama-quantize {fp16} {quantized_gguf_path} {q_method}"
         result = subprocess.run(quantise_ggml, shell=True, capture_output=True)
         if result.returncode != 0:
             raise Exception(f"Error quantizing: {result.stderr}")
         if card.data.tags is None:
             card.data.tags = []
         card.data.tags.append("llama-cpp")
+        card.data.tags.append("gguf-my-repo")
         card.data.base_model = model_id
         card.text = dedent(
             f"""
             # {new_repo_id}
+            This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp via the ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/ggml-org/gguf-my-repo) space.
             Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
             ## Use with llama.cpp
     gr.Markdown("You must be logged in to use GGUF-my-repo.")
     gr.LoginButton(min_width=250)
     model_id = HuggingfaceHubSearch(label="Hub Model ID", placeholder="Search for model id on Huggingface", search_type="model")
+    q_method = gr.Dropdown(["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"], label="Quantization Method", info="GGML quantization type", value="Q4_K_M", filterable=False, visible=True)
     imatrix_q_method = gr.Dropdown(["IQ3_M", "IQ3_XXS", "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS", "Q5_K_M", "Q5_K_S"], label="Imatrix Quantization Method", info="GGML imatrix quants type", value="IQ4_NL", filterable=False, visible=False)
     use_imatrix = gr.Checkbox(value=False, label="Use Imatrix Quantization", info="Use importance matrix for quantization.")
     private_repo = gr.Checkbox(value=False, label="Private Repo", info="Create a private repo under your username.")
     )
 def restart_space():
+    HfApi().restart_space(repo_id="ggml-org/gguf-my-repo", token=HF_TOKEN, factory_reboot=True)
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=21600)