Spaces: Ffftdtd5dtft committed
Commit 626cc4a · Parent(s): 28cefe2
Update app.py
app.py CHANGED
```diff
@@ -142,17 +142,10 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
     username = whoami(oauth_token.token)["name"]
     quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
     quantized_gguf_path = quantized_gguf_name
-
-    # Add the k0 and q0 quantization options
-    if q_method == "k0":
-        quantise_ggml = f"./llama.cpp/llama-quantize {fp16} {quantized_gguf_path} --k 0"
-    elif q_method == "q0":
-        quantise_ggml = f"./llama.cpp/llama-quantize {fp16} {quantized_gguf_path} --q 0"
-    elif use_imatrix:
+    if use_imatrix:
         quantise_ggml = f"./llama.cpp/llama-quantize --imatrix {imatrix_path} {fp16} {quantized_gguf_path} {imatrix_q_method}"
     else:
         quantise_ggml = f"./llama.cpp/llama-quantize {fp16} {quantized_gguf_path} {q_method}"
-
     result = subprocess.run(quantise_ggml, shell=True, capture_output=True)
     if result.returncode != 0:
         raise Exception(f"Error quantizing: {result.stderr}")
```
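This hunk reverts to the original two-way dispatch: the dropped `k0`/`q0` branches passed `--k 0` / `--q 0` flags that are not part of `llama-quantize`'s documented interface, so those paths would likely have failed at runtime. Below is a minimal standalone sketch of the restored logic; the helper name and the list-form `subprocess.run` call (which avoids `shell=True`) are our own, since app.py itself builds a shell string:

```python
import subprocess

def quantize_gguf(fp16_path: str, out_path: str, q_method: str,
                  use_imatrix: bool = False, imatrix_path: str = "imatrix.dat",
                  imatrix_q_method: str = "IQ4_NL") -> None:
    # Same two-branch dispatch as app.py, but with an argument list so
    # paths containing spaces cannot break the command line.
    if use_imatrix:
        cmd = ["./llama.cpp/llama-quantize", "--imatrix", imatrix_path,
               fp16_path, out_path, imatrix_q_method]
    else:
        cmd = ["./llama.cpp/llama-quantize", fp16_path, out_path, q_method]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise Exception(f"Error quantizing: {result.stderr}")
```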
```diff
@@ -165,12 +158,12 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
     if card.data.tags is None:
         card.data.tags = []
     card.data.tags.append("llama-cpp")
-    card.data.tags.append("
+    card.data.tags.append("gguf-my-repo")
     card.data.base_model = model_id
     card.text = dedent(
         f"""
         # {new_repo_id}
-        This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp via the ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/
+        This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp via the ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/ggml-org/gguf-my-repo) space.
         Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
 
         ## Use with llama.cpp
```
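The restored lines complete two strings the parent commit had left truncated: the `gguf-my-repo` tag and the full Space URL in the generated model card. A hedged sketch of the card-building step, assuming the card is loaded with `huggingface_hub.ModelCard.load` (the helper function is illustrative, not app.py's actual structure):

```python
from textwrap import dedent
from huggingface_hub import ModelCard

def build_card(model_id: str, new_repo_id: str) -> ModelCard:
    card = ModelCard.load(model_id)  # start from the original model's card
    if card.data.tags is None:
        card.data.tags = []
    card.data.tags.append("llama-cpp")
    card.data.tags.append("gguf-my-repo")
    card.data.base_model = model_id
    card.text = dedent(f"""
        # {new_repo_id}
        This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id})
        using llama.cpp via the ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/ggml-org/gguf-my-repo) space.
        Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
        """)
    return card
```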
```diff
@@ -239,16 +232,7 @@ with gr.Blocks(css=css) as demo:
     gr.Markdown("You must be logged in to use GGUF-my-repo.")
     gr.LoginButton(min_width=250)
     model_id = HuggingfaceHubSearch(label="Hub Model ID", placeholder="Search for model id on Huggingface", search_type="model")
-
-    # Add the k0 and q0 options to the quantization dropdown
-    q_method = gr.Dropdown(
-        ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0", "k0", "q0"],
-        label="Quantization Method",
-        info="GGML quantization type",
-        value="Q4_K_M",
-        filterable=False,
-        visible=True
-    )
+    q_method = gr.Dropdown(["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"], label="Quantization Method", info="GGML quantization type", value="Q4_K_M", filterable=False, visible=True)
     imatrix_q_method = gr.Dropdown(["IQ3_M", "IQ3_XXS", "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS", "Q5_K_M", "Q5_K_S"], label="Imatrix Quantization Method", info="GGML imatrix quants type", value="IQ4_NL", filterable=False, visible=False)
     use_imatrix = gr.Checkbox(value=False, label="Use Imatrix Quantization", info="Use importance matrix for quantization.")
     private_repo = gr.Checkbox(value=False, label="Private Repo", info="Create a private repo under your username.")
```
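With the revert, `q_method` again lists only the stock GGML quant types and is visible by default, while `imatrix_q_method` starts hidden. The diff does not show how the two are swapped; a plausible wiring, assuming a `.change` listener on the checkbox, would be:

```python
import gradio as gr

with gr.Blocks() as demo:
    q_method = gr.Dropdown(
        ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M",
         "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
        label="Quantization Method", value="Q4_K_M", visible=True)
    imatrix_q_method = gr.Dropdown(
        ["IQ3_M", "IQ3_XXS", "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS",
         "Q5_K_M", "Q5_K_S"],
        label="Imatrix Quantization Method", value="IQ4_NL", visible=False)
    use_imatrix = gr.Checkbox(value=False, label="Use Imatrix Quantization")

    # Show exactly one of the two dropdowns, depending on the checkbox.
    use_imatrix.change(
        lambda on: (gr.update(visible=not on), gr.update(visible=on)),
        inputs=use_imatrix,
        outputs=[q_method, imatrix_q_method],
    )
```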
```diff
@@ -286,7 +270,7 @@ with gr.Blocks(css=css) as demo:
     )
 
     def restart_space():
-        HfApi().restart_space(repo_id="
+        HfApi().restart_space(repo_id="ggml-org/gguf-my-repo", token=HF_TOKEN, factory_reboot=True)
 
     scheduler = BackgroundScheduler()
     scheduler.add_job(restart_space, "interval", seconds=21600)
```
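This hunk restores the full restart call: a factory reboot of the `ggml-org/gguf-my-repo` Space every 21600 seconds (six hours). A self-contained sketch of the same pattern; reading `HF_TOKEN` from the environment is an assumption here, as app.py defines it elsewhere:

```python
import os
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

HF_TOKEN = os.environ["HF_TOKEN"]  # assumed: provided as a Space secret

def restart_space():
    # factory_reboot=True rebuilds the Space image instead of a plain restart.
    HfApi().restart_space(repo_id="ggml-org/gguf-my-repo",
                          token=HF_TOKEN, factory_reboot=True)

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=21600)  # every 6 hours
scheduler.start()
```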