Update app.py

app.py CHANGED

@@ -18,7 +18,7 @@ def generate_importance_matrix(model_path, train_data_path):
     print(f"Current working directory: {os.getcwd()}")
     print(f"Files in the current directory: {os.listdir('.')}")

-    if not os.path.isfile(f"../{model_path}"):
+    if not os.path.isfile(f"../{model_path}")):
         raise Exception(f"Model file not found: {model_path}")

     print("Running imatrix command...")

@@ -196,34 +196,77 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
 ```
 cd llama.cpp && LLAMA_CURL=1 make
 ```
-Step 3: Fetch model weights from HF using curl command and
+Step 3: Fetch model weights from HF using curl command and run the models directly!
 ```
-curl -L {new_repo_id}
+curl -L https://huggingface.co/{new_repo_id}/resolve/main/{quantized_gguf_name} -o ./models/{quantized_gguf_name}
+./llama -m ./models/{quantized_gguf_name} -p "Hello, world!"
 ```
+
+## Additional Notes:
+To gain higher performance, ensure that you have aligned on llama.cpp's threading tips by having your CPU fully utilized and setting threads dynamically using `OMP_NUM_THREADS`.
 """
         )
-        if use_imatrix:
-            card.text += "\nNote: This model was quantized using imatrix."
-
         card.push_to_hub(repo_id=new_repo_id, token=oauth_token.token)
         api.upload_file(
             path_or_fileobj=quantized_gguf_path,
             path_in_repo=quantized_gguf_name,
             repo_id=new_repo_id,
-            token=oauth_token.token,
         )
-        print("Pushed model to the hub!")
         if split_model:
-            split_upload_model(
+            split_upload_model(quantized_gguf_path, new_repo_id, oauth_token, split_max_tensors=split_max_tensors, split_max_size=split_max_size)
+        else:
+            print("Model split skipped by user.")
+
+        print("Model has been uploaded successfully!")
     except Exception as e:
-        print("
-
+        print(f"An error occurred: {str(e)}")
+        return False, str(e)
     finally:
-        os.
-
-
-
-
+        if os.path.exists(fp16):
+            os.remove(fp16)
+        if os.path.exists(quantized_gguf_path):
+            os.remove(quantized_gguf_path)
+        shutil.rmtree(model_name)
+        print(f"Removed temporary files for model {model_name}")
+
+    return True, None
+
+def app_interface():
+    with gr.Blocks() as demo:
+        gr.Markdown("## GGUF Model Processing")
+
+        with gr.Row():
+            with gr.Column():
+                repo_id = gr.Textbox(label="HuggingFace Repo ID")
+                model_id = gr.Textbox(label="Model ID")
+                q_method = gr.Dropdown(["q4_0", "q4_1", "q5_0", "q5_1", "q8_0"], label="Quantization Method")
+                imatrix_q_method = gr.Dropdown(["q4_0", "q4_1", "q5_0", "q5_1", "q8_0"], label="Imatrix Quantization Method")
+                use_imatrix = gr.Checkbox(label="Use Importance Matrix")
+                private_repo = gr.Checkbox(label="Private Repo")
+                train_data_file = gr.File(label="Training Data File (Optional)")
+                split_model = gr.Checkbox(label="Split Model")
+                split_max_tensors = gr.Number(label="Max Tensors per Shard", value=256)
+                split_max_size = gr.Number(label="Max Shard Size (MB)", value=None)
+            with gr.Column():
+                oauth_token = gr.oauth.HuggingFace(
+                    "Gradio OAuth Authentication",
+                    token=HF_TOKEN,
+                )
+
+        process_btn = gr.Button("Process Model")
+        process_btn.click(
+            process_model,
+            [model_id, q_method, use_imatrix, imatrix_q_method, private_repo, train_data_file, split_model, split_max_tensors, split_max_size, oauth_token],
+            outputs=["status_text"]
+        )
+
+    return demo
+
+if __name__ == "__main__":
+    scheduler = BackgroundScheduler(daemon=True)
+    scheduler.start()

-
+    demo = app_interface()
+    demo.launch()

+    signal.signal(signal.SIGINT, signal.SIG_DFL)
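A few notes on the pieces this commit touches. The first hunk edits `generate_importance_matrix`, which shells out to llama.cpp's imatrix tool. A minimal sketch of that invocation is below, assuming the binary is called `llama-imatrix` (older builds name it `imatrix`) and that the `-m`/`-f`/`-o` flags are unchanged; the paths are placeholders.

```
import subprocess

def generate_importance_matrix_sketch(model_path, train_data_path, output_path="imatrix.dat"):
    """Sketch: build an importance matrix with llama.cpp's imatrix tool."""
    cmd = [
        "./llama-imatrix",       # assumed binary name; older builds ship ./imatrix
        "-m", model_path,        # input GGUF model
        "-f", train_data_path,   # calibration text used to collect activations
        "-o", output_path,       # where the importance matrix is written
    ]
    subprocess.run(cmd, check=True)  # raises CalledProcessError if the tool fails
```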
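The updated model card tells users to control threads via `OMP_NUM_THREADS`. If the quantized model is driven from Python rather than an interactive shell, the same advice looks roughly like this; `llama-cli` and its `-t` flag are assumed from current llama.cpp builds (older ones name the binary `main`).

```
import os
import subprocess

def run_quantized_sketch(gguf_path, prompt, threads=8):
    """Sketch: run a quantized GGUF with an explicit CPU thread count."""
    env = os.environ.copy()
    env["OMP_NUM_THREADS"] = str(threads)  # honoured by OpenMP-enabled builds

    subprocess.run(
        ["./llama-cli", "-m", gguf_path, "-p", prompt, "-t", str(threads)],
        env=env,
        check=True,
    )
```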
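The new one-line `split_upload_model(...)` call forwards `split_max_tensors` and `split_max_size`, which in llama.cpp correspond to the `gguf-split` tool. A sketch of sharding a file and uploading the shards with `huggingface_hub` follows; the binary name and flag spellings vary between llama.cpp versions, and `outdir` is a placeholder.

```
import os
import subprocess
from huggingface_hub import HfApi

def split_and_upload_sketch(gguf_path, repo_id, token, max_tensors=256, outdir="shards"):
    """Sketch: shard a GGUF with llama.cpp's gguf-split, then upload the shards."""
    os.makedirs(outdir, exist_ok=True)
    subprocess.run(
        [
            "./llama-gguf-split", "--split",           # assumed binary/flag names
            "--split-max-tensors", str(max_tensors),
            gguf_path,                                 # input GGUF
            f"{outdir}/model",                         # output prefix for the shards
        ],
        check=True,
    )
    HfApi().upload_folder(folder_path=outdir, repo_id=repo_id, token=token)
```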
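`gr.oauth.HuggingFace(...)` in the new `app_interface` does not appear to be a public Gradio API. On Spaces, sign-in is usually wired with `gr.LoginButton` plus a callback parameter typed `gr.OAuthToken`, which Gradio injects automatically. A minimal sketch, with illustrative widget names:

```
import gradio as gr

def process_with_token(model_id, oauth_token: gr.OAuthToken | None = None):
    """Sketch of a callback that receives the signed-in user's token."""
    if oauth_token is None:
        raise gr.Error("Please sign in with Hugging Face first.")
    return f"Would process {model_id} with the user's token."

with gr.Blocks() as demo:
    gr.LoginButton()                          # renders the "Sign in with Hugging Face" button
    model_id = gr.Textbox(label="Model ID")
    status = gr.Textbox(label="Status")
    gr.Button("Process Model").click(process_with_token, [model_id], [status])

# demo.launch() starts the app; OAuth injection only takes effect when hosted on Spaces.
```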
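Finally, `__main__` now starts a `BackgroundScheduler` without registering a job in the visible hunk. For reference, Spaces that use APScheduler typically add a periodic task before `scheduler.start()`; the restart job below is only an illustration, with a placeholder Space ID and interval, using `HfApi.restart_space` from `huggingface_hub`.

```
import os
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

SPACE_ID = "your-username/your-space"     # placeholder Space ID
HF_TOKEN = os.environ.get("HF_TOKEN")     # write token stored as a Space secret

def restart_space():
    # Restarting the Space periodically is a common way to clear leftover temp files.
    HfApi().restart_space(repo_id=SPACE_ID, token=HF_TOKEN)

scheduler = BackgroundScheduler(daemon=True)
scheduler.add_job(restart_space, "interval", hours=6)  # illustrative interval
scheduler.start()
```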