Update app.py
app.py CHANGED
@@ -1,113 +1,99 @@
-
- import streamlit as st
- import subprocess
  import os
- import
-
- from
- import tempfile

- # Define
-
-
-
-

- def
-     """
-
-
-     )
-

-
- st.markdown(
      """
-
      """
- )

- st.
- st.
-
- password = st.sidebar.text_input("Hugging Face Password", type="password")
- model_repo_id = st.sidebar.text_input("Model Repository ID", "Qwen/Qwen2.5-3B")
- quantization_options = ["q4_k_m", "q4_0", "q4_1"]
- quantization_type = st.sidebar.selectbox("Select Quantization Type", quantization_options)
- quant_options = ["f32", "f16", "bf16", "q8_0", "auto"]
- quant_type = st.sidebar.selectbox("Select GGUF Output Type", quant_options)
- upload_option = st.sidebar.checkbox("Upload quantized model to Hugging Face?", value=False)
- run_button = st.button("Run Pipeline")

- if
-     st.
-
-     logs = []
-
-     def log(message):
-         logs.append(message)
-         log_area.text("\n".join(logs))
-
-     try:
-         # Download the llama.cpp repository
-         snapshot_download(repo_id="KBaba7/llama.cpp", local_dir="llama.cpp", repo_type="space")
-
-         # Create temporary directories for the original and quantized models
-         temp_path = Path(tempfile.gettempdir())
-         original_model_dir = temp_path / "original_model"
-         quantized_model_dir = temp_path / "quantized_model"
-         original_model_dir.mkdir(parents=True, exist_ok=True)
-         quantized_model_dir.mkdir(parents=True, exist_ok=True)
-
-         log("Downloading model from Hugging Face...")
-         snapshot_download(repo_id=model_repo_id, local_dir=str(original_model_dir), local_dir_use_symlinks=False)
-         log(f"Model downloaded to: {original_model_dir}")
-
-         log("Converting model to GGUF format...")
-         conversion_outfile = quantized_model_dir / "model_converted.gguf"
-         conversion_cmd = (
-             f"python3 convert-hf-to-gguf.py {original_model_dir} --outtype {quant_type} "
-             f"--outfile {conversion_outfile}"
-         )
-         conv_stdout, conv_stderr = run_command(conversion_cmd)
-         log(conv_stdout + conv_stderr)
-
-         if not conversion_outfile.exists():
-             log("Error: GGUF conversion failed! No output file found.")
-             st.error("GGUF conversion failed. Check logs.")
-             st.stop()

-
-
-
-         quant_stdout, quant_stderr = run_command(quantize_cmd)
-         log(quant_stdout + quant_stderr)
-
-         if not quantized_model_outfile.exists():
-             log("Error: Quantization failed! No output file found.")
-             st.error("Quantization failed. Check logs.")
-             st.stop()
-
-         log("Pipeline completed successfully!")
-         st.success("Quantized model ready for download.")
-         with open(quantized_model_outfile, "rb") as file:
-             st.download_button(label="Download Quantized Model", data=file, file_name=quantized_model_outfile.name)
-
-         # Upload if selected
-         if upload_option:
-             log("Uploading quantized model to Hugging Face...")
-             login(username, password)
-             api = HfApi()
-             target_repo = f"automated-quantization/{quantized_model_outfile.stem}"
-             api.create_repo(target_repo, exist_ok=True, repo_type="model")
-             api.upload_file(
-                 path_or_fileobj=str(quantized_model_outfile),
-                 path_in_repo=quantized_model_outfile.name,
-             )
-             log("Upload complete!")
-     except Exception as e:
-         log(f"An error occurred: {e}")
-     finally:
-         # Remove temporary directories
-         original_model_dir.rmdir()
-         quantized_model_dir.rmdir()
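The removed version shells out through a `run_command` helper whose definition is garbled away in this view (only a `def`, a docstring delimiter, and a closing parenthesis survive). A minimal sketch of what such a wrapper plausibly looks like, assuming it runs the command string through the shell and returns the captured stdout/stderr pair that the call sites unpack:

```python
import subprocess

def run_command(command: str):
    """Hypothetical reconstruction: run a shell command and return (stdout, stderr)."""
    result = subprocess.run(command, shell=True, capture_output=True, text=True)
    return result.stdout, result.stderr
```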
  import os
+ import subprocess
+ import streamlit as st
+ from huggingface_hub import snapshot_download

+ # Define quantization types
+ QUANT_TYPES = [
+     "Q2_K", "Q3_K_M", "Q3_K_S", "Q4_K_M", "Q4_K_S",
+     "Q5_K_M", "Q5_K_S", "Q6_K"
+ ]

+ def download_model(hf_model_name, output_dir="models"):
+     """
+     Downloads a Hugging Face model and saves it locally.
+     """
+     st.write(f"📥 Downloading `{hf_model_name}` from Hugging Face...")
+     os.makedirs(output_dir, exist_ok=True)
+     snapshot_download(repo_id=hf_model_name, local_dir=output_dir, local_dir_use_symlinks=False)
+     st.success("✅ Model downloaded successfully!")
+
+ def convert_to_gguf(model_dir, output_file):
+     """
+     Converts a Hugging Face model to GGUF format.
+     """
+     st.write(f"🔄 Converting `{model_dir}` to GGUF format...")
+     os.makedirs(os.path.dirname(output_file), exist_ok=True)
+     cmd = [
+         "python3",
+         "llama.cpp/convert-hf-to-gguf.py",
+         "--model", model_dir,
+         "--outtype", "f16",
+         "--outfile", output_file
+     ]
+     process = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+     if process.returncode == 0:
+         st.success(f"✅ Conversion complete: `{output_file}`")
+     else:
+         st.error(f"❌ Conversion failed: {process.stderr}")

+ def quantize_llama(model_path, quantized_output_path, quant_type):
      """
+     Quantizes a GGUF model.
      """
+     st.write(f"⚡ Quantizing `{model_path}` with `{quant_type}` precision...")
+     os.makedirs(os.path.dirname(quantized_output_path), exist_ok=True)
+     cmd = [
+         "./llama.cpp/build/bin/llama-quantize",
+         model_path,
+         quantized_output_path,
+         quant_type
+     ]
+     process = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+     if process.returncode == 0:
+         st.success(f"✅ Quantized model saved at `{quantized_output_path}`")
+     else:
+         st.error(f"❌ Quantization failed: {process.stderr}")
+
+ def automate_llama_quantization(hf_model_name, quant_type):
+     """
+     Orchestrates the entire quantization process.
+     """
+     output_dir = "models"
+     gguf_file = os.path.join(output_dir, f"{hf_model_name.replace('/', '_')}.gguf")
+     quantized_file = gguf_file.replace(".gguf", f"-{quant_type}.gguf")
+
+     progress_bar = st.progress(0)
+
+     # Step 1: Download
+     st.write("### Step 1: Downloading Model")
+     download_model(hf_model_name, output_dir)
+     progress_bar.progress(33)
+
+     # Step 2: Convert to GGUF
+     st.write("### Step 2: Converting Model to GGUF Format")
+     convert_to_gguf(output_dir, gguf_file)
+     progress_bar.progress(66)
+
+     # Step 3: Quantize Model
+     st.write("### Step 3: Quantizing Model")
+     quantize_llama(gguf_file, quantized_file, quant_type)
+     progress_bar.progress(100)
+
+     st.success(f"🎉 All steps completed! Quantized model available at: `{quantized_file}`")
+     return quantized_file
+
+ # Streamlit UI
+ st.title("🦙 LLaMA Model Quantization (llama.cpp)")

+ hf_model_name = st.text_input("Enter Hugging Face Model Name", "TheBloke/Llama-2-7B-chat-GGUF")
+ quant_type = st.selectbox("Select Quantization Type", QUANT_TYPES)
+ start_button = st.button("🚀 Start Quantization")

+ if start_button:
+     with st.spinner("Processing..."):
+         quantized_model_path = automate_llama_quantization(hf_model_name, quant_type)

+     if quantized_model_path:
+         with open(quantized_model_path, "rb") as f:
+             st.download_button("⬇️ Download Quantized Model", f, file_name=os.path.basename(quantized_model_path))
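For reference, the pipeline the new app wraps boils down to three steps: download the Hugging Face checkpoint, run llama.cpp's HF-to-GGUF conversion script, and run the `llama-quantize` binary. A minimal standalone sketch of those steps, assuming a local llama.cpp checkout built under `llama.cpp/build` (the model name, paths, and exact converter flags are illustrative and vary across llama.cpp versions):

```python
import subprocess
from huggingface_hub import snapshot_download

# Illustrative names/paths; adjust to your model and your llama.cpp checkout.
model_id = "Qwen/Qwen2.5-3B"
model_dir = "models"
gguf_file = "models/model-f16.gguf"
quant_file = "models/model-Q4_K_M.gguf"

# 1. Download the Hugging Face model snapshot.
snapshot_download(repo_id=model_id, local_dir=model_dir)

# 2. Convert the checkpoint to GGUF (f16) with llama.cpp's conversion script.
subprocess.run(
    ["python3", "llama.cpp/convert-hf-to-gguf.py", model_dir,
     "--outtype", "f16", "--outfile", gguf_file],
    check=True,
)

# 3. Quantize the GGUF file with the llama-quantize binary.
subprocess.run(
    ["./llama.cpp/build/bin/llama-quantize", gguf_file, quant_file, "Q4_K_M"],
    check=True,
)
```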