import os
import subprocess

import streamlit as st
from huggingface_hub import snapshot_download

# Recompile llama.cpp before running
subprocess.run(["make", "clean"], cwd="/home/user/app/llama.cpp", check=True)
subprocess.run(["make"], cwd="/home/user/app/llama.cpp", check=True)


def check_directory_path(directory_name: str) -> str:
    """Return the absolute path of `directory_name` if it exists."""
    if os.path.exists(directory_name):
        path = os.path.abspath(directory_name)
        return str(path)
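

# NOTE: check_directory_path implicitly returns None when the directory does
# not exist, so the paths built from model_dir_path below assume a llama.cpp
# checkout is present in the working directory.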
model_dir_path = check_directory_path("llama.cpp")

# Define quantization types
QUANT_TYPES = [
    "Q2_K", "Q3_K_M", "Q3_K_S", "Q4_K_M", "Q4_K_S",
    "Q5_K_M", "Q5_K_S", "Q6_K",
]
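# The K-quant suffixes trade file size for quality: _S (small) uses a more
# aggressive mix, _M (medium) keeps more tensors at higher precision.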


def download_model(hf_model_name, output_dir="models"):
    """
    Downloads a Hugging Face model and saves it locally.
    """
    st.write(f"📥 Downloading `{hf_model_name}` from Hugging Face...")
    os.makedirs(output_dir, exist_ok=True)
    snapshot_download(repo_id=hf_model_name, local_dir=output_dir, local_dir_use_symlinks=False)
    st.success("✅ Model downloaded successfully!")
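
# snapshot_download pulls the full repo snapshot (config, tokenizer files,
# *.safetensors weights) that convert_hf_to_gguf.py needs to build the GGUF.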


def convert_to_gguf(model_dir, output_file):
    """
    Converts a Hugging Face model to GGUF format.
    """
    st.write(f"🔄 Converting `{model_dir}` to GGUF format...")
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    cmd = [
        "python3", f"{model_dir_path}/convert_hf_to_gguf.py", model_dir,
        "--outtype", "f16", "--outfile", output_file,
    ]
    process = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if process.returncode == 0:
        st.success(f"✅ Conversion complete: `{output_file}`")
    else:
        st.error(f"❌ Conversion failed: {process.stderr}")


def quantize_llama(model_path, quantized_output_path, quant_type):
    """
    Quantizes a GGUF model.
    """
    st.write(f"⚡ Quantizing `{model_path}` with `{quant_type}` precision...")
    os.makedirs(os.path.dirname(quantized_output_path), exist_ok=True)
    quantize_path = f"{model_dir_path}/build/bin/llama-quantize"
    # Ensure the quantize binary is executable before invoking it
    subprocess.run(["chmod", "+x", quantize_path], check=True)
    cmd = [quantize_path, model_path, quantized_output_path, quant_type]
    process = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if process.returncode == 0:
        st.success(f"✅ Quantized model saved at `{quantized_output_path}`")
    else:
        st.error(f"❌ Quantization failed: {process.stderr}")


def automate_llama_quantization(hf_model_name, quant_type):
    """
    Orchestrates the entire quantization process.
    """
    output_dir = "models"
    gguf_file = os.path.join(output_dir, f"{hf_model_name.replace('/', '_')}.gguf")
    quantized_file = gguf_file.replace(".gguf", f"-{quant_type}.gguf")
    progress_bar = st.progress(0)

    # Step 1: Download
    st.write("### Step 1: Downloading Model")
    download_model(hf_model_name, output_dir)
    progress_bar.progress(33)

    # Step 2: Convert to GGUF
    st.write("### Step 2: Converting Model to GGUF Format")
    convert_to_gguf(output_dir, gguf_file)
    progress_bar.progress(66)

    # Step 3: Quantize Model
    st.write("### Step 3: Quantizing Model")
    quantize_llama(gguf_file, quantized_file, quant_type.lower())
    progress_bar.progress(100)

    st.success(f"🎉 All steps completed! Quantized model available at: `{quantized_file}`")
    return quantized_file
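
# Caveat: convert_to_gguf and quantize_llama report failures via st.error but
# do not raise, so later steps still run against missing files; a stricter
# version would return early whenever returncode is nonzero.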


# Streamlit UI
st.title("🦙 LLaMA Model Quantization (llama.cpp)")

hf_model_name = st.text_input("Enter Hugging Face Model Name", "Qwen/Qwen2.5-1.5B")
quant_type = st.selectbox("Select Quantization Type", QUANT_TYPES)
start_button = st.button("🚀 Start Quantization")

if start_button:
    with st.spinner("Processing..."):
        quantized_model_path = automate_llama_quantization(hf_model_name, quant_type)
    if quantized_model_path:
        with open(quantized_model_path, "rb") as f:
            st.download_button("⬇️ Download Quantized Model", f, file_name=os.path.basename(quantized_model_path))
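
# To run outside the Space (hypothetical local setup): clone and build
# llama.cpp at ./llama.cpp, adjust the hardcoded /home/user/app paths above,
# then launch with: streamlit run app.py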