import streamlit as st import subprocess import os import requests from huggingface_hub import snapshot_download, login, HfApi from pathlib import Path import tempfile # Define paths for llama.cpp binaries LLAMA_CPP_PATH = "https://huggingface.co/spaces/KBaba7/llama.cpp/tree/main/llama.cpp" LLAMA_CPP_BIN = "build/bin" BUILD_DIR = "build" CONVERT_SCRIPT = "convert-hf-to-gguf.py" # Ensure correct path def run_command(command): """ Run a shell command and return its output. """ result = subprocess.run( command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True ) return result.stdout, result.stderr st.title("LLAMA Quantization Pipeline") st.markdown( """ This tool downloads a model from Hugging Face, converts it to GGUF format, quantizes it, and provides an option to download the final model. """ ) st.sidebar.header("Settings") st.sidebar.write("Please login to your Hugging Face account to use your llama.cpp repository.") username = st.sidebar.text_input("Hugging Face Username") password = st.sidebar.text_input("Hugging Face Password", type="password") model_repo_id = st.sidebar.text_input("Model Repository ID", "Qwen/Qwen2.5-3B") quantization_options = ["q4_k_m", "q4_0", "q4_1"] quantization_type = st.sidebar.selectbox("Select Quantization Type", quantization_options) quant_options = ["f32", "f16", "bf16", "q8_0", "auto"] quant_type = st.sidebar.selectbox("Select GGUF Output Type", quant_options) upload_option = st.sidebar.checkbox("Upload quantized model to Hugging Face?", value=False) run_button = st.button("Run Pipeline") if run_button: st.info("Starting the pipeline. Please be patient...") log_area = st.empty() logs = [] def log(message): logs.append(message) log_area.text("\n".join(logs)) try: # Download the llama.cpp repository snapshot_download(repo_id="KBaba7/llama.cpp", local_dir="llama.cpp", repo_type="space") # Create temporary directories for the original and quantized models temp_path = Path(tempfile.gettempdir()) original_model_dir = temp_path / "original_model" quantized_model_dir = temp_path / "quantized_model" original_model_dir.mkdir(parents=True, exist_ok=True) quantized_model_dir.mkdir(parents=True, exist_ok=True) log("Downloading model from Hugging Face...") snapshot_download(repo_id=model_repo_id, local_dir=str(original_model_dir), local_dir_use_symlinks=False) log(f"Model downloaded to: {original_model_dir}") log("Converting model to GGUF format...") conversion_outfile = quantized_model_dir / "model_converted.gguf" conversion_cmd = ( f"python3 convert-hf-to-gguf.py {original_model_dir} --outtype {quant_type} " f"--outfile {conversion_outfile}" ) conv_stdout, conv_stderr = run_command(conversion_cmd) log(conv_stdout + conv_stderr) if not conversion_outfile.exists(): log("Error: GGUF conversion failed! No output file found.") st.error("GGUF conversion failed. Check logs.") st.stop() log("Quantizing the model...") quantized_model_outfile = quantized_model_dir / f"model_quantized_{quantization_type}.gguf" quantize_cmd = f"build/bin/llama-quantize {conversion_outfile} {quantized_model_outfile} {quantization_type}" quant_stdout, quant_stderr = run_command(quantize_cmd) log(quant_stdout + quant_stderr) if not quantized_model_outfile.exists(): log("Error: Quantization failed! No output file found.") st.error("Quantization failed. Check logs.") st.stop() log("Pipeline completed successfully!") st.success("Quantized model ready for download.") with open(quantized_model_outfile, "rb") as file: st.download_button(label="Download Quantized Model", data=file, file_name=quantized_model_outfile.name) # Upload if selected if upload_option: log("Uploading quantized model to Hugging Face...") login(username, password) api = HfApi() target_repo = f"automated-quantization/{quantized_model_outfile.stem}" api.create_repo(target_repo, exist_ok=True, repo_type="model") api.upload_file( path_or_fileobj=str(quantized_model_outfile), path_in_repo=quantized_model_outfile.name, ) log("Upload complete!") except Exception as e: log(f"An error occurred: {e}") finally: # Remove temporary directories original_model_dir.rmdir() quantized_model_dir.rmdir()