KBaba7 committed on
Commit
417e4f5
·
verified ·
1 Parent(s): a1a4268

Update app.py

Files changed (1)
  1. app.py +90 -104
app.py CHANGED
@@ -1,113 +1,99 @@
-
- import streamlit as st
- import subprocess
  import os
- import requests
- from huggingface_hub import snapshot_download, login, HfApi
- from pathlib import Path
- import tempfile

- # Define paths for llama.cpp binaries
- LLAMA_CPP_PATH = "https://huggingface.co/spaces/KBaba7/llama.cpp/tree/main/llama.cpp"
- LLAMA_CPP_BIN = "build/bin"
- BUILD_DIR = "build"
- CONVERT_SCRIPT = "convert-hf-to-gguf.py"  # Ensure correct path

- def run_command(command):
-     """ Run a shell command and return its output. """
-     result = subprocess.run(
-         command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True
-     )
-     return result.stdout, result.stderr
 
- st.title("LLAMA Quantization Pipeline")
- st.markdown(
      """
-     This tool downloads a model from Hugging Face, converts it to GGUF format, quantizes it, and provides an option to download the final model.
      """
- )
 
- st.sidebar.header("Settings")
- st.sidebar.write("Please login to your Hugging Face account to use your llama.cpp repository.")
- username = st.sidebar.text_input("Hugging Face Username")
- password = st.sidebar.text_input("Hugging Face Password", type="password")
- model_repo_id = st.sidebar.text_input("Model Repository ID", "Qwen/Qwen2.5-3B")
- quantization_options = ["q4_k_m", "q4_0", "q4_1"]
- quantization_type = st.sidebar.selectbox("Select Quantization Type", quantization_options)
- quant_options = ["f32", "f16", "bf16", "q8_0", "auto"]
- quant_type = st.sidebar.selectbox("Select GGUF Output Type", quant_options)
- upload_option = st.sidebar.checkbox("Upload quantized model to Hugging Face?", value=False)
- run_button = st.button("Run Pipeline")
 
- if run_button:
-     st.info("Starting the pipeline. Please be patient...")
-     log_area = st.empty()
-     logs = []
-
-     def log(message):
-         logs.append(message)
-         log_area.text("\n".join(logs))
-
-     try:
-         # Download the llama.cpp repository
-         snapshot_download(repo_id="KBaba7/llama.cpp", local_dir="llama.cpp", repo_type="space")
-
-         # Create temporary directories for the original and quantized models
-         temp_path = Path(tempfile.gettempdir())
-         original_model_dir = temp_path / "original_model"
-         quantized_model_dir = temp_path / "quantized_model"
-         original_model_dir.mkdir(parents=True, exist_ok=True)
-         quantized_model_dir.mkdir(parents=True, exist_ok=True)
-
-         log("Downloading model from Hugging Face...")
-         snapshot_download(repo_id=model_repo_id, local_dir=str(original_model_dir), local_dir_use_symlinks=False)
-         log(f"Model downloaded to: {original_model_dir}")
-
-         log("Converting model to GGUF format...")
-         conversion_outfile = quantized_model_dir / "model_converted.gguf"
-         conversion_cmd = (
-             f"python3 convert-hf-to-gguf.py {original_model_dir} --outtype {quant_type} "
-             f"--outfile {conversion_outfile}"
-         )
-         conv_stdout, conv_stderr = run_command(conversion_cmd)
-         log(conv_stdout + conv_stderr)
-
-         if not conversion_outfile.exists():
-             log("Error: GGUF conversion failed! No output file found.")
-             st.error("GGUF conversion failed. Check logs.")
-             st.stop()

-         log("Quantizing the model...")
-         quantized_model_outfile = quantized_model_dir / f"model_quantized_{quantization_type}.gguf"
-         quantize_cmd = f"build/bin/llama-quantize {conversion_outfile} {quantized_model_outfile} {quantization_type}"
-         quant_stdout, quant_stderr = run_command(quantize_cmd)
-         log(quant_stdout + quant_stderr)
-
-         if not quantized_model_outfile.exists():
-             log("Error: Quantization failed! No output file found.")
-             st.error("Quantization failed. Check logs.")
-             st.stop()
-
-         log("Pipeline completed successfully!")
-         st.success("Quantized model ready for download.")
-         with open(quantized_model_outfile, "rb") as file:
-             st.download_button(label="Download Quantized Model", data=file, file_name=quantized_model_outfile.name)
-
-         # Upload if selected
-         if upload_option:
-             log("Uploading quantized model to Hugging Face...")
-             login(username, password)
-             api = HfApi()
-             target_repo = f"automated-quantization/{quantized_model_outfile.stem}"
-             api.create_repo(target_repo, exist_ok=True, repo_type="model")
-             api.upload_file(
-                 path_or_fileobj=str(quantized_model_outfile),
-                 path_in_repo=quantized_model_outfile.name,
-             )
-             log("Upload complete!")
-     except Exception as e:
-         log(f"An error occurred: {e}")
-     finally:
-         # Remove temporary directories
-         original_model_dir.rmdir()
-         quantized_model_dir.rmdir()
  import os
+ import subprocess
+ import streamlit as st
+ from huggingface_hub import snapshot_download

+ # Define quantization types
+ QUANT_TYPES = [
+     "Q2_K", "Q3_K_M", "Q3_K_S", "Q4_K_M", "Q4_K_S",
+     "Q5_K_M", "Q5_K_S", "Q6_K"
+ ]

+ def download_model(hf_model_name, output_dir="models"):
+     """
+     Downloads a Hugging Face model and saves it locally.
+     """
+     st.write(f"📥 Downloading `{hf_model_name}` from Hugging Face...")
+     os.makedirs(output_dir, exist_ok=True)
+     snapshot_download(repo_id=hf_model_name, local_dir=output_dir, local_dir_use_symlinks=False)
+     st.success("✅ Model downloaded successfully!")
+
+ def convert_to_gguf(model_dir, output_file):
+     """
+     Converts a Hugging Face model to GGUF format.
+     """
+     st.write(f"🔄 Converting `{model_dir}` to GGUF format...")
+     os.makedirs(os.path.dirname(output_file), exist_ok=True)
+     cmd = [
+         "python3",
+         "llama.cpp/convert-hf-to-gguf.py",
+         "--model", model_dir,
+         "--outtype f16",
+         "--outfile", output_file
+     ]
+     process = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+     if process.returncode == 0:
+         st.success(f"✅ Conversion complete: `{output_file}`")
+     else:
+         st.error(f"❌ Conversion failed: {process.stderr}")

+ def quantize_llama(model_path, quantized_output_path, quant_type):
      """
+     Quantizes a GGUF model.
      """
+     st.write(f"⚡ Quantizing `{model_path}` with `{quant_type}` precision...")
+     os.makedirs(os.path.dirname(quantized_output_path), exist_ok=True)
+     cmd = [
+         "./llama.cpp/build/bin/llama-quantize",
+         model_path,
+         quantized_output_path,
+         quant_type
+     ]
+     process = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+     if process.returncode == 0:
+         st.success(f"✅ Quantized model saved at `{quantized_output_path}`")
+     else:
+         st.error(f"❌ Quantization failed: {process.stderr}")
+
+ def automate_llama_quantization(hf_model_name, quant_type):
+     """
+     Orchestrates the entire quantization process.
+     """
+     output_dir = "models"
+     gguf_file = os.path.join(output_dir, f"{hf_model_name.replace('/', '_')}.gguf")
+     quantized_file = gguf_file.replace(".gguf", f"-{quant_type}.gguf")
+
+     progress_bar = st.progress(0)
+
+     # Step 1: Download
+     st.write("### Step 1: Downloading Model")
+     download_model(hf_model_name, output_dir)
+     progress_bar.progress(33)
+
+     # Step 2: Convert to GGUF
+     st.write("### Step 2: Converting Model to GGUF Format")
+     convert_to_gguf(output_dir, gguf_file)
+     progress_bar.progress(66)
+
+     # Step 3: Quantize Model
+     st.write("### Step 3: Quantizing Model")
+     quantize_llama(gguf_file, quantized_file, quant_type)
+     progress_bar.progress(100)
+
+     st.success(f"🎉 All steps completed! Quantized model available at: `{quantized_file}`")
+     return quantized_file
+
+ # Streamlit UI
+ st.title("🦙 LLaMA Model Quantization (llama.cpp)")

+ hf_model_name = st.text_input("Enter Hugging Face Model Name", "TheBloke/Llama-2-7B-chat-GGUF")
+ quant_type = st.selectbox("Select Quantization Type", QUANT_TYPES)
+ start_button = st.button("🚀 Start Quantization")

+ if start_button:
+     with st.spinner("Processing..."):
+         quantized_model_path = automate_llama_quantization(hf_model_name, quant_type)

+     if quantized_model_path:
+         with open(quantized_model_path, "rb") as f:
+             st.download_button("⬇️ Download Quantized Model", f, file_name=os.path.basename(quantized_model_path))
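
For readers who want to exercise the same pipeline outside the Streamlit UI, below is a minimal standalone sketch of the three steps the new app.py performs (download, GGUF conversion, quantization). It is not part of this commit: the model id, output paths, and the locations of convert-hf-to-gguf.py and the llama-quantize binary are assumptions carried over from the diff above, and the conversion script is invoked with the model directory as a positional argument and --outtype/--outfile as separate argv entries, which is how the upstream llama.cpp script is commonly called.

# Standalone sketch (not part of this commit): the same three pipeline steps
# without Streamlit. Paths below assume a llama.cpp checkout next to this script.
import os
import subprocess
from huggingface_hub import snapshot_download

hf_model_name = "Qwen/Qwen2.5-3B"      # any Hugging Face model id
quant_type = "Q4_K_M"                  # one of the QUANT_TYPES offered in the UI
output_dir = "models"
gguf_file = os.path.join(output_dir, hf_model_name.replace("/", "_") + ".gguf")
quantized_file = gguf_file.replace(".gguf", f"-{quant_type}.gguf")

# Step 1: download the original checkpoint
os.makedirs(output_dir, exist_ok=True)
snapshot_download(repo_id=hf_model_name, local_dir=output_dir)

# Step 2: convert the checkpoint to an f16 GGUF file
subprocess.run(
    ["python3", "llama.cpp/convert-hf-to-gguf.py", output_dir,
     "--outtype", "f16", "--outfile", gguf_file],
    check=True,
)

# Step 3: quantize the GGUF file with the llama-quantize binary built from llama.cpp
subprocess.run(
    ["./llama.cpp/build/bin/llama-quantize", gguf_file, quantized_file, quant_type],
    check=True,
)

print(f"Quantized model written to {quantized_file}")

If either subprocess step fails, check=True raises CalledProcessError, which plays the same role as the returncode checks in the Streamlit helpers above.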