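# Gradio Space that quantizes a Hugging Face Hub model to 4-bit with bitsandbytes
# and pushes the result (plus a generated model card) back to the Hub.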
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel, BitsAndBytesConfig
import tempfile
from huggingface_hub import HfApi, list_models
from gradio_huggingfacehub_search import HuggingfaceHubSearch
from bitsandbytes.nn import Linear4bit
from packaging import version
import os

def hello(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str:
    if profile is None:
        return "👋 Hello! Sign in to get started with the BitsAndBytes Quantizer."
    return f"👋 Hello {profile.name}! Welcome to the BitsAndBytes Quantizer."

def check_model_exists(oauth_token: gr.OAuthToken | None, username, model_name, quantized_model_name):
    """Check whether the target model already exists among the user's Hugging Face repositories."""
    try:
        models = list_models(author=username, token=oauth_token.token)
        model_names = [model.id for model in models]
        if quantized_model_name:
            repo_name = f"{username}/{quantized_model_name}"
        else:
            repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-INT4"
        if repo_name in model_names:
            return f"Model '{repo_name}' already exists in your repository."
        else:
            return None  # Model does not exist
    except Exception as e:
        return f"Error checking model existence: {str(e)}"

def create_model_card(model_name, repo_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4):
    model_card = f"""---
base_model:
- {model_name}
---

# {repo_name} (Quantized)

## Description
This model is a quantized version of the original model `{model_name}`. It was quantized to int4 with bitsandbytes.

## Quantization Details
- **Quantization Type**: int4
- **bnb_4bit_quant_type**: {quant_type_4}
- **bnb_4bit_use_double_quant**: {double_quant_4}
- **bnb_4bit_compute_dtype**: {compute_type_4}
- **bnb_4bit_quant_storage**: {quant_storage_4}

## Usage
You can use this model in your applications by loading it directly from the Hugging Face Hub:

```python
from transformers import AutoModel

model = AutoModel.from_pretrained("{repo_name}")
```
"""
    return model_card

def load_model(model_name, quantization_config, auth_token):
    return AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token)

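# Map the dtype names offered in the UI to the torch dtypes expected by BitsAndBytesConfig.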
DTYPE_MAPPING = {
    "int8": torch.int8,
    "uint8": torch.uint8,
    "float16": torch.float16,
    "float32": torch.float32,
    "bfloat16": torch.bfloat16,
}

def quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, auth_token=None):
    print(f"Quantizing model {model_name} (bnb_4bit_quant_type={quant_type_4})")
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type=quant_type_4,
        bnb_4bit_use_double_quant=(double_quant_4 == "True"),
        bnb_4bit_quant_storage=DTYPE_MAPPING[quant_storage_4],
        bnb_4bit_compute_dtype=DTYPE_MAPPING[compute_type_4],
    )
    model = AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token)
    # bitsandbytes quantizes Linear4bit weights when the module is moved to a CUDA device,
    # so round-trip each 4-bit layer through the GPU and back to keep the model on CPU.
    for _, module in model.named_modules():
        if isinstance(module, Linear4bit):
            module.to("cuda")
            module.to("cpu")
    return model

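# Save the quantized weights and a generated model card to a temporary directory,
# then create (or reuse) the target repo and upload everything to the Hub.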
def save_model(model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, username=None, auth_token=None, quantized_model_name=None, public=False):
    print("Saving quantized model")
    with tempfile.TemporaryDirectory() as tmpdirname:
        model.save_pretrained(tmpdirname, safe_serialization=True, use_auth_token=auth_token.token)
        if quantized_model_name:
            repo_name = f"{username}/{quantized_model_name}"
        else:
            repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-INT4"
        model_card = create_model_card(model_name, repo_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4)
        with open(os.path.join(tmpdirname, "README.md"), "w") as f:
            f.write(model_card)
        # Push to Hub
        api = HfApi(token=auth_token.token)
        api.create_repo(repo_name, exist_ok=True, private=not public)
        api.upload_folder(
            folder_path=tmpdirname,
            repo_id=repo_name,
            repo_type="model",
        )
        return f"""
        <div class="success-box">
            <h2>🎉 Quantization Complete!</h2>
            <p>Your quantized model is now available at:</p>
            <a href="https://huggingface.co/{repo_name}" target="_blank" class="model-link">
                huggingface.co/{repo_name}
            </a>
        </div>
        """

def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public):
    if oauth_token is None or profile is None:
        return """
        <div class="error-box">
            <h3>❌ Authentication Error</h3>
            <p>Please sign in to your HuggingFace account to use the quantizer.</p>
        </div>
        """
    exists_message = check_model_exists(oauth_token, profile.username, model_name, quantized_model_name)
    if exists_message:
        return f"""
        <div class="warning-box">
            <h3>⚠️ Model Already Exists</h3>
            <p>{exists_message}</p>
        </div>
        """
    try:
        quantized_model = quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, oauth_token)
        return save_model(quantized_model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, profile.username, oauth_token, quantized_model_name, public)
    except Exception as e:
        print(e)
        return f"""
        <div class="error-box">
            <h3>❌ Error Occurred</h3>
            <p>{str(e)}</p>
        </div>
        """

css = """ | |
:root { | |
--primary: #6366f1; | |
--primary-light: #818cf8; | |
--primary-dark: #4f46e5; | |
--secondary: #10b981; | |
--accent: #f97316; | |
--background: #f8fafc; | |
--text: #1e293b; | |
--card-bg: #ffffff; | |
--input-bg: #f1f5f9; | |
--error: #ef4444; | |
--warning: #f59e0b; | |
--success: #10b981; | |
--border-radius: 12px; | |
--shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06); | |
--transition: all 0.3s ease; | |
} | |
body, .gradio-container { | |
font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', sans-serif; | |
color: var(--text); | |
background-color: var(--background); | |
} | |
h1 { | |
font-size: 2.5rem !important; | |
font-weight: 800 !important; | |
text-align: center; | |
background: linear-gradient(45deg, var(--primary), var(--accent)); | |
-webkit-background-clip: text; | |
background-clip: text; | |
color: transparent !important; | |
margin-bottom: 1rem !important; | |
padding: 1rem 0 !important; | |
} | |
h2 { | |
font-size: 1.75rem !important; | |
font-weight: 700 !important; | |
color: var(--primary-dark) !important; | |
margin-top: 1.5rem !important; | |
margin-bottom: 1rem !important; | |
} | |
h3 { | |
font-size: 1.25rem !important; | |
font-weight: 600 !important; | |
color: var(--primary) !important; | |
margin-top: 1rem !important; | |
margin-bottom: 0.5rem !important; | |
border-bottom: 2px solid var(--primary-light); | |
padding-bottom: 0.5rem; | |
width: fit-content; | |
} | |
/* Main container styling */ | |
.main-container { | |
max-width: 1200px; | |
margin: 0 auto; | |
padding: 2rem; | |
background-color: var(--card-bg); | |
border-radius: var(--border-radius); | |
box-shadow: var(--shadow); | |
} | |
/* Button styling */ | |
button { | |
border-radius: var(--border-radius) !important; | |
font-weight: 600 !important; | |
transition: var(--transition) !important; | |
text-transform: uppercase; | |
letter-spacing: 0.5px; | |
} | |
button.primary { | |
background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important; | |
border: none !important; | |
color: white !important; | |
padding: 12px 24px !important; | |
box-shadow: 0 4px 6px -1px rgba(99, 102, 241, 0.4) !important; | |
} | |
button.primary:hover { | |
transform: translateY(-2px) !important; | |
box-shadow: 0 8px 15px -3px rgba(99, 102, 241, 0.5) !important; | |
} | |
/* Login button styling */ | |
#login-button { | |
margin: 1.5rem auto !important; | |
min-width: 200px !important; | |
background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important; | |
color: white !important; | |
font-weight: 600 !important; | |
padding: 12px 24px !important; | |
border-radius: var(--border-radius) !important; | |
border: none !important; | |
box-shadow: 0 4px 6px -1px rgba(99, 102, 241, 0.4) !important; | |
transition: var(--transition) !important; | |
} | |
#login-button:hover { | |
transform: translateY(-2px) !important; | |
box-shadow: 0 8px 15px -3px rgba(99, 102, 241, 0.5) !important; | |
} | |
/* Toggle button styling */ | |
#toggle-button { | |
background: transparent !important; | |
color: var(--primary) !important; | |
border: 2px solid var(--primary-light) !important; | |
padding: 8px 16px !important; | |
margin: 1rem 0 !important; | |
border-radius: var(--border-radius) !important; | |
transition: var(--transition) !important; | |
font-weight: 600 !important; | |
} | |
#toggle-button:hover { | |
background-color: var(--primary-light) !important; | |
color: white !important; | |
} | |
/* Input fields styling */ | |
input, select, textarea { | |
border-radius: var(--border-radius) !important; | |
border: 2px solid var(--input-bg) !important; | |
padding: 10px 16px !important; | |
background-color: var(--input-bg) !important; | |
transition: var(--transition) !important; | |
} | |
input:focus, select:focus, textarea:focus { | |
border-color: var(--primary-light) !important; | |
box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.2) !important; | |
} | |
/* Dropdown styling with nice hover effects */ | |
.gradio-dropdown > div { | |
border-radius: var(--border-radius) !important; | |
border: 2px solid var(--input-bg) !important; | |
overflow: hidden !important; | |
transition: var(--transition) !important; | |
} | |
.gradio-dropdown > div:hover { | |
border-color: var(--primary-light) !important; | |
} | |
/* Radio and checkbox styling */ | |
.gradio-radio, .gradio-checkbox { | |
background-color: var(--card-bg) !important; | |
border-radius: var(--border-radius) !important; | |
padding: 12px !important; | |
margin-bottom: 16px !important; | |
transition: var(--transition) !important; | |
border: 2px solid var(--input-bg) !important; | |
} | |
.gradio-radio:hover, .gradio-checkbox:hover { | |
border-color: var(--primary-light) !important; | |
} | |
.gradio-radio input[type="radio"] + label { | |
padding: 8px 12px !important; | |
border-radius: 20px !important; | |
margin-right: 8px !important; | |
background-color: var(--input-bg) !important; | |
transition: var(--transition) !important; | |
} | |
.gradio-radio input[type="radio"]:checked + label { | |
background-color: var(--primary) !important; | |
color: white !important; | |
} | |
/* Custom spacing and layout */ | |
.gradio-row { | |
margin-bottom: 24px !important; | |
} | |
.option-row { | |
display: flex !important; | |
gap: 16px !important; | |
margin-bottom: 16px !important; | |
} | |
/* Card-like sections */ | |
.card-section { | |
background-color: var(--card-bg) !important; | |
border-radius: var(--border-radius) !important; | |
padding: 20px !important; | |
margin-bottom: 24px !important; | |
box-shadow: var(--shadow) !important; | |
border: 1px solid rgba(0, 0, 0, 0.05) !important; | |
} | |
/* Search box styling */ | |
.search-box input { | |
border-radius: var(--border-radius) !important; | |
border: 2px solid var(--input-bg) !important; | |
padding: 12px 20px !important; | |
box-shadow: var(--shadow) !important; | |
transition: var(--transition) !important; | |
} | |
.search-box input:focus { | |
border-color: var(--primary) !important; | |
box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.3) !important; | |
} | |
/* Model name textbox specific styling */ | |
.model-name-textbox { | |
border: 2px solid var(--input-bg) !important; | |
border-radius: var(--border-radius) !important; | |
transition: var(--transition) !important; | |
} | |
.model-name-textbox:focus-within { | |
border-color: var(--primary) !important; | |
box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.3) !important; | |
} | |
/* Success, warning and error boxes */ | |
.success-box, .warning-box, .error-box { | |
border-radius: var(--border-radius) !important; | |
padding: 20px !important; | |
margin: 20px 0 !important; | |
box-shadow: var(--shadow) !important; | |
animation: fadeIn 0.5s ease-in-out; | |
} | |
.success-box { | |
background-color: rgba(16, 185, 129, 0.1) !important; | |
border: 2px solid var(--success) !important; | |
} | |
.warning-box { | |
background-color: rgba(245, 158, 11, 0.1) !important; | |
border: 2px solid var(--warning) !important; | |
} | |
.error-box { | |
background-color: rgba(239, 68, 68, 0.1) !important; | |
border: 2px solid var(--error) !important; | |
} | |
/* Model link styling */ | |
.model-link { | |
display: inline-block !important; | |
background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important; | |
color: white !important; | |
text-decoration: none !important; | |
padding: 12px 24px !important; | |
border-radius: var(--border-radius) !important; | |
font-weight: 600 !important; | |
margin-top: 16px !important; | |
box-shadow: 0 4px 6px -1px rgba(99, 102, 241, 0.4) !important; | |
transition: var(--transition) !important; | |
} | |
.model-link:hover { | |
transform: translateY(-2px) !important; | |
box-shadow: 0 8px 15px -3px rgba(99, 102, 241, 0.5) !important; | |
} | |
/* Instructions section */ | |
.instructions-container { | |
background-color: rgba(99, 102, 241, 0.05) !important; | |
border-left: 4px solid var(--primary) !important; | |
padding: 16px !important; | |
margin: 24px 0 !important; | |
border-radius: 0 var(--border-radius) var(--border-radius) 0 !important; | |
} | |
/* Animations */ | |
@keyframes fadeIn { | |
from { opacity: 0; transform: translateY(10px); } | |
to { opacity: 1; transform: translateY(0); } | |
} | |
/* Responsive adjustments */ | |
@media (max-width: 768px) { | |
.option-row { | |
flex-direction: column !important; | |
} | |
} | |
/* Add a nice gradient splash to the app */ | |
.gradio-container::before { | |
content: ""; | |
position: absolute; | |
top: 0; | |
left: 0; | |
right: 0; | |
height: 10px; | |
background: linear-gradient(90deg, var(--primary), var(--accent)); | |
z-index: 100; | |
} | |
/* Stylish header */ | |
.app-header { | |
display: flex; | |
flex-direction: column; | |
align-items: center; | |
margin-bottom: 2rem; | |
position: relative; | |
} | |
.app-header::after { | |
content: ""; | |
position: absolute; | |
bottom: -10px; | |
left: 50%; | |
transform: translateX(-50%); | |
width: 80px; | |
height: 4px; | |
background: linear-gradient(90deg, var(--primary), var(--accent)); | |
border-radius: 2px; | |
} | |
/* Section headers */ | |
.section-header { | |
display: flex; | |
align-items: center; | |
margin-bottom: 1rem; | |
} | |
.section-header::before { | |
content: "βοΈ"; | |
margin-right: 8px; | |
font-size: 1.25rem; | |
} | |
/* Quantize button special styling */ | |
#quantize-button { | |
background: linear-gradient(135deg, var(--primary), var(--accent)) !important; | |
color: white !important; | |
padding: 16px 32px !important; | |
font-size: 1.1rem !important; | |
font-weight: 700 !important; | |
border: none !important; | |
border-radius: var(--border-radius) !important; | |
box-shadow: 0 4px 15px -3px rgba(99, 102, 241, 0.5) !important; | |
transition: all 0.3s cubic-bezier(0.25, 0.8, 0.25, 1) !important; | |
position: relative; | |
overflow: hidden; | |
} | |
#quantize-button:hover { | |
transform: translateY(-3px) !important; | |
box-shadow: 0 7px 20px -2px rgba(99, 102, 241, 0.6) !important; | |
} | |
#quantize-button::after { | |
content: ""; | |
position: absolute; | |
top: 0; | |
left: 0; | |
width: 100%; | |
height: 100%; | |
background: linear-gradient(rgba(255, 255, 255, 0.2), rgba(255, 255, 255, 0)); | |
transform: translateY(-100%); | |
transition: transform 0.6s cubic-bezier(0.25, 0.8, 0.25, 1); | |
} | |
#quantize-button:hover::after { | |
transform: translateY(0); | |
} | |
""" | |
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald"), css=css) as demo:
    with gr.Column(elem_classes="main-container"):
        with gr.Row(elem_classes="app-header"):
            gr.Markdown(
                """
                <h1 style="text-align: center; margin-bottom: 1rem; font-size: 1.2rem; color: #4b5563;">🤗 BitsAndBytes Model Quantizer</h1>
                <div style="text-align: center; margin-bottom: 1rem; font-size: 1.2rem; color: #4b5563;">
                    Welcome to the BitsAndBytes Model Quantizer!
                </div>
                """
            )

        gr.LoginButton(elem_id="login-button", elem_classes="login-button")
        welcome_msg = gr.Markdown(elem_classes="welcome-message")
        demo.load(hello, inputs=None, outputs=welcome_msg)

        instructions = gr.Markdown(
            """
            <div class="instructions-container">
                <h3>📋 Instructions</h3>
                <ol>
                    <li>Login to your HuggingFace account</li>
                    <li>Enter the name of the Hugging Face LLM model you want to quantize</li>
                    <li>Configure quantization settings based on your needs</li>
                    <li>Optionally, specify a custom name for the quantized model</li>
                    <li>Click "Quantize Model" to start the process</li>
                </ol>
                <p><strong>Note:</strong> Processing time depends on model size and your hardware. Check container logs for progress!</p>
            </div>
            """,
            visible=False
        )
        instructions_visible = gr.State(False)
        toggle_button = gr.Button("▼ Show Instructions", elem_id="toggle-button", elem_classes="toggle-button")

        def toggle_instructions(instructions_visible):
            new_visibility = not instructions_visible
            new_label = "▲ Hide Instructions" if new_visibility else "▼ Show Instructions"
            return gr.update(visible=new_visibility), new_visibility, gr.update(value=new_label)

        toggle_button.click(toggle_instructions, instructions_visible, [instructions, instructions_visible, toggle_button])

with gr.Row(elem_classes="app-content"): | |
with gr.Column(scale=1, elem_classes="card-section"): | |
with gr.Row(elem_classes="search-section"): | |
model_name = HuggingfaceHubSearch( | |
label="π Select Model", | |
placeholder=" Search for model on Huggingface Hub...", | |
search_type="model", | |
elem_classes="search-box" | |
) | |
with gr.Row(elem_classes="section-header"): | |
gr.Markdown("### Quantization Settings") | |
with gr.Column(elem_classes="settings-group"): | |
gr.Markdown("**Quantization Type**", elem_classes="setting-label") | |
quant_type_4 = gr.Dropdown( | |
choices=["fp4", "nf4"], | |
value="fp4", | |
label="Format", | |
info="The quantization data type in bnb.nn.Linear4Bit layers", | |
show_label=False | |
) | |
gr.Markdown("**Compute Settings**", elem_classes="setting-label") | |
compute_type_4 = gr.Dropdown( | |
choices=["float16", "bfloat16", "float32"], | |
value="float32", | |
label="Compute Type", | |
info="The compute dtype for matrix multiplication" | |
) | |
quant_storage_4 = gr.Dropdown( | |
choices=["float16", "float32", "int8", "uint8", "bfloat16"], | |
value="uint8", | |
label="Storage Type", | |
info="The storage type for quantized weights" | |
) | |
gr.Markdown("**Double Quantization**", elem_classes="setting-label") | |
double_quant_4 = gr.Radio( | |
["False", "True"], | |
label="Use Double Quantization", | |
info="Further compress model size with nested quantization", | |
value="False", | |
) | |
with gr.Row(elem_classes="section-header"): | |
gr.Markdown("### Output Settings") | |
with gr.Column(elem_classes="settings-group"): | |
quantized_model_name = gr.Textbox( | |
label="Custom Model Name (Optional)", | |
info="Leave blank to use default naming convention", | |
placeholder="my-quantized-model", | |
elem_classes="model-name-textbox" | |
) | |
public = gr.Checkbox( | |
label="Make model public", | |
info="If checked, your model will be publicly accessible on Hugging Face Hub", | |
value=False, | |
) | |
            with gr.Column(scale=1, elem_classes="card-section"):
                with gr.Row():
                    gr.Markdown("""
                    ### 📊 Quantization Benefits
                    <div style="background-color: rgba(99, 102, 241, 0.05); padding: 12px; border-radius: 8px; margin-bottom: 16px;">
                        <p><strong>⚡ Lower Memory Usage:</strong> Reduce model size by up to 75%</p>
                        <p><strong>🚀 Faster Inference:</strong> Achieve better performance on resource-constrained hardware</p>
                        <p><strong>💻 Wider Compatibility:</strong> Run models on devices with limited VRAM</p>
                    </div>

                    ### 🔧 Configuration Guide
                    <div style="background-color: rgba(16, 185, 129, 0.05); padding: 12px; border-radius: 8px;">
                        <p><strong>Quantization Type:</strong></p>
                        <ul>
                            <li><code>fp4</code> - standard 4-bit floating point</li>
                            <li><code>nf4</code> - 4-bit NormalFloat, designed for normally distributed weights (recommended by the QLoRA paper)</li>
                        </ul>
                        <p><strong>Double Quantization:</strong> Enable for additional compression with minimal quality loss</p>
                    </div>
                    """)
        with gr.Row():
            quantize_button = gr.Button("🚀 Quantize Model", variant="primary", elem_id="quantize-button")
        output_link = gr.HTML(label="Results", elem_classes="results-container")

        # Add interactive footer with links
        gr.Markdown("""
        <div style="margin-top: 2rem; text-align: center; padding: 1rem; border-top: 1px solid rgba(99, 102, 241, 0.2);">
            <p>Powered by <a href="https://huggingface.co/" target="_blank" style="color: var(--primary); text-decoration: none; font-weight: 600;">Hugging Face</a> and <a href="https://github.com/TimDettmers/bitsandbytes" target="_blank" style="color: var(--primary); text-decoration: none; font-weight: 600;">BitsAndBytes</a></p>
        </div>
        """)

        quantize_button.click(
            fn=quantize_and_save,
            inputs=[model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public],
            outputs=[output_link]
        )

if __name__ == "__main__":
    demo.launch(share=True)