Spaces:
MekkCyber committed
Commit e5bb0c6
1 Parent(s): 3d2f5ba
AutoModel
app.py CHANGED
@@ -1,6 +1,6 @@
 import gradio as gr
 import torch
-from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer
+from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer, AutoModel
 import tempfile
 from huggingface_hub import HfApi
 from huggingface_hub import list_models
@@ -59,14 +59,14 @@ model = AutoModel.from_pretrained("{model_name}")"""
 
     return model_card
 
-def quantize_model(model_name, quantization_type, group_size=128, auth_token=None, username=None):
+def quantize_model(model_name, quantization_type, group_size=128, auth_token=None, username=None, device="cuda"):
     print(f"Quantizing model: {quantization_type}")
     if quantization_type == "int4_weight_only" :
         quantization_config = TorchAoConfig(quantization_type, group_size=group_size)
-        model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cuda", torch_dtype=torch.bfloat16, quantization_config=quantization_config, use_auth_token=auth_token.token)
     else :
         quantization_config = TorchAoConfig(quantization_type)
-        model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cuda", torch_dtype=torch.bfloat16, quantization_config=quantization_config, use_auth_token=auth_token.token)
+
+    model = AutoModel.from_pretrained(model_name, device_map=device, torch_dtype=torch.bfloat16, quantization_config=quantization_config, use_auth_token=auth_token.token)
 
     return model
 
@@ -97,7 +97,7 @@ def save_model(model, model_name, quantization_type, group_size=128, username=No
 
     return f"https://huggingface.co/{repo_name}"
 
-def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quantization_type, group_size, quantized_model_name):
+def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quantization_type, group_size, quantized_model_name, device):
     if oauth_token is None :
         return "Error : Please Sign In to your HuggingFace account to use the quantizer"
     if not profile:
@@ -105,14 +105,16 @@ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToke
     exists_message = check_model_exists(oauth_token, profile.username, quantization_type, group_size, model_name, quantized_model_name)
     if exists_message :
         return exists_message
-    quantized_model = quantize_model(model_name, quantization_type, group_size, oauth_token, profile.username)
+    if quantization_type == "int4_weight_only" and device == "cpu" :
+        return "int4_weight_only not supported on cpu"
+    quantized_model = quantize_model(model_name, quantization_type, group_size, oauth_token, profile.username, device)
     return save_model(quantized_model, model_name, quantization_type, group_size, profile.username, oauth_token, quantized_model_name)
 
 
 with gr.Blocks(theme=gr.themes.Soft()) as app:
     gr.Markdown(
         """
-        # 🚀 Model Quantization App
+        # 🚀 LLM Model Quantization App
 
         Quantize your favorite Hugging Face models and save them to your profile!
         """
@@ -141,6 +143,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
             value=128,
             interactive=True
         )
+        device = gr.Dropdown(
+            label="Device (int4 only works with cuda)",
+            choices=["cuda", "cpu"],
+            value="cuda"
+        )
         quantized_model_name = gr.Textbox(
             label="Model Name (optional : to override default)",
             value="",
@@ -162,7 +169,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
         """
         ## Instructions
         1. Login to your HuggingFace account
-        2. Enter the name of the Hugging Face model you want to quantize (Make sure you have access to it)
+        2. Enter the name of the Hugging Face LLM model you want to quantize (Make sure you have access to it)
         3. Choose the quantization type.
         4. Optionally, specify the group size.
         5. Optionally, choose a custom name for the quantized model
@@ -193,7 +200,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
 
     quantize_button.click(
         fn=quantize_and_save,
-        inputs=[model_name, quantization_type, group_size, quantized_model_name],
+        inputs=[model_name, quantization_type, group_size, quantized_model_name, device],
         outputs=[output_link]
     )
 
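For reference, the functional core of this commit is that the from_pretrained call moves out of the if/else branches and switches from AutoModelForCausalLM to AutoModel, with the target device passed in from the new UI dropdown. Below is a minimal standalone sketch of that load path; the model id, quantization type, and device values are hypothetical stand-ins for what the Space collects through its Gradio inputs, and the auth token is omitted on the assumption of a public model.

import torch
from transformers import TorchAoConfig, AutoModel

# Hypothetical inputs for illustration; the Space reads these from its Gradio UI.
model_name = "facebook/opt-125m"
quantization_type = "int8_weight_only"
device = "cpu"  # the app rejects int4_weight_only on cpu before reaching this point

# Only int4 takes a group_size; other TorchAo quant types are constructed bare,
# mirroring the branch in quantize_model.
if quantization_type == "int4_weight_only":
    quantization_config = TorchAoConfig(quantization_type, group_size=128)
else:
    quantization_config = TorchAoConfig(quantization_type)

# Single load site after the branch, as introduced by this commit.
model = AutoModel.from_pretrained(
    model_name,
    device_map=device,
    torch_dtype=torch.bfloat16,
    quantization_config=quantization_config,
)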