MekkCyber commited on
Commit
d66dbed
·
1 Parent(s): 7c6abe1

first push

Browse files
Files changed (3) hide show
  1. README.md +15 -5
  2. app.py +259 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -1,12 +1,22 @@
1
  ---
2
- title: Quanto
3
- emoji:
4
- colorFrom: indigo
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 5.11.0
8
  app_file: app.py
9
  pinned: false
 
 
 
 
 
 
 
 
 
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: BitsAndBytes Quantizer
3
+ emoji: 💻
4
+ colorFrom: blue
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 5.10.0
8
  app_file: app.py
9
  pinned: false
10
+
11
+ hf_oauth: true
12
+ # optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
13
+ hf_oauth_expiration_minutes: 480
14
+ # optional, see "Scopes" below. "openid profile" is always included.
15
+ hf_oauth_scopes:
16
+ - read-repos
17
+ - write-repos
18
+ - manage-repos
19
+ - inference-api
20
  ---
21
 
22
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel, BitsAndBytesConfig
4
+ import tempfile
5
+ from huggingface_hub import HfApi
6
+ from huggingface_hub import list_models
7
+ from gradio_huggingfacehub_search import HuggingfaceHubSearch
8
+ from packaging import version
9
+ import os
10
+ import spaces
11
+
12
+
13
def hello(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str:
    """Return a greeting for the signed-in user.

    Gradio injects ``profile`` from the OAuth session; it is None when the
    visitor has not logged in, in which case an anonymous greeting is used.
    """
    return "Hello !" if profile is None else f"Hello {profile.name} !"
19
+
20
def check_model_exists(oauth_token: gr.OAuthToken | None, username, quantization_type, model_name, quantized_model_name):
    """Check if a model exists in the user's Hugging Face repository.

    Returns a human-readable message when the target repo name is already
    taken (or when the lookup itself fails), and None when the name is free.
    """
    try:
        # Fetch the user's existing repos up front; a set makes the
        # membership test below explicit and O(1).
        existing = {m.id for m in list_models(author=username, token=oauth_token.token)}

        # Target repo id: either the user-chosen name or the
        # "<model>-BNB-<quantization_type>" default convention.
        if quantized_model_name:
            repo_name = f"{username}/{quantized_model_name}"
        else:
            repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-{quantization_type}"

        if repo_name in existing:
            return f"Model '{repo_name}' already exists in your repository."
        return None  # Model does not exist
    except Exception as e:
        return f"Error checking model existence: {str(e)}"
36
+
37
def create_model_card(model_name, quantization_type, threshold, quant_type_4, double_quant_4):
    """Build the README/model-card markdown for a quantized repo.

    Parameters mirror the quantization settings: ``threshold`` is only
    meaningful for int8, ``quant_type_4``/``double_quant_4`` only for int4;
    the irrelevant ones are rendered as ``None``.

    Returns the model card as a markdown string with a YAML front-matter
    header (``base_model``) that the Hub parses.

    Fix: the original left the ```python usage fence unclosed, which broke
    the rendered markdown on the Hub — the closing fence is now emitted.
    """
    model_card = f"""---
base_model:
- {model_name}
---

# {model_name} (Quantized)

## Description
This model is a quantized version of the original model `{model_name}`. It has been quantized using {quantization_type} quantization with bitsandbytes.

## Quantization Details
- **Quantization Type**: {quantization_type}
- **Threshold**: {threshold if quantization_type == "int8" else None}
- **bnb_4bit_quant_type**: {quant_type_4 if quantization_type == "int4" else None}
- **bnb_4bit_use_double_quant**: {double_quant_4 if quantization_type == "int4" else None}

## Usage
You can use this model in your applications by loading it directly from the Hugging Face Hub:

```python
from transformers import AutoModel

model = AutoModel.from_pretrained("{model_name}")
```
"""
    return model_card
63
+
64
def load_model(model_name, quantization_config, auth_token):
    """Download and load ``model_name`` onto the GPU with the given bitsandbytes config."""
    return AutoModel.from_pretrained(
        model_name,
        quantization_config=quantization_config,
        device_map="cuda",
        use_auth_token=auth_token.token,
    )
66
+
67
def load_model_cpu(model_name, quantization_config, auth_token):
    """CPU variant of :func:`load_model` — no ``device_map``, so weights stay on CPU."""
    return AutoModel.from_pretrained(
        model_name,
        quantization_config=quantization_config,
        use_auth_token=auth_token.token,
    )
69
+
70
def quantize_model(model_name, quantization_type, threshold, quant_type_4, double_quant_4, auth_token=None, username=None):
    """Build a BitsAndBytesConfig for the chosen scheme and load the model with it.

    Any ``quantization_type`` other than "int4" falls through to the 8-bit path.
    """
    print(f"Quantizing model: {quantization_type}")
    if quantization_type == "int4":
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type=quant_type_4,
            # The UI hands this flag over as the string "True"/"False".
            bnb_4bit_use_double_quant=(double_quant_4 == "True"),
        )
    else:
        bnb_config = BitsAndBytesConfig(
            load_in_8bit=True,
            llm_int8_threshold=threshold,
        )
    return load_model(model_name, quantization_config=bnb_config, auth_token=auth_token)
86
+
87
def save_model(model, model_name, quantization_type, threshold, quant_type_4, double_quant_4, username=None, auth_token=None, quantized_model_name=None):
    """Serialize the quantized model plus a generated model card and push both to the Hub.

    The model is written to a temp dir (``safe_serialization=False`` because
    bitsandbytes-quantized weights are saved as pickled tensors), a README is
    generated via ``create_model_card``, and the folder is uploaded.

    Returns an HTML snippet linking to the created repo.
    """
    print("Saving quantized model")
    with tempfile.TemporaryDirectory() as tmpdirname:
        model.save_pretrained(tmpdirname, safe_serialization=False, use_auth_token=auth_token.token)

        # Target repo: user-provided name, or the "<model>-BNB-<quant>" default.
        # NOTE(fix): the original special-cased "int4_weight_only" here, but
        # both branches built the identical name — the dead branch is removed.
        if quantized_model_name:
            repo_name = f"{username}/{quantized_model_name}"
        else:
            repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-{quantization_type}"

        model_card = create_model_card(repo_name, quantization_type, threshold, quant_type_4, double_quant_4)
        with open(os.path.join(tmpdirname, "README.md"), "w") as f:
            f.write(model_card)

        # Push to Hub
        api = HfApi(token=auth_token.token)
        api.create_repo(repo_name, exist_ok=True)
        api.upload_folder(
            folder_path=tmpdirname,
            repo_id=repo_name,
            repo_type="model",
        )
        return f'<h1> 🤗 DONE</h1><br/>Find your repo here: <a href="https://huggingface.co/{repo_name}" target="_blank" style="text-decoration:underline">{repo_name}</a>'
113
+
114
def is_float(value):
    """Return True if ``value`` can be converted to a float, else False.

    Fix: non-string/non-numeric inputs (e.g. ``None``) make ``float()``
    raise TypeError rather than ValueError; the original guard let that
    propagate and crash. Both are now treated as "not a float".
    """
    try:
        float(value)
        return True
    except (ValueError, TypeError):
        return False
120
+
121
def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quantization_type, threshold, quant_type_4, double_quant_4, quantized_model_name):
    """End-to-end pipeline behind the "Quantize and Save Model" button.

    Validates login state and inputs, quantizes the model, and pushes it to
    the Hub. Returns either an error message string or the HTML success
    snippet produced by ``save_model``.

    Fix: the two sign-in error messages used inconsistent punctuation
    ("Error : " vs "Error: ") — normalized to one form.
    """
    # Both OAuth objects are injected by Gradio; either one missing means
    # the visitor has not signed in.
    if oauth_token is None:
        return "Error: Please Sign In to your HuggingFace account to use the quantizer"
    if not profile:
        return "Error: Please Sign In to your HuggingFace account to use the quantizer"

    # Refuse to silently overwrite an existing repo.
    exists_message = check_model_exists(oauth_token, profile.username, quantization_type, model_name, quantized_model_name)
    if exists_message:
        return exists_message

    # The int8 outlier threshold arrives as free text from the UI.
    if not is_float(threshold):
        return "Threshold must be a float"
    threshold = float(threshold)

    try:
        quantized_model = quantize_model(model_name, quantization_type, threshold, quant_type_4, double_quant_4, oauth_token, profile.username)
        return save_model(quantized_model, model_name, quantization_type, threshold, quant_type_4, double_quant_4, profile.username, oauth_token, quantized_model_name)
    except Exception as e:
        return f"An error occurred: {str(e)}"
140
+
141
+
142
+ css="""/* Custom CSS to allow scrolling */
143
+ .gradio-container {overflow-y: auto;}
144
+ """
145
+ with gr.Blocks(theme=gr.themes.Ocean(), css=css) as app:
146
+ gr.Markdown(
147
+ """
148
+ # 🤗 LLM Model BitsAndBytes Quantization App
149
+
150
+ Quantize your favorite Hugging Face models using BitsAndBytes and save them to your profile!
151
+ """
152
+ )
153
+
154
+ gr.LoginButton(elem_id="login-button", elem_classes="center-button", min_width=250)
155
+
156
+ m1 = gr.Markdown()
157
+ app.load(hello, inputs=None, outputs=m1)
158
+
159
+
160
+ radio = gr.Radio(["show", "hide"], label="Show Instructions")
161
+ instructions = gr.Markdown(
162
+ """
163
+ ## Instructions
164
+
165
+ 1. Login to your HuggingFace account
166
+ 2. Enter the name of the Hugging Face LLM model you want to quantize (Make sure you have access to it)
167
+ 3. Choose the quantization type.
168
+ 4. Optionally, specify the group size.
169
+ 5. Optionally, choose a custom name for the quantized model
170
+ 6. Click "Quantize and Save Model" to start the process.
171
+ 7. Once complete, you'll receive a link to the quantized model on Hugging Face.
172
+
173
+ Note: This process may take some time depending on the model size and your hardware you can check the container logs to see where are you at in the process!
174
+ """,
175
+ visible=False
176
+ )
177
+ def update_visibility(radio): # Accept the event argument, even if not used
178
+ value = radio # Get the selected value from the radio button
179
+ if value == "show":
180
+ return gr.Textbox(visible=True) #make it visible
181
+ else:
182
+ return gr.Textbox(visible=False)
183
+ radio.change(update_visibility, radio, instructions)
184
+
185
+ with gr.Row():
186
+ with gr.Column():
187
+ with gr.Row():
188
+ model_name = HuggingfaceHubSearch(
189
+ label="Hub Model ID",
190
+ placeholder="Search for model id on Huggingface",
191
+ search_type="model",
192
+ )
193
+ with gr.Row():
194
+ with gr.Column():
195
+ quantization_type = gr.Dropdown(
196
+ info="Quantization Type",
197
+ choices=["int4", "int8"],
198
+ value="int8",
199
+ filterable=False,
200
+ show_label=False,
201
+ )
202
+ threshold_8 = gr.Textbox(
203
+ info="Outlier threshold",
204
+ value=6,
205
+ interactive=True,
206
+ show_label=False,
207
+ visible=False
208
+ )
209
+ quant_type_4 = gr.Dropdown(
210
+ info="The quantization data type in the bnb.nn.Linear4Bit layers",
211
+ choices=["fp4", "nf4"],
212
+ value="fp4",
213
+ visible=False,
214
+ show_label=False
215
+ )
216
+ radio_4 = gr.Radio(["False", "True"], label="Use Double Quant", visible=False, value="False")
217
+
218
+ def update_visibility(quantization_type):
219
+ return gr.update(visible=(quantization_type=="int8")), gr.update(visible=(quantization_type=="int4")), gr.update(visible=(quantization_type=="int4"))
220
+
221
+ quantization_type.change(fn=update_visibility, inputs=quantization_type, outputs=[threshold_8, quant_type_4, radio_4])
222
+
223
+ quantized_model_name = gr.Textbox(
224
+ info="Model Name (optional : to override default)",
225
+ value="",
226
+ interactive=True,
227
+ show_label=False
228
+ )
229
+ with gr.Column():
230
+ quantize_button = gr.Button("Quantize and Save Model", variant="primary")
231
+ output_link = gr.Markdown(label="Quantized Model Link", container=True, min_height=40)
232
+
233
+
234
+ # Adding CSS styles for the username box
235
+ app.css = """
236
+ #username-box {
237
+ background-color: #f0f8ff; /* Light color */
238
+ border-radius: 8px;
239
+ padding: 10px;
240
+ }
241
+ """
242
+ app.css = """
243
+ .center-button {
244
+ display: flex;
245
+ justify-content: center;
246
+ align-items: center;
247
+ margin: 0 auto; /* Center horizontally */
248
+ }
249
+ """
250
+
251
+ quantize_button.click(
252
+ fn=quantize_and_save,
253
+ inputs=[model_name, quantization_type, threshold_8, quant_type_4, radio_4, quantized_model_name],
254
+ outputs=[output_link]
255
+ )
256
+
257
+
258
+ # Launch the app
259
+ app.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ git+https://github.com/huggingface/transformers.git@main#egg=transformers
2
+ accelerate
3
+ bitsandbytes
4
+ huggingface-hub
5
+ gradio-huggingfacehub-search
6
+ optimum-quanto