Just823 committed on
Commit 0e5224f · verified · 1 parent: c5d7263

Create app.py

Files changed (1)
app.py +289 -0
app.py ADDED
@@ -0,0 +1,289 @@
import os
import shutil
import gradio as gr
from huggingface_hub import HfApi, whoami, ModelCard
from gradio_huggingfacehub_search import HuggingfaceHubSearch
from textwrap import dedent
from pathlib import Path

from tempfile import TemporaryDirectory

from huggingface_hub.file_download import repo_folder_name
from optimum.exporters import TasksManager

from optimum.intel.utils.modeling_utils import _find_files_matching_pattern
from optimum.intel import (
    OVModelForAudioClassification,
    OVModelForCausalLM,
    OVModelForFeatureExtraction,
    OVModelForImageClassification,
    OVModelForMaskedLM,
    OVModelForQuestionAnswering,
    OVModelForSeq2SeqLM,
    OVModelForSequenceClassification,
    OVModelForTokenClassification,
    OVStableDiffusionPipeline,
    OVStableDiffusionXLPipeline,
    OVLatentConsistencyModelPipeline,
    OVWeightQuantizationConfig,
)
from diffusers import ConfigMixin

# Maps the inferred task to the corresponding optimum-intel model class.
_HEAD_TO_AUTOMODELS = {
    "feature-extraction": "OVModelForFeatureExtraction",
    "fill-mask": "OVModelForMaskedLM",
    "text-generation": "OVModelForCausalLM",
    "text-classification": "OVModelForSequenceClassification",
    "token-classification": "OVModelForTokenClassification",
    "question-answering": "OVModelForQuestionAnswering",
    "image-classification": "OVModelForImageClassification",
    "audio-classification": "OVModelForAudioClassification",
    "stable-diffusion": "OVStableDiffusionPipeline",
    "stable-diffusion-xl": "OVStableDiffusionXLPipeline",
    "latent-consistency": "OVLatentConsistencyModelPipeline",
}

def quantize_model(
    model_id: str,
    dtype: str,
    calibration_dataset: str,
    ratio: float,
    private_repo: bool,
    overwrite: bool,
    oauth_token: gr.OAuthToken,
):
    if oauth_token.token is None:
        return "You must be logged in to use this space"

    if not model_id:
        return f"### Invalid input 🐞 Please specify a model name, got {model_id}"

    try:
        model_name = model_id.split("/")[-1]
        username = whoami(oauth_token.token)["name"]
        w_t = dtype.replace("-", "")
        suffix = f"{w_t}" if model_name.endswith("openvino") else f"openvino-{w_t}"
        new_repo_id = f"{username}/{model_name}-{suffix}"
        library_name = TasksManager.infer_library_from_model(model_id, token=oauth_token.token)

        if library_name == "diffusers":
            # Diffusers pipelines are identified through their model_index.json.
            ConfigMixin.config_name = "model_index.json"
            class_name = ConfigMixin.load_config(model_id, token=oauth_token.token)["_class_name"].lower()
            if "xl" in class_name:
                task = "stable-diffusion-xl"
            elif "consistency" in class_name:
                task = "latent-consistency"
            else:
                task = "stable-diffusion"
        else:
            task = TasksManager.infer_task_from_model(model_id, token=oauth_token.token)

        if task == "text2text-generation":
            return "Export of Seq2Seq models is currently disabled."

        if task not in _HEAD_TO_AUTOMODELS:
            return f"The task '{task}' is not supported, only {list(_HEAD_TO_AUTOMODELS.keys())} tasks are supported"

        auto_model_class = _HEAD_TO_AUTOMODELS[task]
        # An OpenVINO IR (*.xml) already present in the repo means no export is needed.
        ov_files = _find_files_matching_pattern(
            model_id,
            pattern=r"(.*)?openvino(.*)?_model.xml",
            use_auth_token=oauth_token.token,
        )
        export = len(ov_files) == 0

        if calibration_dataset == "None":
            calibration_dataset = None

        is_int8 = dtype == "8-bit"
        # if library_name == "diffusers":
        #     quant_method = "hybrid"
        if not is_int8 and calibration_dataset is not None:
            quant_method = "awq"
        else:
            if calibration_dataset is not None:
                print("Default quantization was selected, calibration dataset won't be used")
            quant_method = "default"
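
        # Note on the config below (per the optimum-intel documentation):
        # `ratio` only matters for 4-bit runs, where e.g. ratio=0.8 quantizes
        # roughly 80% of the layers to 4-bit and keeps the remaining 20% in
        # 8-bit backup precision; `num_samples` caps how many calibration
        # samples AWQ draws from the selected dataset.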
        quantization_config = OVWeightQuantizationConfig(
            bits=8 if is_int8 else 4,
            quant_method=quant_method,
            dataset=None if quant_method == "default" else calibration_dataset,
            ratio=1.0 if is_int8 else ratio,
            num_samples=None if quant_method == "default" else 20,
        )

        api = HfApi(token=oauth_token.token)
        if api.repo_exists(new_repo_id) and not overwrite:
            return f"Model {new_repo_id} already exists, please tick the overwrite box to push to an existing repository"

        with TemporaryDirectory() as d:
            folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
            os.makedirs(folder)

            try:
                api.snapshot_download(repo_id=model_id, local_dir=folder, allow_patterns=["*.json"])
                # Resolve the class name to the imported optimum-intel class.
                ov_model = eval(auto_model_class).from_pretrained(
                    model_id,
                    export=export,
                    cache_dir=folder,
                    token=oauth_token.token,
                    quantization_config=quantization_config,
                )
                ov_model.save_pretrained(folder)
                new_repo_url = api.create_repo(repo_id=new_repo_id, exist_ok=True, private=private_repo)
                new_repo_id = new_repo_url.repo_id
                print("Repository created successfully!", new_repo_url)

                folder = Path(folder)
                # Upload the exported files, including the diffusers subfolders when present.
                for dir_name in (
                    "",
                    "vae_encoder",
                    "vae_decoder",
                    "text_encoder",
                    "text_encoder_2",
                    "unet",
                    "tokenizer",
                    "tokenizer_2",
                    "scheduler",
                    "feature_extractor",
                ):
                    if not (folder / dir_name).is_dir():
                        continue
                    for file_path in (folder / dir_name).iterdir():
                        if file_path.is_file():
                            try:
                                api.upload_file(
                                    path_or_fileobj=file_path,
                                    path_in_repo=os.path.join(dir_name, file_path.name),
                                    repo_id=new_repo_id,
                                )
                            except Exception as e:
                                return f"Error uploading file {file_path}: {e}"

                try:
                    card = ModelCard.load(model_id, token=oauth_token.token)
                except Exception:
                    card = ModelCard("")

                if card.data.tags is None:
                    card.data.tags = []
                if "openvino" not in card.data.tags:
                    card.data.tags.append("openvino")
                card.data.tags.append("nncf")
                card.data.tags.append(dtype)
                card.data.base_model = model_id

                card.text = dedent(
                    f"""
                    This model is a quantized version of [`{model_id}`](https://huggingface.co/{model_id}) and is converted to the OpenVINO format. This model was obtained via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space with [optimum-intel](https://github.com/huggingface/optimum-intel).
                    First make sure you have `optimum-intel` installed:
                    ```bash
                    pip install optimum[openvino]
                    ```
                    To load your model, run the following:
                    ```python
                    from optimum.intel import {auto_model_class}
                    model_id = "{new_repo_id}"
                    model = {auto_model_class}.from_pretrained(model_id)
                    ```
                    """
                )
                card_path = os.path.join(folder, "README.md")
                card.save(card_path)

                api.upload_file(
                    path_or_fileobj=card_path,
                    path_in_repo="README.md",
                    repo_id=new_repo_id,
                )
                return f"This model was successfully quantized, you can find it in your repository: {new_repo_url}"
            finally:
                shutil.rmtree(folder, ignore_errors=True)
    except Exception as e:
        return f"### Error: {e}"

DESCRIPTION = """
This Space uses [Optimum Intel](https://github.com/huggingface/optimum-intel) to automatically apply NNCF [Weight Only Quantization](https://huggingface.co/docs/optimum/main/en/intel/openvino/optimization) (WOQ) on your model and convert it to the [OpenVINO format](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) if it is not already.
After conversion, a repository with the resulting model will be pushed under your namespace.
The list of supported architectures can be found in the [documentation](https://huggingface.co/docs/optimum/main/en/intel/openvino/models).
"""

model_id = HuggingfaceHubSearch(
    label="Hub Model ID",
    placeholder="Search for a model id on the Hub",
    search_type="model",
)
dtype = gr.Dropdown(
    ["8-bit", "4-bit"],
    value="8-bit",
    label="Weights precision",
    filterable=False,
    visible=True,
)
"""
quant_method = gr.Dropdown(
    ["default", "awq", "hybrid"],
    value="default",
    label="Quantization method",
    filterable=False,
    visible=True,
)
"""
calibration_dataset = gr.Dropdown(
    [
        "None",
        "wikitext2",
        "c4",
        "c4-new",
        "conceptual_captions",
        "laion/220k-GPT4Vision-captions-from-LIVIS",
        "laion/filtered-wit",
    ],
    value="None",
    label="Calibration dataset",
    filterable=False,
    visible=True,
)
ratio = gr.Slider(
    label="Ratio",
    info="Parameter used when applying 4-bit quantization to control the ratio between 4-bit and 8-bit quantization",
    minimum=0.0,
    maximum=1.0,
    step=0.1,
    value=1.0,
)
private_repo = gr.Checkbox(
    value=False,
    label="Private repository",
    info="Create a private repository instead of a public one",
)
overwrite = gr.Checkbox(
    value=False,
    label="Overwrite repository content",
    info="Enable pushing files to existing repositories, potentially overwriting existing files",
)
interface = gr.Interface(
    fn=quantize_model,
    inputs=[
        model_id,
        dtype,
        calibration_dataset,
        ratio,
        private_repo,
        overwrite,
    ],
    outputs=[
        gr.Markdown(label="output"),
    ],
    title="Quantize your model with NNCF",
    description=DESCRIPTION,
    api_name=False,
)

with gr.Blocks() as demo:
    gr.Markdown("You must be logged in to use this space")
    gr.LoginButton(min_width=250)
    interface.render()

demo.launch()
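
For reference, the Space wraps what is otherwise a short optimum-intel recipe. A minimal sketch of the equivalent local flow, assuming a text-generation model and 8-bit weight-only quantization (the model id and output directory below are placeholders):

from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# Export the model to OpenVINO and apply 8-bit weight-only quantization on the fly.
quantization_config = OVWeightQuantizationConfig(bits=8)
model = OVModelForCausalLM.from_pretrained(
    "gpt2",  # placeholder: any supported text-generation model id
    export=True,
    quantization_config=quantization_config,
)
model.save_pretrained("gpt2-openvino-8bit")  # placeholder output directory

The Space adds the Hub plumbing on top of this: task and library inference, repo creation, file upload, and model card generation.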