Tom Aarsen committed on
Commit
6051ae2
·
1 Parent(s): ed9320d

Add initial Space

Browse files
Files changed (3) hide show
  1. .gitignore +3 -0
  2. app.py +889 -0
  3. requirements.txt +5 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+
2
+ __pycache__
3
+ .vscode
app.py ADDED
@@ -0,0 +1,889 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from enum import Enum
2
+ from pathlib import Path
3
+ from typing import Tuple
4
+ import gradio as gr
5
+ from gradio_huggingfacehub_search import HuggingfaceHubSearch
6
+ from sentence_transformers import SentenceTransformer
7
+ from sentence_transformers import (
8
+ export_dynamic_quantized_onnx_model as st_export_dynamic_quantized_onnx_model,
9
+ export_optimized_onnx_model as st_export_optimized_onnx_model,
10
+ export_static_quantized_openvino_model as st_export_static_quantized_openvino_model,
11
+ )
12
+ from huggingface_hub import model_info, upload_folder, whoami, get_repo_discussions, list_repo_commits, HfFileSystem
13
+ from huggingface_hub.errors import RepositoryNotFoundError
14
+ from optimum.intel import OVQuantizationConfig
15
+ from tempfile import TemporaryDirectory
16
+
17
+
18
class Backend(Enum):
    """Export targets offered by the app.

    Each member's value is the human-readable label; ``str(member)`` yields
    that label so members can be turned into UI choices directly.
    """

    # The PyTorch target is intentionally disabled for now.
    # TORCH = "PyTorch"
    ONNX = "ONNX"
    ONNX_DYNAMIC_QUANTIZATION = "ONNX (Dynamic Quantization)"
    ONNX_OPTIMIZATION = "ONNX (Optimization)"
    OPENVINO = "OpenVINO"
    OPENVINO_STATIC_QUANTIZATION = "OpenVINO (Static Quantization)"

    def __str__(self):
        # Render as the label rather than the default "Backend.NAME" form.
        label = self.value
        return label
28
+
29
+
30
+ backends = [str(backend) for backend in Backend]
31
+ FILE_SYSTEM = HfFileSystem()
32
+
33
def is_new_model(model_id: str) -> bool:
    """
    Report whether ``model_id`` is absent from the Hugging Face Hub.

    Returns True only when the Hub explicitly answers "repository not found".
    Any other failure while looking the model up is deliberately treated as
    "the model exists", so the function returns False in that case.
    """
    try:
        model_info(model_id)
    except RepositoryNotFoundError:
        # The Hub confirmed that there is no such repository.
        return True
    except Exception:
        # Best-effort: an unrelated lookup error means we assume it exists.
        return False
    else:
        return False
45
+
46
+
47
def is_sentence_transformer_model(model_id: str) -> bool:
    """
    Check whether the Hub model carries the ``sentence-transformers`` tag.

    Args:
        model_id: Repository ID of the model on the Hugging Face Hub.

    Returns:
        True if ``"sentence-transformers"`` is among the model's tags,
        False otherwise (including when the model reports no tags at all).

    Raises:
        Whatever ``model_info`` raises for the lookup itself; lookup errors
        are not handled here.
    """
    tags = model_info(model_id).tags
    # `tags` can be None when the Hub returns no tag list; a membership test
    # on None would raise TypeError, so fall back to an empty collection.
    return "sentence-transformers" in (tags or ())
49
+
50
+
51
def get_last_commit(model_id: str) -> str:
    """
    Build the Hub URL of the first commit returned by ``list_repo_commits``.

    Note that the result is the full commit URL, not just the commit hash.
    """
    newest_commit = list_repo_commits(model_id)[0]
    commit_hash = newest_commit.commit_id
    return f"https://huggingface.co/{model_id}/commit/{commit_hash}"
56
+
57
def get_last_pr(model_id: str) -> Tuple[str, int]:
    """
    Return the URL and number of the first pull request listed for a repo.

    ``get_repo_discussions`` yields plain discussions as well as pull
    requests, so entries that are not pull requests are skipped instead of
    being reported as the PR.

    Args:
        model_id: Repository ID of the model on the Hugging Face Hub.

    Returns:
        A ``(url, num)`` tuple for the first pull request yielded.

    Raises:
        LookupError: If the repository has no pull request at all.
    """
    # NOTE(review): this relies on the Hub yielding the most recent entry
    # first, exactly as the previous `next(...)` based code did - confirm.
    for discussion in get_repo_discussions(model_id):
        if discussion.is_pull_request:
            return discussion.url, discussion.num
    raise LookupError(f"No pull request found for repository {model_id!r}")
60
+
61
+
62
def does_file_glob_exist(repo_id: str, glob: str) -> bool:
    """
    Tell whether at least one path in the repository matches ``glob``.

    A ``FileNotFoundError`` from the file system is reported as "no match".
    """
    pattern = f"{repo_id}/{glob}"
    try:
        matches = FILE_SYSTEM.glob(pattern, detail=False)
    except FileNotFoundError:
        return False
    # An empty result means nothing matched the pattern.
    return bool(matches)
70
+
71
+
72
def export_to_torch(model_id, create_pr, output_model_id):
    """Load ``model_id`` with the torch backend and push it to ``output_model_id``."""
    torch_model = SentenceTransformer(model_id, backend="torch")
    push_options = {
        "repo_id": output_model_id,
        "create_pr": create_pr,
        "exist_ok": True,
    }
    torch_model.push_to_hub(**push_options)
79
+
80
+
81
+ def export_to_onnx(model_id: str, create_pr: bool, output_model_id: str):
82
+ if does_file_glob_exist(output_model_id, "**/model.onnx"):
83
+ raise FileExistsError("An ONNX model already exists in the repository")
84
+
85
+ model = SentenceTransformer(model_id, backend="onnx")
86
+
87
+ commit_message = "Add exported 'model.onnx' compatible with Sentence Transformers"
88
+
89
+ if is_new_model(output_model_id):
90
+ model.push_to_hub(
91
+ repo_id=output_model_id,
92
+ commit_message=commit_message,
93
+ create_pr=create_pr,
94
+ )
95
+ else:
96
+ with TemporaryDirectory() as tmp_dir:
97
+ model.save_pretrained(tmp_dir)
98
+
99
+ commit_description = f"""
100
+ Hello!
101
+
102
+ *This pull request has been automatically generated from the [Sentence Transformers backend-export](https://huggingface.co/spaces/sentence-transformers/backend-export) Space.*
103
+
104
+ ## Pull Request overview
105
+ * Add exported ONNX model `model.onnx`.
106
+
107
+ ## Tip:
108
+ Consider testing this pull request before merging by loading the model from this PR with the `revision` argument:
109
+ ```python
110
+ from sentence_transformers import SentenceTransformer
111
+
112
+ # TODO: Fill in the PR number
113
+ pr_number = 2
114
+ model = SentenceTransformer(
115
+ "{output_model_id}",
116
+ revision=f"refs/pr/{{pr_number}}",
117
+ backend="onnx",
118
+ )
119
+
120
+ # Verify that everything works as expected
121
+ embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
122
+ print(embeddings.shape)
123
+
124
+ similarities = model.similarity(embeddings, embeddings)
125
+ print(similarities)
126
+ ```
127
+ """
128
+
129
+ upload_folder(
130
+ repo_id=output_model_id,
131
+ folder_path=Path(tmp_dir) / "onnx",
132
+ path_in_repo="onnx",
133
+ commit_message=commit_message,
134
+ commit_description=commit_description if create_pr else None,
135
+ create_pr=create_pr,
136
+ )
137
+
138
+ def export_to_onnx_snippet(model_id: str, create_pr: bool, output_model_id: str) -> str:
139
+ return """\
140
+ pip install sentence_transformers[onnx-gpu]
141
+ # or
142
+ pip install sentence_transformers[onnx]
143
+ """, f"""\
144
+ from sentence_transformers import SentenceTransformer
145
+
146
+ # 1. Load the model to be exported with the ONNX backend
147
+ model = SentenceTransformer(
148
+ "{model_id}",
149
+ backend="onnx",
150
+ )
151
+
152
+ # 2. Push the model to the Hugging Face Hub
153
+ {f'model.push_to_hub("{output_model_id}")'
154
+ if not create_pr
155
+ else f'''model.push_to_hub(
156
+ "{output_model_id}",
157
+ create_pr=True,
158
+ )'''}
159
+ """, f"""\
160
+ from sentence_transformers import SentenceTransformer
161
+
162
+ # 1. Load the model from the Hugging Face Hub
163
+ # (until merged) Use the `revision` argument to load the model from the PR
164
+ pr_number = 2
165
+ model = SentenceTransformer(
166
+ "{output_model_id}",
167
+ revision=f"refs/pr/{{pr_number}}",
168
+ backend="onnx",
169
+ )
170
+
171
+ # 2. Inference works as normal
172
+ embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
173
+ similarities = model.similarity(embeddings, embeddings)
174
+ """
175
+
176
+
177
def export_to_onnx_dynamic_quantization(
    model_id: str, create_pr: bool, output_model_id: str, onnx_quantization_config: str
) -> None:
    """
    Quantize ``model_id`` dynamically with ONNX and push the result to the Hub.

    Raises:
        FileExistsError: If the target repository already holds the quantized
            ONNX file for the requested quantization config.
    """
    quantized_file = f"onnx/model_qint8_{onnx_quantization_config}.onnx"
    if does_file_glob_exist(output_model_id, quantized_file):
        raise FileExistsError("The quantized ONNX model already exists in the repository")

    onnx_model = SentenceTransformer(model_id, backend="onnx")

    # When pushing directly to a repository that does not exist yet, upload
    # the base model first.
    if not create_pr and is_new_model(output_model_id):
        onnx_model.push_to_hub(repo_id=output_model_id)

    # Both export attempts below use exactly the same options.
    export_options = {
        "quantization_config": onnx_quantization_config,
        "model_name_or_path": output_model_id,
        "push_to_hub": True,
        "create_pr": create_pr,
    }

    try:
        st_export_dynamic_quantized_onnx_model(onnx_model, **export_options)
    except ValueError:
        # Currently, quantization with optimum has some issues if there's already an ONNX model in a subfolder
        onnx_model = SentenceTransformer(model_id, backend="onnx", model_kwargs={"export": True})
        st_export_dynamic_quantized_onnx_model(onnx_model, **export_options)
206
+
207
+ def export_to_onnx_dynamic_quantization_snippet(
208
+ model_id: str, create_pr: bool, output_model_id: str, onnx_quantization_config: str
209
+ ) -> str:
210
+ return """\
211
+ pip install sentence_transformers[onnx-gpu]
212
+ # or
213
+ pip install sentence_transformers[onnx]
214
+ """, f"""\
215
+ from sentence_transformers import (
216
+ SentenceTransformer,
217
+ export_dynamic_quantized_onnx_model,
218
+ )
219
+
220
+ # 1. Load the model to be quantized with the ONNX backend
221
+ model = SentenceTransformer(
222
+ "{model_id}",
223
+ backend="onnx",
224
+ )
225
+
226
+ # 2. Export the model with {onnx_quantization_config} dynamic quantization
227
+ export_dynamic_quantized_onnx_model(
228
+ model,
229
+ quantization_config="{onnx_quantization_config}",
230
+ model_name_or_path="{output_model_id}",
231
+ push_to_hub=True,
232
+ {''' create_pr=True,
233
+ ''' if create_pr else ''})
234
+ """, f"""\
235
+ from sentence_transformers import SentenceTransformer
236
+
237
+ # 1. Load the model from the Hugging Face Hub
238
+ # (until merged) Use the `revision` argument to load the model from the PR
239
+ pr_number = 2
240
+ model = SentenceTransformer(
241
+ "{output_model_id}",
242
+ revision=f"refs/pr/{{pr_number}}",
243
+ backend="onnx",
244
+ model_kwargs={{"file_name": "model_qint8_{onnx_quantization_config}.onnx"}},
245
+ )
246
+
247
+ # 2. Inference works as normal
248
+ embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
249
+ similarities = model.similarity(embeddings, embeddings)
250
+ """
251
+
252
def export_to_onnx_optimization(model_id: str, create_pr: bool, output_model_id: str, onnx_optimization_config: str) -> None:
    """
    Optimize ``model_id`` with ONNX at the given level and push it to the Hub.

    Raises:
        FileExistsError: If the target repository already holds the optimized
            ONNX file for the requested optimization config.
    """
    optimized_file = f"onnx/model_{onnx_optimization_config}.onnx"
    already_exported = does_file_glob_exist(output_model_id, optimized_file)
    if already_exported:
        raise FileExistsError("The optimized ONNX model already exists in the repository")

    onnx_model = SentenceTransformer(model_id, backend="onnx")

    # When pushing directly to a repository that does not exist yet, upload
    # the base model first.
    if not create_pr and is_new_model(output_model_id):
        onnx_model.push_to_hub(repo_id=output_model_id)

    export_options = {
        "optimization_config": onnx_optimization_config,
        "model_name_or_path": output_model_id,
        "push_to_hub": True,
        "create_pr": create_pr,
    }
    st_export_optimized_onnx_model(onnx_model, **export_options)
268
+
269
+ def export_to_onnx_optimization_snippet(model_id: str, create_pr: bool, output_model_id: str, onnx_optimization_config: str) -> str:
270
+ return """\
271
+ pip install sentence_transformers[onnx-gpu]
272
+ # or
273
+ pip install sentence_transformers[onnx]
274
+ """, f"""\
275
+ from sentence_transformers import (
276
+ SentenceTransformer,
277
+ export_optimized_onnx_model,
278
+ )
279
+
280
+ # 1. Load the model to be optimized with the ONNX backend
281
+ model = SentenceTransformer(
282
+ "{model_id}",
283
+ backend="onnx",
284
+ )
285
+
286
+ # 2. Export the model with {onnx_optimization_config} optimization level
287
+ export_optimized_onnx_model(
288
+ model,
289
+ optimization_config="{onnx_optimization_config}",
290
+ model_name_or_path="{output_model_id}",
291
+ push_to_hub=True,
292
+ {''' create_pr=True,
293
+ ''' if create_pr else ''})
294
+ """, f"""\
295
+ from sentence_transformers import SentenceTransformer
296
+
297
+ # 1. Load the model from the Hugging Face Hub
298
+ # (until merged) Use the `revision` argument to load the model from the PR
299
+ pr_number = 2
300
+ model = SentenceTransformer(
301
+ "{output_model_id}",
302
+ revision=f"refs/pr/{{pr_number}}",
303
+ backend="onnx",
304
+ model_kwargs={{"file_name": "model_{onnx_optimization_config}.onnx"}},
305
+ )
306
+
307
+ # 2. Inference works as normal
308
+ embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
309
+ similarities = model.similarity(embeddings, embeddings)
310
+ """
311
+
312
+
313
+ def export_to_openvino(model_id: str, create_pr: bool, output_model_id: str) -> None:
314
+ if does_file_glob_exist(output_model_id, "**/openvino_model.xml"):
315
+ raise FileExistsError("The OpenVINO model already exists in the repository")
316
+
317
+ model = SentenceTransformer(model_id, backend="openvino")
318
+
319
+ commit_message = "Add exported 'openvino_model.xml' compatible with Sentence Transformers"
320
+
321
+ if is_new_model(output_model_id):
322
+ model.push_to_hub(
323
+ repo_id=output_model_id,
324
+ commit_message=commit_message,
325
+ create_pr=create_pr,
326
+ )
327
+ else:
328
+ with TemporaryDirectory() as tmp_dir:
329
+ model.save_pretrained(tmp_dir)
330
+
331
+ commit_description = f"""
332
+ Hello!
333
+
334
+ *This pull request has been automatically generated from the [Sentence Transformers backend-export](https://huggingface.co/spaces/sentence-transformers/backend-export) Space.*
335
+
336
+ ## Pull Request overview
337
+ * Add exported OpenVINO model `openvino_model.xml`.
338
+
339
+ ## Tip:
340
+ Consider testing this pull request before merging by loading the model from this PR with the `revision` argument:
341
+ ```python
342
+ from sentence_transformers import SentenceTransformer
343
+
344
+ # TODO: Fill in the PR number
345
+ pr_number = 2
346
+ model = SentenceTransformer(
347
+ "{output_model_id}",
348
+ revision=f"refs/pr/{{pr_number}}",
349
+ backend="openvino",
350
+ )
351
+
352
+ # Verify that everything works as expected
353
+ embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
354
+ print(embeddings.shape)
355
+
356
+ similarities = model.similarity(embeddings, embeddings)
357
+ print(similarities)
358
+ ```
359
+ """
360
+
361
+ upload_folder(
362
+ repo_id=output_model_id,
363
+ folder_path=Path(tmp_dir) / "openvino",
364
+ path_in_repo="openvino",
365
+ commit_message=commit_message,
366
+ commit_description=commit_description if create_pr else None,
367
+ create_pr=create_pr,
368
+ )
369
+
370
+ def export_to_openvino_snippet(model_id: str, create_pr: bool, output_model_id: str) -> str:
371
+ return """\
372
+ pip install sentence_transformers[openvino]
373
+ """, f"""\
374
+ from sentence_transformers import SentenceTransformer
375
+
376
+ # 1. Load the model to be exported with the OpenVINO backend
377
+ model = SentenceTransformer(
378
+ "{model_id}",
379
+ backend="openvino",
380
+ )
381
+
382
+ # 2. Push the model to the Hugging Face Hub
383
+ {f'model.push_to_hub("{output_model_id}")'
384
+ if not create_pr
385
+ else f'''model.push_to_hub(
386
+ "{output_model_id}",
387
+ create_pr=True,
388
+ )'''}
389
+ """, f"""\
390
+ from sentence_transformers import SentenceTransformer
391
+
392
+ # 1. Load the model from the Hugging Face Hub
393
+ # (until merged) Use the `revision` argument to load the model from the PR
394
+ pr_number = 2
395
+ model = SentenceTransformer(
396
+ "{output_model_id}",
397
+ revision=f"refs/pr/{{pr_number}}",
398
+ backend="openvino",
399
+ )
400
+
401
+ # 2. Inference works as normal
402
+ embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
403
+ similarities = model.similarity(embeddings, embeddings)
404
+ """
405
+
406
def export_to_openvino_static_quantization(
    model_id: str,
    create_pr: bool,
    output_model_id: str,
    ov_quant_dataset_name: str,
    ov_quant_dataset_subset: str,
    ov_quant_dataset_split: str,
    ov_quant_dataset_column_name: str,
    ov_quant_dataset_num_samples: int,
) -> None:
    """
    Statically quantize ``model_id`` with OpenVINO and push it to the Hub.

    The ``ov_quant_dataset_*`` arguments are forwarded to the export call as
    the dataset name, config name, split and column name; the number of
    samples is passed through ``OVQuantizationConfig``.

    Raises:
        FileExistsError: If the target repository already holds the quantized
            OpenVINO model file.
    """
    quantized_file = "openvino/openvino_model_qint8_quantized.xml"
    if does_file_glob_exist(output_model_id, quantized_file):
        raise FileExistsError("The quantized OpenVINO model already exists in the repository")

    openvino_model = SentenceTransformer(model_id, backend="openvino")

    # When pushing directly to a repository that does not exist yet, upload
    # the base model first.
    if not create_pr and is_new_model(output_model_id):
        openvino_model.push_to_hub(repo_id=output_model_id)

    quantization_config = OVQuantizationConfig(
        num_samples=ov_quant_dataset_num_samples,
    )
    export_options = {
        "quantization_config": quantization_config,
        "model_name_or_path": output_model_id,
        "dataset_name": ov_quant_dataset_name,
        "dataset_config_name": ov_quant_dataset_subset,
        "dataset_split": ov_quant_dataset_split,
        "column_name": ov_quant_dataset_column_name,
        "push_to_hub": True,
        "create_pr": create_pr,
    }
    st_export_static_quantized_openvino_model(openvino_model, **export_options)
437
+
438
+ def export_to_openvino_static_quantization_snippet(
439
+ model_id: str,
440
+ create_pr: bool,
441
+ output_model_id: str,
442
+ ov_quant_dataset_name: str,
443
+ ov_quant_dataset_subset: str,
444
+ ov_quant_dataset_split: str,
445
+ ov_quant_dataset_column_name: str,
446
+ ov_quant_dataset_num_samples: int,
447
+ ) -> str:
448
+ return """\
449
+ pip install sentence_transformers[openvino]
450
+ """, f"""\
451
+ from sentence_transformers import (
452
+ SentenceTransformer,
453
+ export_static_quantized_openvino_model,
454
+ )
455
+ from optimum.intel import OVQuantizationConfig
456
+
457
+ # 1. Load the model to be quantized with the OpenVINO backend
458
+ model = SentenceTransformer(
459
+ "{model_id}",
460
+ backend="openvino",
461
+ )
462
+
463
+ # 2. Export the model with int8 static quantization
464
+ export_static_quantized_openvino_model(
465
+ model,
466
+ quantization_config=OVQuantizationConfig(
467
+ num_samples={ov_quant_dataset_num_samples},
468
+ ),
469
+ model_name_or_path="{output_model_id}",
470
+ dataset_name="{ov_quant_dataset_name}",
471
+ dataset_config_name="{ov_quant_dataset_subset}",
472
+ dataset_split="{ov_quant_dataset_split}",
473
+ column_name="{ov_quant_dataset_column_name}",
474
+ push_to_hub=True,
475
+ {''' create_pr=True,
476
+ ''' if create_pr else ''})
477
+ """, f"""\
478
+ from sentence_transformers import SentenceTransformer
479
+
480
+ # 1. Load the model from the Hugging Face Hub
481
+ # (until merged) Use the `revision` argument to load the model from the PR
482
+ pr_number = 2
483
+ model = SentenceTransformer(
484
+ "{output_model_id}",
485
+ revision=f"refs/pr/{{pr_number}}",
486
+ backend="openvino",
487
+ model_kwargs={{"file_name": "openvino_model_qint8_quantized.xml"}},
488
+ )
489
+
490
+ # 2. Inference works as normal
491
+ embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
492
+ similarities = model.similarity(embeddings, embeddings)
493
+ """
494
+
495
def on_submit(
    model_id,
    create_pr,
    output_model_id,
    backend,
    onnx_quantization_config,
    onnx_optimization_config,
    ov_quant_dataset_name,
    ov_quant_dataset_subset,
    ov_quant_dataset_split,
    ov_quant_dataset_column_name,
    ov_quant_dataset_num_samples,
    inference_snippet: str,
):
    """
    Run the selected export and report where the result ended up.

    Args:
        model_id: Source model ID on the Hugging Face Hub.
        create_pr: If true, the export is opened as a pull request on
            ``model_id`` itself and ``output_model_id`` is ignored.
        output_model_id: Target repository; a bare name (no ``/``) is
            prefixed with the signed-in user's name.
        backend: Label of the chosen backend (a ``Backend`` value).
        onnx_quantization_config: Config for ONNX dynamic quantization.
        onnx_optimization_config: Config for ONNX optimization.
        ov_quant_dataset_name: Calibration dataset for OpenVINO quantization.
        ov_quant_dataset_subset: Calibration dataset subset.
        ov_quant_dataset_split: Calibration dataset split.
        ov_quant_dataset_column_name: Calibration dataset text column.
        ov_quant_dataset_num_samples: Number of calibration samples.
        inference_snippet: Inference snippet currently shown in the UI.

    Returns:
        Always a 3-tuple ``(url_markdown, inference_snippet, error_textbox)``.
        On failure the URL placeholder and the unchanged snippet are returned
        together with a visible error textbox.
    """
    placeholder = "Commit or PR url:<br>..."

    def failure(message):
        # Every error path must return the same three values as success.
        return placeholder, inference_snippet, gr.Textbox(message, visible=True)

    if not model_id:
        return failure("Please enter a model ID")

    if not is_sentence_transformer_model(model_id):
        return failure("The source model must have a Sentence Transformers tag")

    if output_model_id and "/" not in output_model_id:
        try:
            output_model_id = f"{whoami()['name']}/{output_model_id}"
        except Exception:
            return failure("You must be signed in with Hugging Face to use this Space")

    # A pull request always targets the source repository itself.
    output_model_id = output_model_id if not create_pr else model_id

    try:
        if backend == Backend.ONNX.value:
            export_to_onnx(model_id, create_pr, output_model_id)
        elif backend == Backend.ONNX_DYNAMIC_QUANTIZATION.value:
            export_to_onnx_dynamic_quantization(
                model_id, create_pr, output_model_id, onnx_quantization_config
            )
        elif backend == Backend.ONNX_OPTIMIZATION.value:
            export_to_onnx_optimization(
                model_id, create_pr, output_model_id, onnx_optimization_config
            )
        elif backend == Backend.OPENVINO.value:
            export_to_openvino(model_id, create_pr, output_model_id)
        elif backend == Backend.OPENVINO_STATIC_QUANTIZATION.value:
            export_to_openvino_static_quantization(
                model_id,
                create_pr,
                output_model_id,
                ov_quant_dataset_name,
                ov_quant_dataset_subset,
                ov_quant_dataset_split,
                ov_quant_dataset_column_name,
                ov_quant_dataset_num_samples,
            )
        else:
            # Without this, an unknown backend would export nothing and then
            # report an unrelated commit or PR as the result.
            return failure("Unexpected backend!")
    except FileExistsError as exc:
        return failure(str(exc))

    if create_pr:
        url, num = get_last_pr(output_model_id)
        return (
            f"PR url:<br>{url}",
            inference_snippet.replace("pr_number = 2", f"pr_number = {num}"),
            gr.Textbox(visible=False),
        )

    # Remove the lines that refer to the revision argument: the "(until
    # merged)" comment, the `pr_number` assignment and the `revision=` line.
    # Filtering by index cannot raise if the snippet is shorter than expected.
    revision_line_indices = (3, 4, 7)
    kept_lines = [
        line
        for index, line in enumerate(inference_snippet.splitlines())
        if index not in revision_line_indices
    ]
    inference_snippet = "\n".join(kept_lines)
    return f"Commit url:<br>{get_last_commit(output_model_id)}", inference_snippet, gr.Textbox(visible=False)
563
+
564
def on_change(
    model_id,
    create_pr,
    output_model_id,
    backend,
    onnx_quantization_config,
    onnx_optimization_config,
    ov_quant_dataset_name,
    ov_quant_dataset_subset,
    ov_quant_dataset_split,
    ov_quant_dataset_column_name,
    ov_quant_dataset_num_samples,
) -> tuple:
    """
    Regenerate the code snippets for the current form values.

    Args:
        model_id: Source model ID on the Hugging Face Hub.
        create_pr: If true, snippets target ``model_id`` itself via a PR and
            ``output_model_id`` is ignored.
        output_model_id: Target repository; a bare name (no ``/``) is
            prefixed with the signed-in user's name.
        backend: Label of the chosen backend (a ``Backend`` value).
        onnx_quantization_config: Config for ONNX dynamic quantization.
        onnx_optimization_config: Config for ONNX optimization.
        ov_quant_dataset_name: Calibration dataset for OpenVINO quantization.
        ov_quant_dataset_subset: Calibration dataset subset.
        ov_quant_dataset_split: Calibration dataset split.
        ov_quant_dataset_column_name: Calibration dataset text column.
        ov_quant_dataset_num_samples: Number of calibration samples.

    Returns:
        A 4-tuple ``(requirements, export_snippet, inference_snippet,
        error_textbox)``. On failure the three snippets are empty strings and
        the error textbox is visible.
    """
    if not model_id:
        return "", "", "", gr.Textbox("Please enter a model ID", visible=True)

    if output_model_id and "/" not in output_model_id:
        try:
            output_model_id = f"{whoami()['name']}/{output_model_id}"
        except Exception:
            return "", "", "", gr.Textbox("You must be signed in with Hugging Face to use this Space", visible=True)

    # A pull request always targets the source repository itself.
    output_model_id = output_model_id if not create_pr else model_id

    if backend == Backend.ONNX.value:
        snippets = export_to_onnx_snippet(model_id, create_pr, output_model_id)
    elif backend == Backend.ONNX_DYNAMIC_QUANTIZATION.value:
        snippets = export_to_onnx_dynamic_quantization_snippet(
            model_id, create_pr, output_model_id, onnx_quantization_config
        )
    elif backend == Backend.ONNX_OPTIMIZATION.value:
        snippets = export_to_onnx_optimization_snippet(
            model_id, create_pr, output_model_id, onnx_optimization_config
        )
    elif backend == Backend.OPENVINO.value:
        snippets = export_to_openvino_snippet(model_id, create_pr, output_model_id)
    elif backend == Backend.OPENVINO_STATIC_QUANTIZATION.value:
        snippets = export_to_openvino_static_quantization_snippet(
            model_id,
            create_pr,
            output_model_id,
            ov_quant_dataset_name,
            ov_quant_dataset_subset,
            ov_quant_dataset_split,
            ov_quant_dataset_column_name,
            ov_quant_dataset_num_samples,
        )
    else:
        return "", "", "", gr.Textbox("Unexpected backend!", visible=True)

    # `snippets` is the (requirements, export, inference) triple; hide the
    # error textbox on success.
    return *snippets, gr.Textbox(visible=False)
615
+
616
+
617
+ css = """
618
+ .container {
619
+ padding-left: 0;
620
+ }
621
+
622
+ .text-error {
623
+ background-color: #85282D;
624
+ /* background-color: #732E33; */
625
+ }
626
+
627
+ .small-text * {
628
+ font-size: var(--block-info-text-size);
629
+ }
630
+ """
631
+
632
+ with gr.Blocks(
633
+ css=css,
634
+ theme=gr.themes.Base(),
635
+ ) as demo:
636
+ gr.LoginButton(min_width=250)
637
+
638
+ with gr.Row():
639
+ # Left Input Column
640
+ with gr.Column(scale=2):
641
+
642
+ gr.Markdown(
643
+ value="""\
644
+ ### Export a Sentence Transformer model to accelerated backends
645
+
646
+ Sentence Transformers embedding models can be optimized for **faster inference** on CPU and GPU devices by exporting, quantizing, and optimizing them in ONNX and OpenVINO formats.
647
+ Observe the [Speeding up Inference](https://sbert.net/docs/sentence_transformer/usage/efficiency.html) documentation for more information.
648
+
649
+ <details><summary>Click to see performance benchmarks</summary>
650
+
651
+ | GPU | CPU |
652
+ | --- | --- |
653
+ | ![](https://sbert.net/_images/backends_benchmark_gpu.png) | ![](https://sbert.net/_images/backends_benchmark_cpu.png) |
654
+
655
+ * `onnx` refers to the ONNX backend
656
+ * `onnx-qint8` refers to ONNX (Dynamic Quantization)
657
+ * `onnx-O1` to `onnx-O4` refers to ONNX (Optimization)
658
+ * `openvino` refers to the OpenVINO backend
659
+ * `openvino-qint8` refers to OpenVINO (Static Quantization)
660
+
661
+ </details>
662
+
663
+ """,
664
+ label="",
665
+ container=True,
666
+ )
667
+
668
+ model_id = HuggingfaceHubSearch(
669
+ label="Hub Model ID",
670
+ placeholder="Search for Sentence Transformer models on Hugging Face",
671
+ search_type="model",
672
+ )
673
+ create_pr = gr.Checkbox(
674
+ value=True,
675
+ label="Create PR",
676
+ info="Create a pull request instead of pushing directly to the repository",
677
+ )
678
+ output_model_id = gr.Textbox(
679
+ value="",
680
+ label="Output Model ID",
681
+ placeholder="Output Model ID",
682
+ type="text",
683
+ visible=False,
684
+ )
685
+ create_pr.change(
686
+ lambda create_pr: gr.Textbox(visible=not create_pr),
687
+ inputs=[create_pr],
688
+ outputs=[output_model_id],
689
+ )
690
+
691
+ backend = gr.Radio(
692
+ choices=backends,
693
+ value=Backend.ONNX,
694
+ label="Backend",
695
+ )
696
+
697
+ with gr.Group(visible=True) as onnx_group:
698
+ gr.Markdown(
699
+ value="[ONNX Documentation](https://sbert.net/docs/sentence_transformer/usage/efficiency.html#onnx)",
700
+ container=True,
701
+ elem_classes=["small-text"]
702
+ )
703
+ with gr.Group(visible=False) as onnx_dynamic_quantization_group:
704
+ onnx_quantization_config = gr.Radio(
705
+ choices=["arm64", "avx2", "avx512", "avx512_vnni"],
706
+ value="avx512_vnni",
707
+ label="Quantization config",
708
+ info="[ONNX Quantization Documentation](https://sbert.net/docs/sentence_transformer/usage/efficiency.html#quantizing-onnx-models)"
709
+ )
710
+ with gr.Group(visible=False) as onnx_optimization_group:
711
+ onnx_optimization_config = gr.Radio(
712
+ choices=["O1", "O2", "O3", "O4"],
713
+ value="O4",
714
+ label="Optimization config",
715
+ info="[ONNX Optimization Documentation](https://sbert.net/docs/sentence_transformer/usage/efficiency.html#optimizing-onnx-models)"
716
+ )
717
+ with gr.Group(visible=False) as openvino_group:
718
+ gr.Markdown(
719
+ value="[OpenVINO Documentation](https://sbert.net/docs/sentence_transformer/usage/efficiency.html#openvino)",
720
+ container=True,
721
+ elem_classes=["small-text"]
722
+ )
723
+ with gr.Group(visible=False) as openvino_static_quantization_group:
724
+ gr.Markdown(
725
+ value="[OpenVINO Quantization Documentation](https://sbert.net/docs/sentence_transformer/usage/efficiency.html#quantizing-openvino-models)",
726
+ container=True,
727
+ elem_classes=["small-text"]
728
+ )
729
+ ov_quant_dataset_name = HuggingfaceHubSearch(
730
+ value="nyu-mll/glue",
731
+ label="Calibration Dataset Name",
732
+ placeholder="Search for Sentence Transformer datasets on Hugging Face",
733
+ search_type="dataset",
734
+ )
735
+ ov_quant_dataset_subset = gr.Textbox(
736
+ value="sst2",
737
+ label="Calibration Dataset Subset",
738
+ placeholder="Calibration Dataset Subset",
739
+ type="text",
740
+ max_lines=1,
741
+ )
742
+ ov_quant_dataset_split = gr.Textbox(
743
+ value="train",
744
+ label="Calibration Dataset Split",
745
+ placeholder="Calibration Dataset Split",
746
+ type="text",
747
+ max_lines=1,
748
+ )
749
+ ov_quant_dataset_column_name = gr.Textbox(
750
+ value="sentence",
751
+ label="Calibration Dataset Column Name",
752
+ placeholder="Calibration Dataset Column Name",
753
+ type="text",
754
+ max_lines=1,
755
+ )
756
+ ov_quant_dataset_num_samples = gr.Number(
757
+ value=300,
758
+ label="Calibration Dataset Num Samples",
759
+ )
760
+
761
+ backend.change(
762
+ lambda backend: (
763
+ (
764
+ gr.Group(visible=True)
765
+ if backend == Backend.ONNX.value
766
+ else gr.Group(visible=False)
767
+ ),
768
+ (
769
+ gr.Group(visible=True)
770
+ if backend == Backend.ONNX_DYNAMIC_QUANTIZATION.value
771
+ else gr.Group(visible=False)
772
+ ),
773
+ (
774
+ gr.Group(visible=True)
775
+ if backend == Backend.ONNX_OPTIMIZATION.value
776
+ else gr.Group(visible=False)
777
+ ),
778
+ (
779
+ gr.Group(visible=True)
780
+ if backend == Backend.OPENVINO.value
781
+ else gr.Group(visible=False)
782
+ ),
783
+ (
784
+ gr.Group(visible=True)
785
+ if backend == Backend.OPENVINO_STATIC_QUANTIZATION.value
786
+ else gr.Group(visible=False)
787
+ ),
788
+ ),
789
+ inputs=[backend],
790
+ outputs=[
791
+ onnx_group,
792
+ onnx_dynamic_quantization_group,
793
+ onnx_optimization_group,
794
+ openvino_group,
795
+ openvino_static_quantization_group,
796
+ ],
797
+ )
798
+
799
+ submit_button = gr.Button(
800
+ "Export Model",
801
+ variant="primary",
802
+ )
803
+
804
+ # Right Input Column
805
+ with gr.Column(scale=1):
806
+ error = gr.Textbox(
807
+ value="",
808
+ label="Error",
809
+ type="text",
810
+ visible=False,
811
+ max_lines=1,
812
+ interactive=False,
813
+ elem_classes=["text-error"],
814
+ )
815
+
816
+ requirements = gr.Code(
817
+ value="",
818
+ language="shell",
819
+ label="Requirements",
820
+ lines=1,
821
+ )
822
+ export_snippet = gr.Code(
823
+ value="",
824
+ language="python",
825
+ label="Export Snippet",
826
+ )
827
+ inference_snippet = gr.Code(
828
+ value="",
829
+ language="python",
830
+ label="Inference Snippet",
831
+ )
832
+ url = gr.Markdown(
833
+ value="Commit or PR url:<br>...",
834
+ label="",
835
+ container=True,
836
+ visible=True,
837
+ )
838
+
839
+ submit_button.click(
840
+ on_submit,
841
+ inputs=[
842
+ model_id,
843
+ create_pr,
844
+ output_model_id,
845
+ backend,
846
+ onnx_quantization_config,
847
+ onnx_optimization_config,
848
+ ov_quant_dataset_name,
849
+ ov_quant_dataset_subset,
850
+ ov_quant_dataset_split,
851
+ ov_quant_dataset_column_name,
852
+ ov_quant_dataset_num_samples,
853
+ inference_snippet,
854
+ ],
855
+ outputs=[url, inference_snippet, error],
856
+ )
857
+ for input_component in [
858
+ model_id,
859
+ create_pr,
860
+ output_model_id,
861
+ backend,
862
+ onnx_quantization_config,
863
+ onnx_optimization_config,
864
+ ov_quant_dataset_name,
865
+ ov_quant_dataset_subset,
866
+ ov_quant_dataset_split,
867
+ ov_quant_dataset_column_name,
868
+ ov_quant_dataset_num_samples,
869
+ ]:
870
+ input_component.change(
871
+ on_change,
872
+ inputs=[
873
+ model_id,
874
+ create_pr,
875
+ output_model_id,
876
+ backend,
877
+ onnx_quantization_config,
878
+ onnx_optimization_config,
879
+ ov_quant_dataset_name,
880
+ ov_quant_dataset_subset,
881
+ ov_quant_dataset_split,
882
+ ov_quant_dataset_column_name,
883
+ ov_quant_dataset_num_samples,
884
+ ],
885
+ outputs=[requirements, export_snippet, inference_snippet, error],
886
+ )
887
+
888
+ if __name__ == "__main__":
889
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ sentence_transformers[onnx-gpu,openvino]==3.3.0
2
+ onnx==1.16.1
3
+ gradio_huggingfacehub_search==0.0.7
4
+ gradio[oauth]==5.5.0
5
+ huggingface_hub==0.26.2