dewdev committed on
Commit 8cfa1b2 · verified · 1 Parent(s): f085d55

Upload to_onnx.py

Files changed (1)
  1. to_onnx.py +39 -231
to_onnx.py CHANGED
@@ -1,256 +1,64 @@
- import os
  import torch
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
- from onnxruntime.quantization import quantize_dynamic, quantize_static, QuantType
- from onnxruntime.quantization.calibrate import CalibrationDataReader
- import onnx
- import time
  import numpy as np

- def ensure_directory(path):
-     """Create directory if it doesn't exist"""
-     abs_path = os.path.abspath(path)
-     if not os.path.exists(abs_path):
-         os.makedirs(abs_path)
-         print(f"Created directory: {abs_path}")
-     return abs_path
-
- def verify_file_exists(file_path, timeout=5):
-     """Verify that a file exists and is not empty"""
-     start_time = time.time()
-     while time.time() - start_time < timeout:
-         if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
-             return True
-         time.sleep(0.1)
-     return False
-
- def export_to_onnx(model, tokenizer, save_path):
-     """Export model to ONNX format"""
      try:
-         # Create a dummy input for the model
-         dummy_input = tokenizer("This is a sample input", return_tensors="pt")
-
-         # Export the model to ONNX
-         torch.onnx.export(
-             model,
-             (dummy_input["input_ids"], dummy_input["attention_mask"]),
-             save_path,
-             opset_version=14,
-             input_names=["input_ids", "attention_mask"],
-             output_names=["output"],
-             dynamic_axes={
-                 "input_ids": {0: "batch_size"},
-                 "attention_mask": {0: "batch_size"},
-                 "output": {0: "batch_size"}
-             }
-         )
-
-         # Verify the file was created
-         if verify_file_exists(save_path):
-             print(f"Successfully exported ONNX model to {save_path}")
-             return True
-         else:
-             print(f"Failed to verify ONNX model at {save_path}")
-             return False
-     except Exception as e:
-         print(f"Error exporting to ONNX: {str(e)}")
-         return False
-
- def create_calibration_dataset(tokenizer, max_length=512):
-     """Generate calibration dataset for static quantization with padding"""
-     samples = [
-         "This is an English sentence.",
-         "Dies ist ein deutscher Satz.",
-         "C'est une phrase française.",
-         "Esta es una frase en español.",
-         "这是一个中文句子。",
-         "これは日本語の文章です。"
-     ]
-
-     # Tokenize with padding and truncation
-     encoded_samples = []
-     for text in samples:
-         encoded = tokenizer(
-             text,
-             padding='max_length',
-             max_length=max_length,
-             truncation=True,
-             return_tensors="pt"
-         )
-         encoded_samples.append({
-             'input_ids': encoded['input_ids'],
-             'attention_mask': encoded['attention_mask']
-         })
-
-     return encoded_samples
-
- class CalibrationLoader(CalibrationDataReader):
-     def __init__(self, calibration_data):
-         self.calibration_data = calibration_data
-         self.current_index = 0
-
-     def get_next(self):
-         if self.current_index >= len(self.calibration_data):
-             return None
-
-         current_data = self.calibration_data[self.current_index]
-         self.current_index += 1
-
-         # Ensure we're returning numpy arrays with the correct shape
-         return {
-             'input_ids': current_data['input_ids'].numpy(),
-             'attention_mask': current_data['attention_mask'].numpy()
-         }
-
-     def rewind(self):
-         self.current_index = 0

- def export_to_onnx(model, tokenizer, save_path, max_length=512):
-     """Export model to ONNX format with fixed dimensions"""
-     try:
-         # Create a dummy input with fixed dimensions
-         dummy_input = tokenizer(
-             "This is a sample input",
-             padding='max_length',
-             max_length=max_length,
-             truncation=True,
-             return_tensors="pt"
-         )

          # Export the model to ONNX
          torch.onnx.export(
              model,
              (dummy_input["input_ids"], dummy_input["attention_mask"]),
-             save_path,
-             opset_version=14,
              input_names=["input_ids", "attention_mask"],
              output_names=["output"],
              dynamic_axes={
-                 "input_ids": {0: "batch_size"},
-                 "attention_mask": {0: "batch_size"}
-             }
          )

-         if verify_file_exists(save_path):
-             print(f"Successfully exported ONNX model to {save_path}")
-             return True
-         else:
-             print(f"Failed to verify ONNX model at {save_path}")
-             return False
-     except Exception as e:
-         print(f"Error exporting to ONNX: {str(e)}")
-         return False

- def quantize_model(base_onnx_path, onnx_dir, config_name, calibration_dataset=None):
-     """
-     Quantize ONNX model using either dynamic or static quantization.

-     Args:
-         base_onnx_path (str): Path to the base ONNX model
-         onnx_dir (str): Directory to save quantized models
-         config_name (str): Type of quantization ('dynamic' or 'static')
-         calibration_dataset (list, optional): Dataset for static quantization calibration
-     """
-     try:
-         quantized_model_path = os.path.join(onnx_dir, f"model_{config_name}_quantized.onnx")
-
-         if config_name == "dynamic":
-             print(f"\nPerforming dynamic quantization...")
-             quantize_dynamic(
-                 model_input=base_onnx_path,
-                 model_output=quantized_model_path,
-                 weight_type=QuantType.QUInt8
-             )

-         elif config_name == "static" and calibration_dataset is not None:
-             print(f"\nPerforming static quantization...")
-             calibration_loader = CalibrationLoader(calibration_dataset)
-             quantize_static(
-                 model_input=base_onnx_path,
-                 model_output=quantized_model_path,
-                 calibration_data_reader=calibration_loader,
-                 quant_format=QuantType.QUInt8
-             )

-         else:
-             print(f"Invalid quantization configuration: {config_name}")
-             return False

-         # Verify the quantized model exists
-         if verify_file_exists(quantized_model_path):
-             print(f"Successfully created {config_name} quantized model at {quantized_model_path}")

-             # Print file sizes for comparison
-             base_size = os.path.getsize(base_onnx_path) / (1024 * 1024)  # Convert to MB
-             quantized_size = os.path.getsize(quantized_model_path) / (1024 * 1024)  # Convert to MB
-
-             print(f"Original model size: {base_size:.2f} MB")
-             print(f"Quantized model size: {quantized_size:.2f} MB")
-             print(f"Size reduction: {((base_size - quantized_size) / base_size * 100):.2f}%")
-
-             return True
-         else:
-             print(f"Failed to verify quantized model at {quantized_model_path}")
-             return False

      except Exception as e:
-         print(f"Error during {config_name} quantization: {str(e)}")
-         return False
-
-
- def main():
-     # Get absolute paths
-     current_dir = os.path.abspath(os.getcwd())
-     onnx_dir = ensure_directory(os.path.join(current_dir, "onnx"))
-     base_onnx_path = os.path.join(onnx_dir, "model.onnx")
-
-     print(f"Working directory: {current_dir}")
-     print(f"ONNX directory: {onnx_dir}")
-     print(f"Base ONNX model path: {base_onnx_path}")
-
-     # Step 1: Load model and tokenizer
-     print("\nLoading model and tokenizer...")
-     model_name = "alexneakameni/language_detection"
-     model = AutoModelForSequenceClassification.from_pretrained(model_name)
-     tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-     # Get the model's default max_length
-     max_length = tokenizer.model_max_length
-
-     # Step 2: Export base ONNX model
-     if not export_to_onnx(model, tokenizer, base_onnx_path, max_length):
-         print("Failed to export base ONNX model. Exiting.")
-         return
-
-     # Verify the ONNX model
-     try:
-         print(f"Verifying ONNX model at: {base_onnx_path}")
-         onnx_model = onnx.load(base_onnx_path)
-         print("Successfully verified ONNX model")
-     except Exception as e:
-         print(f"Error verifying ONNX model: {str(e)}")
-         return
-
-     # Step 3: Create calibration dataset
-     calibration_dataset = create_calibration_dataset(tokenizer, max_length)
-
-     # Step 4: Create quantized versions
-     print("\nCreating quantized versions...")
-
-     # Dynamic quantization
-     quantize_model(
-         base_onnx_path=base_onnx_path,
-         onnx_dir=onnx_dir,
-         config_name="dynamic"
-     )
-
-     # Static quantization
-     quantize_model(
-         base_onnx_path=base_onnx_path,
-         onnx_dir=onnx_dir,
-         config_name="static",
-         calibration_dataset=calibration_dataset
-     )

  if __name__ == "__main__":
-     main()

  import torch
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ import onnxruntime as ort
  import numpy as np

+ def convert_and_test_onnx(model_name, output_path="language_detection.onnx", test_text="This is a test sentence."):
+     """
+     Converts a Hugging Face model to ONNX, modifies the tokenizer, and tests the ONNX model.
+     """
      try:
+         # Load the tokenizer and model
+         tokenizer = AutoTokenizer.from_pretrained(model_name)
+         model = AutoModelForSequenceClassification.from_pretrained(model_name)

+         # Modify the tokenizer's normalizer
+         if hasattr(tokenizer.backend_tokenizer.normalizer, "normalizations"):
+             tokenizer.backend_tokenizer.normalizer.normalizations = []
+         tokenizer.save_pretrained("./modified_tokenizer")

          # Export the model to ONNX
+         dummy_input = tokenizer("This is a test sentence.", return_tensors="pt")
+
          torch.onnx.export(
              model,
              (dummy_input["input_ids"], dummy_input["attention_mask"]),
+             output_path,
              input_names=["input_ids", "attention_mask"],
              output_names=["output"],
              dynamic_axes={
+                 "input_ids": {0: "batch", 1: "sequence"},
+                 "attention_mask": {0: "batch", 1: "sequence"},
+                 "output": {0: "batch"},
+             },
+             opset_version=14,
          )

+         print(f"Model successfully converted and saved to {output_path}")

+         # Test the ONNX model
+         ort_session = ort.InferenceSession(output_path)
+         tokenizer_test = AutoTokenizer.from_pretrained("./modified_tokenizer")

+         # Explicitly set return_token_type_ids=False
+         inputs = tokenizer_test(test_text, return_tensors="np", return_token_type_ids=False)

+         ort_inputs = {k: v for k, v in inputs.items()}

+         ort_outputs = ort_session.run(None, ort_inputs)
+         logits = ort_outputs[0]
+         predicted_class_id = np.argmax(logits, axis=-1)

+         label_list = model.config.id2label
+         predicted_label = label_list[predicted_class_id[0]]

+         print(f"Test text: {test_text}")
+         print(f"Predicted label: {predicted_label}")

      except Exception as e:
+         print(f"Error during conversion or testing: {e}")

  if __name__ == "__main__":
+     model_name = "dewdev/language_detection"
+     test_text = "मैंने राजा को हिंदी में एक पत्र लिखा।"  # Hindi: "I wrote a letter to the king in Hindi."
+     convert_and_test_onnx(model_name, test_text=test_text)
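
The new script prints only a predicted label, so a quick way to gain extra confidence in the export is to compare the PyTorch logits with the ONNX Runtime logits on the same input. The sketch below is not part of the commit; it assumes to_onnx.py above has already been run, producing language_detection.onnx and ./modified_tokenizer, and reuses the dewdev/language_detection model id from the script.

# Sanity-check sketch (assumes to_onnx.py above has been run):
# compare PyTorch and ONNX Runtime logits on one input.
import numpy as np
import onnxruntime as ort
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./modified_tokenizer")  # saved by the script
model = AutoModelForSequenceClassification.from_pretrained("dewdev/language_detection")
model.eval()

text = "This is a test sentence."

# Reference logits from the original PyTorch model
pt_inputs = tokenizer(text, return_tensors="pt", return_token_type_ids=False)
with torch.no_grad():
    pt_logits = model(**pt_inputs).logits.numpy()

# Logits from the exported ONNX graph
session = ort.InferenceSession("language_detection.onnx")
np_inputs = tokenizer(text, return_tensors="np", return_token_type_ids=False)
onnx_logits = session.run(None, dict(np_inputs))[0]

# Small numerical drift is normal after export; the argmax should still agree.
print("max abs logit diff:", np.abs(pt_logits - onnx_logits).max())
assert (pt_logits.argmax(axis=-1) == onnx_logits.argmax(axis=-1)).all()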