medmekk and jerryzh168 committed
Commit fe44b10 · verified · 1 Parent(s): c67e752

Update app.py (#1)


- Update app.py (7f39dd52ffa030ae557d4d7da9c294cd2e260788)


Co-authored-by: Jerry Zhang <[email protected]>

Files changed (1):
  1. app.py (+36 -27)
app.py CHANGED
@@ -13,22 +13,25 @@ from torchao.quantization import (
     Int8DynamicActivationInt8WeightConfig,
     Float8WeightOnlyConfig,
     Float8DynamicActivationFloat8WeightConfig,
+    GemliteUIntXWeightOnlyConfig,
 )
 
 MAP_QUANT_TYPE_TO_NAME = {
-    "int4_weight_only": "int4wo",
-    "int8_weight_only": "int8wo",
-    "int8_dynamic_activation_int8_weight": "int8da8w8",
-    "float8_weight_only": "float8wo",
-    "float8_dynamic_activation_float8_weight": "float8da8w8",
+    "Int4WeightOnly": "int4wo",
+    "GemliteUIntXWeightOnly": "intxwo-gemlite",
+    "Int8WeightOnly": "int8wo",
+    "Int8DynamicActivationInt8Weight": "int8da8w8",
+    "Float8WeightOnly": "float8wo",
+    "Float8DynamicActivationFloat8Weight": "float8da8w8",
     "autoquant": "autoquant",
 }
 MAP_QUANT_TYPE_TO_CONFIG = {
-    "int4_weight_only": Int4WeightOnlyConfig,
-    "int8_weight_only": Int8WeightOnlyConfig,
-    "int8_dynamic_activation_int8_weight": Int8DynamicActivationInt8WeightConfig,
-    "float8_weight_only": Float8WeightOnlyConfig,
-    "float8_dynamic_activation_float8_weight": Float8DynamicActivationFloat8WeightConfig,
+    "Int4WeightOnly": Int4WeightOnlyConfig,
+    "GemliteUIntXWeightOnly": GemliteUIntXWeightOnlyConfig,
+    "Int8WeightOnly": Int8WeightOnlyConfig,
+    "Int8DynamicActivationInt8Weight": Int8DynamicActivationInt8WeightConfig,
+    "Float8WeightOnly": Float8WeightOnlyConfig,
+    "Float8DynamicActivationFloat8Weight": Float8DynamicActivationFloat8WeightConfig,
 }
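A note on the two maps above: they resolve a dropdown choice first to a short suffix used in the quantized repo's name, and then to a torchao config class. A minimal sketch of how they are consumed, assuming a torchao build that exports these config classes (the diff itself passes the resulting instance straight to transformers' TorchAoConfig):

```python
from transformers import TorchAoConfig
from torchao.quantization import Int8WeightOnlyConfig

# Excerpt of MAP_QUANT_TYPE_TO_CONFIG, reduced to one entry for illustration.
MAP_QUANT_TYPE_TO_CONFIG = {"Int8WeightOnly": Int8WeightOnlyConfig}

choice = "Int8WeightOnly"                          # value coming from the Gradio dropdown
quant_config = MAP_QUANT_TYPE_TO_CONFIG[choice]()  # instantiate the torchao config class
quantization_config = TorchAoConfig(quant_config)  # wrap it for from_pretrained(...)
```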
@@ -56,8 +59,7 @@ def check_model_exists(
         repo_name = f"{username}/{quantized_model_name}"
     else:
         if (
-            quantization_type == "int4_weight_only"
-            or quantization_type == "int8_weight_only"
+            quantization_type in ["Int4WeightOnly", "GemliteUIntXWeightOnly"]
         ) and (group_size is not None):
             repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type]}-gs{group_size}"
         else:
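A worked example of the repo-naming convention above (the username and model name are hypothetical placeholders):

```python
MAP_QUANT_TYPE_TO_NAME = {"Int4WeightOnly": "int4wo"}  # excerpt of the map above

username = "someuser"               # placeholder
model_name = "facebook/opt-125m"    # placeholder
quantization_type = "Int4WeightOnly"
group_size = 128

repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type]}-gs{group_size}"
print(repo_name)  # -> someuser/opt-125m-ao-int4wo-gs128
```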
@@ -173,13 +175,13 @@ def quantize_model(
     print(f"Quantizing model: {quantization_type}")
     progress(0, desc="Preparing Quantization")
     if (
-        quantization_type == "int8_weight_only"
+        quantization_type == "GemliteUIntXWeightOnly"
     ):
         quant_config = MAP_QUANT_TYPE_TO_CONFIG[quantization_type](
             group_size=group_size
         )
         quantization_config = TorchAoConfig(quant_config)
-    elif quantization_type == "int4_weight_only":
+    elif quantization_type == "Int4WeightOnly":
         from torchao.dtypes import Int4CPULayout
 
         quant_config = MAP_QUANT_TYPE_TO_CONFIG[quantization_type](
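A condensed sketch of the branching above: GemliteUIntXWeightOnly is now the path that takes a group_size, while Int4WeightOnly pulls in the CPU-specific tensor layout. The group_size and layout keyword names follow the diff and torchao's config constructors, but exact signatures are version-dependent, so treat this as a sketch rather than the app's literal implementation:

```python
from typing import Optional

from transformers import TorchAoConfig


def build_quantization_config(quantization_type: str, group_size: Optional[int]) -> TorchAoConfig:
    """Mirror of the if/elif chain above, reduced to the two branches shown in the diff."""
    if quantization_type == "GemliteUIntXWeightOnly":
        from torchao.quantization import GemliteUIntXWeightOnlyConfig

        return TorchAoConfig(GemliteUIntXWeightOnlyConfig(group_size=group_size))
    if quantization_type == "Int4WeightOnly":
        from torchao.dtypes import Int4CPULayout  # int4 on CPU needs the CPU-specific layout
        from torchao.quantization import Int4WeightOnlyConfig

        return TorchAoConfig(Int4WeightOnlyConfig(group_size=group_size, layout=Int4CPULayout()))
    raise ValueError(f"unhandled quantization type: {quantization_type}")
```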
@@ -233,8 +235,7 @@ def save_model(
         repo_name = f"{username}/{quantized_model_name}"
     else:
         if (
-            quantization_type == "int4_weight_only"
-            or quantization_type == "int8_weight_only"
+            quantization_type in ["Int4WeightOnly", "GemliteUIntXWeightOnly"]
         ) and (group_size is not None):
             repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type]}-gs{group_size}"
         else:
@@ -318,7 +319,7 @@ def quantize_and_save(
         return """
         <div class="error-box">
             <h3>❌ Group Size Error</h3>
-            <p>Group Size is a number for int4_weight_only and int8_weight_only or empty for int8_weight_only</p>
+            <p>Group Size is a parameter for Int4WeightOnly or GemliteUIntXWeightOnly</p>
         </div>
         """
@@ -492,11 +493,12 @@ with gr.Blocks(css=css) as demo:
     quantization_type = gr.Dropdown(
         info="Select the Quantization method",
         choices=[
-            "int4_weight_only",
-            "int8_weight_only",
-            "int8_dynamic_activation_int8_weight",
-            "float8_weight_only",
-            "float8_dynamic_activation_float8_weight",
+            "Int4WeightOnly",
+            "GemliteUIntXWeightOnly",
+            "Int8WeightOnly",
+            "Int8DynamicActivationInt8Weight",
+            "Float8WeightOnly",
+            "Float8DynamicActivationFloat8Weight",
             "autoquant",
         ],
-        value="int8_weight_only",
+        value="Int8WeightOnly",
@@ -549,11 +551,12 @@
 ## 📝 Quantization Options
 
 ### Quantization Types
-- **int4_weight_only**: 4-bit weight-only quantization
-- **int8_weight_only**: 8-bit weight-only quantization
-- **int8_dynamic_activation_int8_weight**: 8-bit quantization for both weights and activations
-- **float8_weight_only**: float8-bit weight-only quantization
-- **float8_dynamic_activation_float8_weight**: float8-bit quantization for both weights and activations
+- **Int4WeightOnly**: 4-bit weight-only quantization
+- **GemliteUIntXWeightOnly**: uintx Gemlite quantization (defaults to 4-bit only for now)
+- **Int8WeightOnly**: 8-bit weight-only quantization
+- **Int8DynamicActivationInt8Weight**: 8-bit quantization for both weights and activations
+- **Float8WeightOnly**: float8 weight-only quantization
+- **Float8DynamicActivationFloat8Weight**: float8 quantization for both weights and activations
 - **autoquant**: automatic quantization (uses the best quantization method for the model)
 
 ### Group Size
 
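For reference, an end-to-end sketch tying the options above together: quantize at load time with one of the configs, then publish under the naming scheme from MAP_QUANT_TYPE_TO_NAME. The model and repo names are placeholders, and the float8 and Gemlite paths additionally assume a recent GPU:

```python
import torch
from transformers import AutoModelForCausalLM, TorchAoConfig
from torchao.quantization import Float8WeightOnlyConfig

# Quantize the weights while loading, as the Space does internally.
quantization_config = TorchAoConfig(Float8WeightOnlyConfig())
model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-125m",               # placeholder model
    torch_dtype=torch.bfloat16,
    device_map="auto",
    quantization_config=quantization_config,
)
model.push_to_hub("someuser/opt-125m-ao-float8wo")  # placeholder repo; "float8wo" suffix from the map
```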