lvkaokao committed on
Commit
653f44e
·
1 Parent(s): dca5dbd

support fp32/fp16/bf16 eval.

Browse files
app.py CHANGED
@@ -572,7 +572,7 @@ with demo:
572
  base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)",
573
  visible=not IS_PUBLIC)
574
  compute_type = gr.Dropdown(
575
- choices=[i.value.name for i in ComputeDtype],
576
  label="Compute dtype",
577
  multiselect=False,
578
  value="float16",
 
572
  base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)",
573
  visible=not IS_PUBLIC)
574
  compute_type = gr.Dropdown(
575
+ choices=[i.value.name for i in ComputeDtype if i.value.name != "All"],
576
  label="Compute dtype",
577
  multiselect=False,
578
  value="float16",
src/display/utils.py CHANGED
@@ -242,6 +242,9 @@ class WeightDtype(Enum):
242
  int4 = ModelDetails("int4")
243
  nf4 = ModelDetails("nf4")
244
  fp4 = ModelDetails("fp4")
 
 
 
245
 
246
  Unknown = ModelDetails("?")
247
 
@@ -260,6 +263,12 @@ class WeightDtype(Enum):
260
  return WeightDtype.fp4
261
  if weight_dtype in ["All"]:
262
  return WeightDtype.all
 
 
 
 
 
 
263
  return WeightDtype.Unknown
264
 
265
  class ComputeDtype(Enum):
@@ -317,8 +326,9 @@ class Precision(Enum):
317
  qt_2bit = ModelDetails("2bit")
318
  qt_3bit = ModelDetails("3bit")
319
  qt_4bit = ModelDetails("4bit")
320
- # qt_8bit = ModelDetails("8bit")
321
- # qt_GPTQ = ModelDetails("GPTQ")
 
322
  Unknown = ModelDetails("?")
323
 
324
  def from_str(precision):
@@ -332,8 +342,12 @@ class Precision(Enum):
332
  return Precision.qt_3bit
333
  if precision in ["4bit"]:
334
  return Precision.qt_4bit
335
- # if precision in ["GPTQ", "None"]:
336
- # return Precision.qt_GPTQ
 
 
 
 
337
  return Precision.Unknown
338
 
339
 
 
242
  int4 = ModelDetails("int4")
243
  nf4 = ModelDetails("nf4")
244
  fp4 = ModelDetails("fp4")
245
+ fp16 = ModelDetails("float16")
246
+ bf16 = ModelDetails("bfloat16")
247
+ fp32 = ModelDetails("float32")
248
 
249
  Unknown = ModelDetails("?")
250
 
 
263
  return WeightDtype.fp4
264
  if weight_dtype in ["All"]:
265
  return WeightDtype.all
266
+ if weight_dtype in ["float16"]:
267
+ return WeightDtype.fp16
268
+ if weight_dtype in ["bfloat16"]:
269
+ return WeightDtype.bf16
270
+ if weight_dtype in ["float32"]:
271
+ return WeightDtype.fp32
272
  return WeightDtype.Unknown
273
 
274
  class ComputeDtype(Enum):
 
326
  qt_2bit = ModelDetails("2bit")
327
  qt_3bit = ModelDetails("3bit")
328
  qt_4bit = ModelDetails("4bit")
329
+ qt_8bit = ModelDetails("8bit")
330
+ qt_16bit = ModelDetails("16bit")
331
+ qt_32bit = ModelDetails("32bit")
332
  Unknown = ModelDetails("?")
333
 
334
  def from_str(precision):
 
342
  return Precision.qt_3bit
343
  if precision in ["4bit"]:
344
  return Precision.qt_4bit
345
+ if precision in ["8bit"]:
346
+ return Precision.qt_8bit
347
+ if precision in ["16bit"]:
348
+ return Precision.qt_16bit
349
+ if precision in ["32bit"]:
350
+ return Precision.qt_32bit
351
  return Precision.Unknown
352
 
353
 
src/leaderboard/read_evals.py CHANGED
@@ -56,7 +56,7 @@ class EvalResult:
56
 
57
  # Precision
58
  precision = Precision.from_str(config.get("precision", "4bit"))
59
- quant_type = QuantType.from_str(config.get("quant_type", "GPTQ"))
60
  weight_dtype = WeightDtype.from_str(data["task_info"].get("weight_dtype", "int4"))
61
  compute_dtype = ComputeDtype.from_str(data["task_info"].get("compute_dtype", "bfloat16"))
62
  # double_quant = data["quantization_config"].get("bnb_4bit_use_double_quant", False)
@@ -209,7 +209,7 @@ def get_request_file_for_model(requests_path, model_name,
209
  if (
210
  req_content["status"] in ["Finished"]
211
  and req_content["precision"] == precision.split(".")[-1]
212
- and req_content["quant_type"] == quant_type
213
  and req_content["weight_dtype"] == weight_dtype.split(".")[-1]
214
  and req_content["compute_dtype"] == compute_dtype.split(".")[-1]
215
  ):
 
56
 
57
  # Precision
58
  precision = Precision.from_str(config.get("precision", "4bit"))
59
+ quant_type = QuantType.from_str(str(config.get("quant_type", "GPTQ")))
60
  weight_dtype = WeightDtype.from_str(data["task_info"].get("weight_dtype", "int4"))
61
  compute_dtype = ComputeDtype.from_str(data["task_info"].get("compute_dtype", "bfloat16"))
62
  # double_quant = data["quantization_config"].get("bnb_4bit_use_double_quant", False)
 
209
  if (
210
  req_content["status"] in ["Finished"]
211
  and req_content["precision"] == precision.split(".")[-1]
212
+ and str(req_content["quant_type"]) == quant_type
213
  and req_content["weight_dtype"] == weight_dtype.split(".")[-1]
214
  and req_content["compute_dtype"] == compute_dtype.split(".")[-1]
215
  ):
src/submission/check_validity.py CHANGED
@@ -69,13 +69,27 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
69
  return True, "uses a gated model.", None
70
  return False, f"was not found or misconfigured on the hub! Error raised was {e.args[0]}", None
71
 
 
72
  def get_model_size(model_info: ModelInfo, precision: str):
73
  size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
74
  safetensors = None
75
  try:
76
  safetensors = get_safetensors_metadata(model_info.id)
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  except Exception as e:
78
- print(e)
79
 
80
  if safetensors is not None:
81
  model_size = round(sum(safetensors.parameter_count.values()) / 1e9, 3)
@@ -87,9 +101,13 @@ def get_model_size(model_info: ModelInfo, precision: str):
87
  except AttributeError as e:
88
  return 0 # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
89
 
90
- size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
91
  # model_size = size_factor * model_size
92
- return model_size
 
 
 
 
93
 
94
  KNOWN_SIZE_FACTOR = {
95
  "gptq": {"4bit": 8, "8bit": 4, "2bit": 8, "3bit": 12},
 
69
  return True, "uses a gated model.", None
70
  return False, f"was not found or misconfigured on the hub! Error raised was {e.args[0]}", None
71
 
72
+
73
  def get_model_size(model_info: ModelInfo, precision: str):
74
  size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
75
  safetensors = None
76
  try:
77
  safetensors = get_safetensors_metadata(model_info.id)
78
+ num_parameters = 0
79
+ mem = 0
80
+ for key in safetensors.parameter_count:
81
+ if key in ["F16", "BF16"]:
82
+ mem += safetensors.parameter_count[key] * 2
83
+ else:
84
+ mem += safetensors.parameter_count[key] * 4
85
+
86
+ num_parameters += safetensors.parameter_count[key]
87
+
88
+ params_b = round(num_parameters / 1e9, 2)
89
+ size_gb = round(mem / 1e9,2)
90
+ return params_b, size_gb
91
  except Exception as e:
92
+ print(str(e))
93
 
94
  if safetensors is not None:
95
  model_size = round(sum(safetensors.parameter_count.values()) / 1e9, 3)
 
101
  except AttributeError as e:
102
  return 0 # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
103
 
104
+ # size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
105
  # model_size = size_factor * model_size
106
+ if precision == "16bit":
107
+ size_gb = model_size * 2
108
+ else:
109
+ size_gb = model_size * 4
110
+ return model_size, size_gb
111
 
112
  KNOWN_SIZE_FACTOR = {
113
  "gptq": {"4bit": 8, "8bit": 4, "2bit": 8, "3bit": 12},
src/submission/submit.py CHANGED
@@ -157,11 +157,36 @@ def add_new_eval(
157
  weight_dtype = "int2"
158
 
159
  if quant_type is None or quant_type == "":
160
- return styled_error("Please select a quantization model like GPTQ, AWQ etc.")
161
-
162
- model_params, model_size = get_quantized_model_parameters_memory(model_info,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  quant_method=quant_type.lower(),
164
  bits=precision)
 
 
 
 
 
 
165
 
166
  if quant_type == "llama.cpp":
167
  hardware = "cpu"
@@ -170,6 +195,9 @@ def add_new_eval(
170
  else:
171
  hardware = "gpu"
172
 
 
 
 
173
  eval_entry = {
174
  "model": model,
175
  "revision": revision,
@@ -187,7 +215,7 @@ def add_new_eval(
187
  "hardware": hardware,
188
  "status": "Pending",
189
  "submitted_time": current_time,
190
- "model_type": "quantization",
191
  "job_id": -1,
192
  "job_start_time": None,
193
  "scripts": script
 
157
  weight_dtype = "int2"
158
 
159
  if quant_type is None or quant_type == "":
160
+ # return styled_error("Please select a quantization model like GPTQ, AWQ etc.")
161
+ # for eval fp32/fp16/bf16
162
+ quant_type = None
163
+
164
+ if quant_type is None:
165
+ weight_dtype = str(getattr(model_config, "torch_dtype", "float16"))
166
+ if weight_dtype in ["torch.float16", "float16"]:
167
+ weight_dtype = "float16"
168
+ precision = "16bit"
169
+ elif weight_dtype in ["torch.bfloat16", "bfloat16"]:
170
+ weight_dtype = "bfloat16"
171
+ precision = "16bit"
172
+ elif weight_dtype in ["torch.float32", "float32"]:
173
+ weight_dtype = "float32"
174
+ precision = "32bit"
175
+ else:
176
+ weight_dtype = "?"
177
+ precision = "?"
178
+ model_type = "original"
179
+ model_params, model_size = get_model_size(model_info=model_info, precision=precision)
180
+ else:
181
+ model_params, model_size = get_quantized_model_parameters_memory(model_info,
182
  quant_method=quant_type.lower(),
183
  bits=precision)
184
+ model_type = "quantization"
185
+ else:
186
+ model_params, model_size = get_quantized_model_parameters_memory(model_info,
187
+ quant_method=quant_type.lower(),
188
+ bits=precision)
189
+ model_type = "quantization"
190
 
191
  if quant_type == "llama.cpp":
192
  hardware = "cpu"
 
195
  else:
196
  hardware = "gpu"
197
 
198
+ if compute_dtype == "?":
199
+ compute_dtype = "float16"
200
+
201
  eval_entry = {
202
  "model": model,
203
  "revision": revision,
 
215
  "hardware": hardware,
216
  "status": "Pending",
217
  "submitted_time": current_time,
218
+ "model_type": model_type,
219
  "job_id": -1,
220
  "job_start_time": None,
221
  "scripts": script