shubhrapandit committed on
Commit
55139a1
·
verified ·
1 Parent(s): cada640

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +28 -28
README.md CHANGED
@@ -341,10 +341,10 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
341
  <td>5.66</td>
342
  <td>4.3</td>
343
  <td>252</td>
344
- <td>4.3</td>
345
- <td>252</td>
346
- <td>1.0</td>
347
- <td>1065</td>
348
  </tr>
349
  </tbody>
350
  </table>
@@ -392,22 +392,22 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
392
  <tr>
393
  <td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w8a8</td>
394
  <td>1.80</td>
395
- <td>1.2</td>
396
- <td>578</td>
397
- <td>4.0</td>
398
- <td>2040</td>
399
- <td>4.6</td>
400
- <td>2266</td>
401
  </tr>
402
  <tr>
403
  <td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w4a16</td>
404
  <td>2.75</td>
405
- <td>2.8</td>
406
- <td>1364</td>
407
- <td>12.8</td>
408
- <td>6352</td>
409
- <td>16.4</td>
410
- <td>8148</td>
411
  </tr>
412
  <tr>
413
  <th rowspan="3" valign="top">H100x4</th>
@@ -423,22 +423,22 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
423
  <tr>
424
  <td>neuralmagic/Qwen2.5-VL-72B-Instruct-FP8-Dynamic</td>
425
  <td>1.73</td>
426
- <td>1.8</td>
427
- <td>479</td>
428
- <td>4.4</td>
429
- <td>1203</td>
430
- <td>4.8</td>
431
- <td>1296</td>
432
  </tr>
433
  <tr>
434
  <td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w4a16</td>
435
  <td>8.27</td>
436
- <td>13.2</td>
437
- <td>3652</td>
438
- <td>13.2</td>
439
- <td>3652</td>
440
- <td>99.2</td>
441
- <td>27108</td>
442
  </tr>
443
  </tbody>
444
  </table>
 
341
  <td>5.66</td>
342
  <td>4.3</td>
343
  <td>252</td>
344
+ <td>4.4</td>
345
+ <td>251</td>
346
+ <td>4.2</td>
347
+ <td>259</td>
348
  </tr>
349
  </tbody>
350
  </table>
 
392
  <tr>
393
  <td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w8a8</td>
394
  <td>1.80</td>
395
+ <td>0.6</td>
396
+ <td>289</td>
397
+ <td>2.0</td>
398
+ <td>1020</td>
399
+ <td>2.3</td>
400
+ <td>1133</td>
401
  </tr>
402
  <tr>
403
  <td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w4a16</td>
404
  <td>2.75</td>
405
+ <td>0.7</td>
406
+ <td>341</td>
407
+ <td>3.2</td>
408
+ <td>1588</td>
409
+ <td>4.1</td>
410
+ <td>2037</td>
411
  </tr>
412
  <tr>
413
  <th rowspan="3" valign="top">H100x4</th>
 
423
  <tr>
424
  <td>neuralmagic/Qwen2.5-VL-72B-Instruct-FP8-Dynamic</td>
425
  <td>1.73</td>
426
+ <td>0.9</td>
427
+ <td>247</td>
428
+ <td>2.2</td>
429
+ <td>621</td>
430
+ <td>2.4</td>
431
+ <td>669</td>
432
  </tr>
433
  <tr>
434
  <td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w4a16</td>
435
  <td>8.27</td>
436
+ <td>3.3</td>
437
+ <td>913</td>
438
+ <td>3.3</td>
439
+ <td>898</td>
440
+ <td>3.6</td>
441
+ <td>991</td>
442
  </tr>
443
  </tbody>
444
  </table>