Update README.md
Browse files
README.md
CHANGED
@@ -341,10 +341,10 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
341 |
<td>5.66</td>
|
342 |
<td>4.3</td>
|
343 |
<td>252</td>
|
344 |
-
<td>4.
|
345 |
-
<td>
|
346 |
-
<td>
|
347 |
-
<td>
|
348 |
</tr>
|
349 |
</tbody>
|
350 |
</table>
|
@@ -392,22 +392,22 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
392 |
<tr>
|
393 |
<td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w8a8</td>
|
394 |
<td>1.80</td>
|
395 |
-
<td>
|
396 |
-
<td>
|
397 |
-
<td>
|
398 |
-
<td>
|
399 |
-
<td>
|
400 |
-
<td>
|
401 |
</tr>
|
402 |
<tr>
|
403 |
<td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w4a16</td>
|
404 |
<td>2.75</td>
|
405 |
-
<td>
|
406 |
-
<td>
|
407 |
-
<td>
|
408 |
-
<td>
|
409 |
-
<td>
|
410 |
-
<td>
|
411 |
</tr>
|
412 |
<tr>
|
413 |
<th rowspan="3" valign="top">H100x4</th>
|
@@ -423,22 +423,22 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
423 |
<tr>
|
424 |
<td>neuralmagic/Qwen2.5-VL-72B-Instruct-FP8-Dynamic</td>
|
425 |
<td>1.73</td>
|
426 |
-
<td>
|
427 |
-
<td>
|
428 |
-
<td>
|
429 |
-
<td>
|
430 |
-
<td>4
|
431 |
-
<td>
|
432 |
</tr>
|
433 |
<tr>
|
434 |
<td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w4a16</td>
|
435 |
<td>8.27</td>
|
436 |
-
<td>
|
437 |
-
<td>
|
438 |
-
<td>
|
439 |
-
<td>
|
440 |
-
<td>
|
441 |
-
<td>
|
442 |
</tr>
|
443 |
</tbody>
|
444 |
</table>
|
|
|
341 |
<td>5.66</td>
|
342 |
<td>4.3</td>
|
343 |
<td>252</td>
|
344 |
+
<td>4.4</td>
|
345 |
+
<td>251</td>
|
346 |
+
<td>4.2</td>
|
347 |
+
<td>259</td>
|
348 |
</tr>
|
349 |
</tbody>
|
350 |
</table>
|
|
|
392 |
<tr>
|
393 |
<td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w8a8</td>
|
394 |
<td>1.80</td>
|
395 |
+
<td>0.6</td>
|
396 |
+
<td>289</td>
|
397 |
+
<td>2.0</td>
|
398 |
+
<td>1020</td>
|
399 |
+
<td>2.3</td>
|
400 |
+
<td>1133</td>
|
401 |
</tr>
|
402 |
<tr>
|
403 |
<td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w4a16</td>
|
404 |
<td>2.75</td>
|
405 |
+
<td>0.7</td>
|
406 |
+
<td>341</td>
|
407 |
+
<td>3.2</td>
|
408 |
+
<td>1588</td>
|
409 |
+
<td>4.1</td>
|
410 |
+
<td>2037</td>
|
411 |
</tr>
|
412 |
<tr>
|
413 |
<th rowspan="3" valign="top">H100x4</th>
|
|
|
423 |
<tr>
|
424 |
<td>neuralmagic/Qwen2.5-VL-72B-Instruct-FP8-Dynamic</td>
|
425 |
<td>1.73</td>
|
426 |
+
<td>0.9</td>
|
427 |
+
<td>247</td>
|
428 |
+
<td>2.2</td>
|
429 |
+
<td>621</td>
|
430 |
+
<td>2.4</td>
|
431 |
+
<td>669</td>
|
432 |
</tr>
|
433 |
<tr>
|
434 |
<td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w4a16</td>
|
435 |
<td>8.27</td>
|
436 |
+
<td>3.3</td>
|
437 |
+
<td>913</td>
|
438 |
+
<td>3.3</td>
|
439 |
+
<td>898</td>
|
440 |
+
<td>3.6</td>
|
441 |
+
<td>991</td>
|
442 |
</tr>
|
443 |
</tbody>
|
444 |
</table>
|