Update README.md
Browse files
README.md
CHANGED
@@ -129,9 +129,7 @@ TODO
|
|
129 |
|
130 |
# Memory Usage
|
131 |
|
132 |
-
|
133 |
-
|
134 |
-
| Memory | | |
|
135 |
|----------------------------------|----------------|-------------------------------|
|
136 |
| | Qwen3-32B | Qwen3-32B-float8dq |
|
137 |
| Peak Memory | 65.72 GB | 34.54 GB (-47.44%) |
|
@@ -198,8 +196,8 @@ print(f"Peak Memory Usage: {mem:.02f} GB")
|
|
198 |
|
199 |
# Model Performance
|
200 |
|
201 |
-
|
202 |
-
| Benchmark
|
203 |
|----------------------------------|----------------|-------------------------------|
|
204 |
| | Qwen3-32B | Qwen3-32B-float8dq |
|
205 |
| latency (batch_size=1) | 9.1s | 5.77s (-36.6%) |
|
|
|
129 |
|
130 |
# Memory Usage
|
131 |
|
132 |
+
| Memory (tested on H100) | | |
|
|
|
|
|
133 |
|----------------------------------|----------------|-------------------------------|
|
134 |
| | Qwen3-32B | Qwen3-32B-float8dq |
|
135 |
| Peak Memory | 65.72 GB | 34.54 GB (-47.44%) |
|
|
|
196 |
|
197 |
# Model Performance
|
198 |
|
199 |
+
|
200 |
+
| Benchmark (tested on H100)       |                |                               |
|
201 |
|----------------------------------|----------------|-------------------------------|
|
202 |
| | Qwen3-32B | Qwen3-32B-float8dq |
|
203 |
| latency (batch_size=1) | 9.1s | 5.77s (-36.6%) |
|