Update README.md
Browse files
README.md
CHANGED
@@ -127,9 +127,11 @@ tokenizer.push_to_hub(save_to)
|
|
127 |
# Model Quality
|
128 |
TODO
|
129 |
|
130 |
-
#
|
131 |
|
132 |
-
|
|
|
|
|
133 |
|----------------------------------|----------------|-------------------------------|
|
134 |
| | Qwen3-32B | Qwen3-32B-float8dq |
|
135 |
| Peak Memory | 65.72 GB | 34.54 GB (-47.44%) |
|
@@ -196,7 +198,7 @@ print(f"Peak Memory Usage: {mem:.02f} GB")
|
|
196 |
|
197 |
# Model Performance
|
198 |
|
199 |
-
|
200 |
| Benchmark | | |
|
201 |
|----------------------------------|----------------|-------------------------------|
|
202 |
| | Qwen3-32B | Qwen3-32B-float8dq |
|
|
|
127 |
# Model Quality
|
128 |
TODO
|
129 |
|
130 |
+
# Memory Usage
|
131 |
|
132 |
+
Tested on H100
|
133 |
+
|
134 |
+
| Memory | | |
|
135 |
|----------------------------------|----------------|-------------------------------|
|
136 |
| | Qwen3-32B | Qwen3-32B-float8dq |
|
137 |
| Peak Memory | 65.72 GB | 34.54 GB (-47.44%) |
|
|
|
198 |
|
199 |
# Model Performance
|
200 |
|
201 |
+
Tested on H100
|
202 |
| Benchmark | | |
|
203 |
|----------------------------------|----------------|-------------------------------|
|
204 |
| | Qwen3-32B | Qwen3-32B-float8dq |
|