Spaces:
Running
Running
fixing ToC
Browse files- dist/index.html +18 -11
- dist/style.css +1 -1
- src/index.html +18 -11
- src/style.css +1 -1
dist/index.html
CHANGED
@@ -368,7 +368,7 @@
|
|
368 |
\end{aligned}
|
369 |
</d-math>
|
370 |
|
371 |
-
<p>Now let’s have look how things change if we
|
372 |
|
373 |
<aside>See some more details below when we cover the ZeRO methods.</aside>
|
374 |
|
@@ -2722,22 +2722,29 @@
|
|
2722 |
|
2723 |
window.addEventListener('scroll', (_event) => {
|
2724 |
if (typeof (headings) != 'undefined' && headings != null && typeof (toc_links) != 'undefined' && toc_links != null) {
|
2725 |
-
// Then iterate forwards, on the first match highlight it and break
|
2726 |
find_active: {
|
2727 |
for (let i = headings.length - 1; i >= 0; i--) {
|
2728 |
-
|
2729 |
-
|
2730 |
-
|
2731 |
-
|
2732 |
-
|
2733 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2734 |
}
|
2735 |
break find_active;
|
2736 |
}
|
2737 |
}
|
2738 |
-
toc_links.forEach(
|
2739 |
-
link.classList.remove("active");
|
2740 |
-
});
|
2741 |
}
|
2742 |
}
|
2743 |
});
|
|
|
368 |
\end{aligned}
|
369 |
</d-math>
|
370 |
|
371 |
+
<p>Now let’s have a look at how things change if we use a lower precision. For stability reasons (see <a target="_self" href="#mixed_precision_training">the mixed-precision training section below</a>) we often don't use full low-precision training but a mix of higher and lower precision called "mixed precision"<d-cite bibtex-key="micikevicius2018mixedprecisiontraining"></d-cite>. The default nowadays for mixed-precision training is to generally use BF16 for most of the computations –requiring 2 bytes per parameter and gradient– as well as an additional copy of the model weights and gradients in FP32, thus 12 bytes per parameter in total. In addition to the parameters and gradients, we need to store the optimizer states: for the Adam optimizer, this requires the momentum and the variance, usually stored in FP32 for numerical stability, each using 4 bytes. </p>
|
372 |
|
373 |
<aside>See some more details below when we cover the ZeRO methods.</aside>
|
374 |
|
|
|
2722 |
|
2723 |
window.addEventListener('scroll', (_event) => {
|
2724 |
if (typeof (headings) != 'undefined' && headings != null && typeof (toc_links) != 'undefined' && toc_links != null) {
|
|
|
2725 |
find_active: {
|
2726 |
for (let i = headings.length - 1; i >= 0; i--) {
|
2727 |
+
const heading = headings[i];
|
2728 |
+
// Skip headings that shouldn't be in TOC
|
2729 |
+
if (heading.parentElement.tagName == 'D-TITLE' || heading.getAttribute('no-toc')) {
|
2730 |
+
continue;
|
2731 |
+
}
|
2732 |
+
|
2733 |
+
if (heading.getBoundingClientRect().top - 50 <= 0) {
|
2734 |
+
// Find matching TOC link by href
|
2735 |
+
const headingId = heading.getAttribute('id');
|
2736 |
+
const activeLink = Array.from(toc_links).find(link =>
|
2737 |
+
link.getAttribute('href') === '#' + headingId
|
2738 |
+
);
|
2739 |
+
|
2740 |
+
if (activeLink && !activeLink.classList.contains("active")) {
|
2741 |
+
toc_links.forEach(link => link.classList.remove("active"));
|
2742 |
+
activeLink.classList.add('active');
|
2743 |
}
|
2744 |
break find_active;
|
2745 |
}
|
2746 |
}
|
2747 |
+
toc_links.forEach(link => link.classList.remove("active"));
|
|
|
|
|
2748 |
}
|
2749 |
}
|
2750 |
});
|
dist/style.css
CHANGED
@@ -188,7 +188,7 @@ toggle-icon.collapsed {
|
|
188 |
.toc-content {
|
189 |
margin-top: 15px;
|
190 |
overflow: hidden;
|
191 |
-
max-height: 1000px;
|
192 |
transition: max-height 0.3s ease-out;
|
193 |
}
|
194 |
|
|
|
188 |
.toc-content {
|
189 |
margin-top: 15px;
|
190 |
overflow: hidden;
|
191 |
+
/* max-height: 1000px; */
|
192 |
transition: max-height 0.3s ease-out;
|
193 |
}
|
194 |
|
src/index.html
CHANGED
@@ -368,7 +368,7 @@
|
|
368 |
\end{aligned}
|
369 |
</d-math>
|
370 |
|
371 |
-
<p>Now let’s have look how things change if we
|
372 |
|
373 |
<aside>See some more details below when we cover the ZeRO methods.</aside>
|
374 |
|
@@ -2722,22 +2722,29 @@
|
|
2722 |
|
2723 |
window.addEventListener('scroll', (_event) => {
|
2724 |
if (typeof (headings) != 'undefined' && headings != null && typeof (toc_links) != 'undefined' && toc_links != null) {
|
2725 |
-
// Then iterate forwards, on the first match highlight it and break
|
2726 |
find_active: {
|
2727 |
for (let i = headings.length - 1; i >= 0; i--) {
|
2728 |
-
|
2729 |
-
|
2730 |
-
|
2731 |
-
|
2732 |
-
|
2733 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2734 |
}
|
2735 |
break find_active;
|
2736 |
}
|
2737 |
}
|
2738 |
-
toc_links.forEach(
|
2739 |
-
link.classList.remove("active");
|
2740 |
-
});
|
2741 |
}
|
2742 |
}
|
2743 |
});
|
|
|
368 |
\end{aligned}
|
369 |
</d-math>
|
370 |
|
371 |
+
<p>Now let’s have a look at how things change if we use a lower precision. For stability reasons (see <a target="_self" href="#mixed_precision_training">the mixed-precision training section below</a>) we often don't use full low-precision training but a mix of higher and lower precision called "mixed precision"<d-cite bibtex-key="micikevicius2018mixedprecisiontraining"></d-cite>. The default nowadays for mixed-precision training is to generally use BF16 for most of the computations –requiring 2 bytes per parameter and gradient– as well as an additional copy of the model weights and gradients in FP32, thus 12 bytes per parameter in total. In addition to the parameters and gradients, we need to store the optimizer states: for the Adam optimizer, this requires the momentum and the variance, usually stored in FP32 for numerical stability, each using 4 bytes. </p>
|
372 |
|
373 |
<aside>See some more details below when we cover the ZeRO methods.</aside>
|
374 |
|
|
|
2722 |
|
2723 |
window.addEventListener('scroll', (_event) => {
|
2724 |
if (typeof (headings) != 'undefined' && headings != null && typeof (toc_links) != 'undefined' && toc_links != null) {
|
|
|
2725 |
find_active: {
|
2726 |
for (let i = headings.length - 1; i >= 0; i--) {
|
2727 |
+
const heading = headings[i];
|
2728 |
+
// Skip headings that shouldn't be in TOC
|
2729 |
+
if (heading.parentElement.tagName == 'D-TITLE' || heading.getAttribute('no-toc')) {
|
2730 |
+
continue;
|
2731 |
+
}
|
2732 |
+
|
2733 |
+
if (heading.getBoundingClientRect().top - 50 <= 0) {
|
2734 |
+
// Find matching TOC link by href
|
2735 |
+
const headingId = heading.getAttribute('id');
|
2736 |
+
const activeLink = Array.from(toc_links).find(link =>
|
2737 |
+
link.getAttribute('href') === '#' + headingId
|
2738 |
+
);
|
2739 |
+
|
2740 |
+
if (activeLink && !activeLink.classList.contains("active")) {
|
2741 |
+
toc_links.forEach(link => link.classList.remove("active"));
|
2742 |
+
activeLink.classList.add('active');
|
2743 |
}
|
2744 |
break find_active;
|
2745 |
}
|
2746 |
}
|
2747 |
+
toc_links.forEach(link => link.classList.remove("active"));
|
|
|
|
|
2748 |
}
|
2749 |
}
|
2750 |
});
|
src/style.css
CHANGED
@@ -188,7 +188,7 @@ toggle-icon.collapsed {
|
|
188 |
.toc-content {
|
189 |
margin-top: 15px;
|
190 |
overflow: hidden;
|
191 |
-
max-height: 1000px;
|
192 |
transition: max-height 0.3s ease-out;
|
193 |
}
|
194 |
|
|
|
188 |
.toc-content {
|
189 |
margin-top: 15px;
|
190 |
overflow: hidden;
|
191 |
+
/* max-height: 1000px; */
|
192 |
transition: max-height 0.3s ease-out;
|
193 |
}
|
194 |
|