made toc links become underlined when active
Browse files- index.html +25 -2
- style.css +4 -0
index.html
CHANGED
@@ -689,9 +689,9 @@
|
|
689 |
<p>We explored various prompts and found that the additive scale by Yuan et al.<d-cite bibtex-key="yuan2024self"></d-cite> worked best. This scale allows the LLM to reason about each additional point awarded, unlike the single-rating Likert scale which fits samples into predefined boxes. Then, to avoid the LLM favoring highly technical pages like arXiv abstracts and submissions, we focused on grade-school and middle-school level knowledge. By setting a threshold of 3 (on a scale of 0 to 5) during the filtering process, we were able to also retain some high-level educational pages.</p>
|
690 |
<div style="text-align: center; margin: 20px 0;">
|
691 |
<img src="https://cdn-uploads.huggingface.co/production/uploads/61c141342aac764ce1654e43/fjZQ4izIj1rx1xQnBTKKr.png" alt="Prompt for LLM annotation" style="width: 90%; max-width: 800px; height: auto;">
|
692 |
-
<figcaption style="font-style: italic; margin-top: 10px;">Prompt used for
|
693 |
</div>
|
694 |
-
<p>We also experimented with <a href="https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1">Mixtral-
|
695 |
<h3>Classifier Training</h3>
|
696 |
<p>We added a classification head with a single regression output to <a href="https://huggingface.co/Snowflake/snowflake-arctic-embed-m">Snowflake-arctic-embed</a> and trained it on 450,000 Llama 3 annotations for 20 epochs with a learning rate of 3e-4, freezing the embedding and encoder layers. We saved the checkpoint with the highest F1 score on our held-out validation set of 45k samples, treating Llama 3 annotations as ground-truth. After training, we rounded the scores to integers from 0 to 5.</p>
|
697 |
<p>We then converted the problem to a binary classification task by using a fixed threshold to determine if a file is educational. With a threshold of 3, the model achieved an F1 score of 82% on the validation set, indicating strong performance in distinguishing high-quality educational content.</p>
|
@@ -767,6 +767,29 @@
|
|
767 |
ToC += '</nav>';
|
768 |
toc.innerHTML = ToC;
|
769 |
toc.setAttribute('prerendered', 'true');
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
770 |
}
|
771 |
</script>
|
772 |
</body>
|
|
|
689 |
<p>We explored various prompts and found that the additive scale by Yuan et al.<d-cite bibtex-key="yuan2024self"></d-cite> worked best. This scale allows the LLM to reason about each additional point awarded, unlike the single-rating Likert scale which fits samples into predefined boxes. Then, to avoid the LLM favoring highly technical pages like arXiv abstracts and submissions, we focused on grade-school and middle-school level knowledge. By setting a threshold of 3 (on a scale of 0 to 5) during the filtering process, we were able to also retain some high-level educational pages.</p>
|
690 |
<div style="text-align: center; margin: 20px 0;">
|
691 |
<img src="https://cdn-uploads.huggingface.co/production/uploads/61c141342aac764ce1654e43/fjZQ4izIj1rx1xQnBTKKr.png" alt="Prompt for LLM annotation" style="width: 90%; max-width: 800px; height: auto;">
|
692 |
+
<figcaption style="font-style: italic; margin-top: 10px;">Prompt used for Llama 3 annotations of the educational score, also available <a href="https://huggingface.co/HuggingFaceFW/fineweb-edu-classifier/blob/main/utils/prompt.txt">here</a>.</figcaption>
|
693 |
</div>
|
694 |
+
<p>We also experimented with <a href="https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1">Mixtral-8x7B-Instruct</a> and <a href="https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1">Mixtral-8x22B-Instruct</a>, as well as a jury of all three models<d-cite bibtex-key="verga2024replacing"></d-cite>, but found that Llama 3 alone gave the most reliable results.</p>
|
695 |
<h3>Classifier Training</h3>
|
696 |
<p>We added a classification head with a single regression output to <a href="https://huggingface.co/Snowflake/snowflake-arctic-embed-m">Snowflake-arctic-embed</a> and trained it on 450,000 Llama 3 annotations for 20 epochs with a learning rate of 3e-4, freezing the embedding and encoder layers. We saved the checkpoint with the highest F1 score on our held-out validation set of 45k samples, treating Llama 3 annotations as ground-truth. After training, we rounded the scores to integers from 0 to 5.</p>
|
697 |
<p>We then converted the problem to a binary classification task by using a fixed threshold to determine if a file is educational. With a threshold of 3, the model achieved an F1 score of 82% on the validation set, indicating strong performance in distinguishing high-quality educational content.</p>
|
|
|
767 |
ToC += '</nav>';
|
768 |
toc.innerHTML = ToC;
|
769 |
toc.setAttribute('prerendered', 'true');
|
770 |
+
const toc_links = document.querySelectorAll('d-contents > nav a');
|
771 |
+
|
772 |
+
window.addEventListener('scroll', (_event) => {
|
773 |
+
if (typeof (headings) != 'undefined' && headings != null && typeof (toc_links) != 'undefined' && toc_links != null) {
|
774 |
+
// Iterate backwards from the last heading; highlight the first one already scrolled past and break
|
775 |
+
find_active: {
|
776 |
+
for (let i = headings.length - 1; i >= 0; i--) {
|
777 |
+
if (headings[i].getBoundingClientRect().top - 50 <= 0) {
|
778 |
+
if (!toc_links[i].classList.contains("active")) {
|
779 |
+
toc_links.forEach((link, _index) => {
|
780 |
+
link.classList.remove("active");
|
781 |
+
});
|
782 |
+
toc_links[i].classList.add('active');
|
783 |
+
}
|
784 |
+
break find_active;
|
785 |
+
}
|
786 |
+
}
|
787 |
+
toc_links.forEach((link, _index) => {
|
788 |
+
link.classList.remove("active");
|
789 |
+
});
|
790 |
+
}
|
791 |
+
}
|
792 |
+
});
|
793 |
}
|
794 |
</script>
|
795 |
</body>
|
style.css
CHANGED
@@ -137,3 +137,7 @@ d-byline .byline {
|
|
137 |
#title-plot {
|
138 |
margin-top: 0px;
|
139 |
}
|
|
|
|
|
|
|
|
|
|
137 |
#title-plot {
|
138 |
margin-top: 0px;
|
139 |
}
|
140 |
+
|
141 |
+
d-contents > nav a.active {
|
142 |
+
text-decoration: underline;
|
143 |
+
}
|