Spaces:
Runtime error
Runtime error
<html> | |
<head> | |
<meta content="text/html;charset=utf-8" http-equiv="Content-Type" /> | |
<title>Candle Bert</title> | |
</head> | |
<body></body> | |
</html> | |
<!DOCTYPE html> | |
<html> | |
<head> | |
<meta charset="UTF-8" /> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> | |
<style> | |
html, | |
body { | |
font-family: "Source Sans 3", sans-serif; | |
} | |
</style> | |
<script src="https://cdn.tailwindcss.com"></script> | |
<script type="module" src="./code.js"></script> | |
<script type="module"> | |
import { hcl } from "https://cdn.skypack.dev/d3-color@3"; | |
import { interpolateReds } from "https://cdn.skypack.dev/d3-scale-chromatic@3"; | |
import { scaleLinear } from "https://cdn.skypack.dev/d3-scale@4"; | |
import { | |
getModelInfo, | |
getEmbeddings, | |
getWikiText, | |
cosineSimilarity, | |
} from "./utils.js"; | |
// Module worker handed to getEmbeddings() for every embedding request.
const bertWorker = new Worker("./bertWorker.js", {
  type: "module",
});

// DOM handles, looked up once when the module runs.
const inputContainerEL = document.querySelector("#input-container");
const textAreaEl = document.querySelector("#input-area");
const outputAreaEl = document.querySelector("#output-area");
const formEl = document.querySelector("#form");
const searchInputEl = document.querySelector("#search-input");
const formWikiEl = document.querySelector("#form-wiki");
const searchWikiEl = document.querySelector("#search-wiki");
const outputStatusEl = document.querySelector("#output-status");
const modelSelectEl = document.querySelector("#model");

// Sentence splitter: matches a whitespace character that follows "." or "?",
// unless one of the negative lookbehinds matches just before it (a
// letter-dot-letter sequence, a capitalised two-letter word plus dot, or a
// single capital plus dot), so those are not treated as sentence ends.
const sentencesRegex =
  /(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<![A-Z]\.)(?<=\.|\?)\s/gm;

// Shared UI state.
let sentenceEmbeddings = []; // one embedding per sentence; index matches the `sentence-<id>` span
let currInputText = ""; // text currently rendered in the output area
let isCalculating = false; // true while an embedding request is in flight
/**
 * Shows or hides the editable textarea.
 *
 * @param {boolean} state - Truthy reveals the textarea and focuses it;
 *   falsy hides it.
 */
function toggleTextArea(state) {
  textAreaEl.hidden = !state;
  if (state) {
    textAreaEl.focus();
  }
}
// Focusing the container swaps in the editable textarea.
inputContainerEL.addEventListener("focus", () => {
  toggleTextArea(true);
});

// Leaving the textarea hides it again and, if the text changed while no
// calculation is running, re-renders the sentences and recomputes their
// embeddings. A single "focusout" listener is enough: the former separate
// "blur" listener only repeated the hide step.
textAreaEl.addEventListener("focusout", () => {
  toggleTextArea(false);
  if (currInputText === textAreaEl.value || isCalculating) return;
  populateOutputArea(textAreaEl.value);
  calculateEmbeddings(textAreaEl.value).catch((err) => {
    // Surface the failure instead of leaving an unhandled rejection.
    console.error("Failed to calculate embeddings", err);
  });
});
// When the model selection changes: persist the choice in the URL, notify
// the embedding parent window, and recompute embeddings for the current text.
modelSelectEl.addEventListener("change", () => {
  const query = new URLSearchParams(window.location.search);
  query.set("model", modelSelectEl.value);
  const queryString = `?${query}`;
  window.history.replaceState(
    {},
    "",
    `${window.location.pathname}${queryString}`
  );
  // Previously this referenced an undefined `queryString` variable, which
  // threw a ReferenceError and aborted the handler on every model change.
  window.parent.postMessage({ queryString }, "*");
  if (currInputText === "" || isCalculating) return;
  populateOutputArea(textAreaEl.value);
  calculateEmbeddings(textAreaEl.value).catch((err) => {
    // Surface the failure instead of leaving an unhandled rejection.
    console.error("Failed to calculate embeddings", err);
  });
});
/**
 * Renders `text` into the output area as one <span> per sentence and records
 * it as the current input text.
 *
 * Each span gets the id `sentence-<index>`, where the index is the sentence's
 * position in the `sentencesRegex` split.
 *
 * @param {string} text - Full input text to split and display.
 */
function populateOutputArea(text) {
  currInputText = text;
  outputAreaEl.innerHTML = "";
  text.split(sentencesRegex).forEach((sentence, id) => {
    const sentenceEl = document.createElement("span");
    sentenceEl.id = `sentence-${id}`;
    // Trailing space keeps adjacent sentences visually separated.
    sentenceEl.innerText = sentence + " ";
    outputAreaEl.appendChild(sentenceEl);
  });
}
// Search form: embed the query, rank every sentence by cosine similarity,
// highlight the ten best matches and scroll to the top one.
formEl.addEventListener("submit", async (e) => {
  e.preventDefault();
  // Nothing to rank until the input text has been rendered and embedded;
  // without this guard an empty embedding list crashed on `distances[-1]`.
  if (
    isCalculating ||
    currInputText === "" ||
    sentenceEmbeddings.length === 0
  ) {
    return;
  }
  isCalculating = true;
  toggleInputs(true);
  try {
    const modelID = modelSelectEl.value;
    const { modelURL, tokenizerURL, configURL, search_prefix } =
      getModelInfo(modelID);
    const query = search_prefix + searchInputEl.value;
    outputStatusEl.classList.remove("invisible");
    outputStatusEl.innerText = "Calculating embeddings for query...";
    const out = await getEmbeddings(
      bertWorker,
      modelURL,
      tokenizerURL,
      configURL,
      modelID,
      [query]
    );
    const queryEmbeddings = out.output[0];

    // Cosine similarity of the query against every sentence, best first,
    // keeping only the top 10.
    const topMatches = sentenceEmbeddings
      .map((embedding, id) => ({
        id,
        similarity: cosineSimilarity(queryEmbeddings, embedding),
      }))
      .sort((a, b) => b.similarity - a.similarity)
      .slice(0, 10);

    // Map the [weakest, strongest] similarity range of the kept matches onto
    // the full interpolateReds ramp.
    const colorScale = scaleLinear()
      .domain([
        topMatches[topMatches.length - 1].similarity,
        topMatches[0].similarity,
      ])
      .range([0, 1])
      .interpolate(() => interpolateReds);

    // Clear highlighting left over from a previous search.
    outputAreaEl.querySelectorAll("span").forEach((el) => {
      el.style.color = "unset";
      el.style.backgroundColor = "unset";
    });

    for (const { id, similarity } of topMatches) {
      const el = outputAreaEl.querySelector(`#sentence-${id}`);
      if (!el) continue;
      const color = colorScale(similarity);
      // Use white text on dark backgrounds (HCL lightness below 70).
      el.style.color = hcl(color).l < 70 ? "white" : "black";
      el.style.backgroundColor = color;
    }

    const bestEl = outputAreaEl.querySelector(
      `#sentence-${topMatches[0].id}`
    );
    if (bestEl) {
      bestEl.scrollIntoView({
        behavior: "smooth",
        block: "center",
        inline: "nearest",
      });
    }
  } catch (err) {
    console.error("Similarity search failed", err);
  } finally {
    // Always release the UI, even when the embedding request fails.
    outputStatusEl.classList.add("invisible");
    isCalculating = false;
    toggleInputs(false);
  }
});
/**
 * Computes one embedding per sentence of `text` with the currently selected
 * model and stores the results in `sentenceEmbeddings`.
 *
 * Sentences are embedded one at a time so the status line can report
 * progress. Inputs are disabled while the calculation runs and are always
 * re-enabled afterwards, including when a request fails.
 *
 * @param {string} text - Input text; split into sentences with `sentencesRegex`.
 * @returns {Promise<void>} Rejects with the underlying error if any embedding
 *   request fails; `sentenceEmbeddings` is left empty in that case.
 */
async function calculateEmbeddings(text) {
  isCalculating = true;
  toggleInputs(true);
  // Drop the previous vectors up front so a failed run cannot leave
  // embeddings that no longer match the rendered sentences.
  sentenceEmbeddings = [];
  try {
    const modelID = modelSelectEl.value;
    const { modelURL, tokenizerURL, configURL, document_prefix } =
      getModelInfo(modelID);
    const sentences = text.split(sentencesRegex);
    const embeddings = [];
    outputStatusEl.classList.remove("invisible");
    for (const [id, sentence] of sentences.entries()) {
      outputStatusEl.innerText = `Calculating embeddings: sentence ${
        id + 1
      } of ${sentences.length}`;
      const result = await getEmbeddings(
        bertWorker,
        modelURL,
        tokenizerURL,
        configURL,
        modelID,
        [document_prefix + sentence],
        updateStatus
      );
      embeddings.push(result.output[0]);
    }
    sentenceEmbeddings = embeddings;
  } finally {
    outputStatusEl.classList.add("invisible");
    isCalculating = false;
    toggleInputs(false);
  }
}
/**
 * Progress callback passed to getEmbeddings(): shows the message of
 * "loading" status updates in the status line and ignores everything else.
 *
 * @param {object} data - Update object; only `status` and `message` are read.
 */
function updateStatus(data) {
  const isLoading = "status" in data && data.status === "loading";
  if (!isLoading) return;
  outputStatusEl.innerText = data.message;
  outputStatusEl.classList.remove("invisible");
}
/**
 * Enables or disables every element carrying the `interactive` class.
 *
 * @param {boolean} state - Truthy disables the elements; falsy enables them.
 */
function toggleInputs(state) {
  const shouldDisable = Boolean(state);
  for (const el of document.querySelectorAll(".interactive")) {
    el.disabled = shouldDisable;
  }
}
// Clear any previous "invalid article" message as soon as the user edits
// the field, so the form can be submitted again.
searchWikiEl.addEventListener("input", () => {
  searchWikiEl.setCustomValidity("");
});

// Wikipedia form: load an article (typed title or one of the example
// buttons) into the input text, render it and compute its embeddings.
formWikiEl.addEventListener("submit", async (e) => {
  e.preventDefault();
  // `submitter` is null when the form is submitted without a button.
  const { submitter } = e;
  if (submitter && "example" in submitter.dataset) {
    searchWikiEl.value = submitter.innerText;
  }
  const text = searchWikiEl.value;
  if (isCalculating || text === "") return;

  let wikiText;
  try {
    wikiText = await getWikiText(text);
  } catch (err) {
    // Only a failed article fetch is reported as an invalid title; errors
    // from rendering or embedding are no longer mislabelled this way.
    console.error("Failed to load Wikipedia article", err);
    searchWikiEl.setCustomValidity("Invalid Wikipedia article name");
    searchWikiEl.reportValidity();
    return;
  }

  searchWikiEl.setCustomValidity("");
  // Assign via `.value`: `innerHTML` would parse the article text as markup
  // and would not update a textarea the user has already edited.
  textAreaEl.value = wikiText;
  populateOutputArea(wikiText);
  calculateEmbeddings(wikiText).catch((err) => {
    // Surface the failure instead of leaving an unhandled rejection.
    console.error("Failed to calculate embeddings", err);
  });
  searchWikiEl.value = "";
});
// On load, restore the model selection from the `?model=` query parameter.
document.addEventListener("DOMContentLoaded", () => {
  const modelID = new URLSearchParams(window.location.search).get("model");
  if (!modelID) return;
  // Ignore values that are not in the list: assigning an unknown value
  // would leave the <select> with no option selected.
  const isKnownModel = Array.from(modelSelectEl.options).some(
    (option) => option.value === modelID
  );
  if (!isKnownModel) return;
  modelSelectEl.value = modelID;
  // Run the regular change handler so the URL and parent window stay in sync.
  modelSelectEl.dispatchEvent(new Event("change"));
});
</script> | |
</head> | |
<body class="container max-w-4xl mx-auto p-4"> | |
<main class="grid grid-cols-1 gap-5 relative"> | |
<span class="absolute text-5xl -ml-[1em]"> 🕯️ </span> | |
<div> | |
<h1 class="text-5xl font-bold">Candle BERT</h1> | |
<h2 class="text-2xl font-bold">Rust/WASM Demo</h2> | |
<p class="max-w-lg"> | |
Running sentence embeddings and similarity search in the browser using | |
the Bert Model written with | |
<a | |
href="https://github.com/huggingface/candle/" | |
target="_blank" | |
class="underline hover:text-blue-500 hover:no-underline" | |
>Candle | |
</a> | |
and compiled to Wasm. Embedding models are from
<a | |
href="https://huggingface.co/sentence-transformers/" | |
target="_blank" | |
class="underline hover:text-blue-500 hover:no-underline"> | |
Sentence Transformers | |
</a> | |
and | |
<a | |
href="https://huggingface.co/intfloat/" | |
target="_blank" | |
class="underline hover:text-blue-500 hover:no-underline"> | |
Liang Wang - e5 Models | |
</a> | |
</p> | |
</div> | |
<div> | |
<label for="model" class="font-medium block">Model Options: </label>
<select | |
id="model" | |
class="border-2 border-gray-500 rounded-md font-light interactive disabled:cursor-not-allowed w-full max-w-max"> | |
<option value="gte_tiny">gte_tiny (45.5 MB)</option> | |
<option value="intfloat_e5_small_v2" selected> | |
intfloat/e5-small-v2 (133 MB) | |
</option> | |
<option value="intfloat_e5_base_v2"> | |
intfloat/e5-base-v2 (438 MB) | |
</option> | |
<option value="intfloat_multilingual_e5_small"> | |
intfloat/multilingual-e5-small (471 MB) | |
</option> | |
<option value="sentence_transformers_all_MiniLM_L6_v2"> | |
sentence-transformers/all-MiniLM-L6-v2 (90.9 MB) | |
</option> | |
<option value="sentence_transformers_all_MiniLM_L12_v2"> | |
sentence-transformers/all-MiniLM-L12-v2 (133 MB) | |
</option> | |
</select> | |
</div> | |
<div> | |
<h3 class="font-medium">Examples:</h3> | |
<form | |
id="form-wiki" | |
class="flex text-xs rounded-md justify-between w-min gap-3"> | |
<input type="submit" hidden /> | |
<button data-example class="disabled:cursor-not-allowed interactive"> | |
Pizza | |
</button> | |
<button data-example class="disabled:cursor-not-allowed interactive"> | |
Paris | |
</button> | |
<button data-example class="disabled:cursor-not-allowed interactive"> | |
Physics | |
</button> | |
<input | |
type="text" | |
id="search-wiki" | |
title="Search Wikipedia article by title" | |
class="font-light py-0 mx-1 resize-none outline-none w-32 disabled:cursor-not-allowed interactive" | |
placeholder="Load Wikipedia article..." /> | |
<button | |
title="Search Wikipedia article and load into input" | |
class="bg-gray-700 hover:bg-gray-800 text-white font-normal px-2 py-1 rounded disabled:bg-gray-300 disabled:cursor-not-allowed interactive"> | |
Load | |
</button> | |
</form> | |
</div> | |
<form | |
id="form" | |
class="flex text-normal px-1 py-1 border border-gray-700 rounded-md items-center"> | |
<input type="submit" hidden /> | |
<input | |
type="text" | |
id="search-input" | |
class="font-light w-full px-3 py-2 mx-1 resize-none outline-none interactive disabled:cursor-not-allowed" | |
placeholder="Search query here..." /> | |
<button | |
class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 w-16 rounded disabled:bg-gray-300 disabled:cursor-not-allowed interactive"> | |
Search | |
</button> | |
</form> | |
<div> | |
<h3 class="font-medium">Input text:</h3> | |
<div class="flex justify-between items-center"> | |
<div class="rounded-md inline text-xs"> | |
<span id="output-status" class="m-auto font-light invisible" | |
>C</span | |
> | |
</div> | |
</div> | |
<div | |
id="input-container" | |
tabindex="0" | |
class="min-h-[250px] bg-slate-100 text-gray-500 rounded-md p-4 flex flex-col gap-2 relative"> | |
<textarea | |
id="input-area" | |
hidden | |
value="" | |
placeholder="Input text to perform semantic similarity search..." | |
class="flex-1 resize-none outline-none left-0 right-0 top-0 bottom-0 m-4 absolute interactive disabled:invisible"></textarea> | |
<p id="output-area" class="grid-rows-2"> | |
Input text to perform semantic similarity search... | |
</p> | |
</div> | |
</div> | |
</main> | |
</body> | |
</html> | |