|
<!DOCTYPE html>
<html lang="en">
  <head>
    <!--
      Single document head. The doctype must be the first token in the file so
      the page renders in standards mode; the title previously lived in a
      separate stub document that preceded the doctype.
    -->
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Candle Whisper Rust/WASM</title>
    <style>
      @import url("https://fonts.googleapis.com/css2?family=Source+Code+Pro:wght@200;300;400&family=Source+Sans+3:wght@100;200;300;400;500;600;700;800;900&display=swap");
      html,
      body {
        font-family: "Source Sans 3", sans-serif;
      }
    </style>
    <!-- Tailwind loaded from its CDN; the utility classes in the markup rely on it. -->
    <script src="https://cdn.tailwindcss.com"></script>
|
<script type="module"> |
|
|
|
// Prefix that the example buttons' data-value file names are appended to.
const AUDIO_BASE_URL = "https://huggingface.co/datasets/Narsil/candle-examples/resolve/main/";
|
|
|
|
|
/*
 * Selectable checkpoints, keyed by the model ID used as the <option> value.
 * For each entry:
 *   base_url  - prefix joined with the three file names below
 *   model     - weights file name
 *   tokenizer - tokenizer file name
 *   config    - config file name
 *   size      - label shown next to the model ID in the picker
 * Key order is the order in which the options are appended.
 */
const MODELS = {
  // Entries served from the openai/* repositories.
  tiny_multilingual: {
    base_url: "https://huggingface.co/openai/whisper-tiny/resolve/main/",
    model: "model.safetensors",
    tokenizer: "tokenizer.json",
    config: "config.json",
    size: "151 MB",
  },
  tiny_en: {
    base_url: "https://huggingface.co/openai/whisper-tiny.en/resolve/main/",
    model: "model.safetensors",
    tokenizer: "tokenizer.json",
    config: "config.json",
    size: "151 MB",
  },

  // Entries served from lmz/candle-whisper (.gguf weight files).
  tiny_quantized_multilingual_q80: {
    base_url: "https://huggingface.co/lmz/candle-whisper/resolve/main/",
    model: "model-tiny-q80.gguf",
    tokenizer: "tokenizer-tiny.json",
    config: "config-tiny.json",
    size: "41.5 MB",
  },
  tiny_en_quantized_q80: {
    base_url: "https://huggingface.co/lmz/candle-whisper/resolve/main/",
    model: "model-tiny-en-q80.gguf",
    tokenizer: "tokenizer-tiny-en.json",
    config: "config-tiny-en.json",
    size: "41.8 MB",
  },

  // Entry served from distil-whisper/distil-medium.en.
  distil_medium_en: {
    base_url: "https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/",
    model: "model.safetensors",
    tokenizer: "tokenizer.json",
    config: "config.json",
    size: "789 MB",
  },
};
|
|
|
// The <select id="model"> element that lists the available checkpoints.
const modelEl = document.querySelector("#model");

// Append one <option> per MODELS entry, labelled "<id> (<size>)".
for (const [modelID, { size }] of Object.entries(MODELS)) {
  const option = document.createElement("option");
  option.value = modelID;
  option.textContent = `${modelID} (${size})`;
  modelEl.appendChild(option);
}

// Module worker that receives the jobs posted by classifyAudio.
const whisperWorker = new Worker("./whisperWorker.js", { type: "module" });
|
|
|
/**
 * Posts one transcription job to `whisperWorker` and settles when it finishes.
 *
 * All URL/ID arguments are forwarded unchanged to the worker in a single
 * message object.
 *
 * @param {string} weightsURL      URL of the model weights.
 * @param {string} modelID         Key of the selected model.
 * @param {string} tokenizerURL    URL of the tokenizer file.
 * @param {string} configURL       URL of the model config file.
 * @param {string} mel_filtersURL  URL of the mel filters file.
 * @param {string} audioURL        URL of the audio to transcribe.
 * @param {(data: object) => void} updateStatus
 *        Called with every worker message that has a `status` field.
 * @returns {Promise<object>} Resolves with the worker message whose `status`
 *        is "complete". Rejects with an Error when a message carries an
 *        `error` field, or when the worker itself fires an `error` event.
 */
async function classifyAudio(
  weightsURL,
  modelID,
  tokenizerURL,
  configURL,
  mel_filtersURL,
  audioURL,
  updateStatus
) {
  return new Promise((resolve, reject) => {
    // Detach both listeners so a finished job leaves nothing behind on the
    // shared worker.
    function detach() {
      whisperWorker.removeEventListener("message", messageHandler);
      whisperWorker.removeEventListener("error", errorHandler);
    }

    function messageHandler(event) {
      console.log(event.data);
      if ("status" in event.data) {
        updateStatus(event.data);
      }
      if ("error" in event.data) {
        detach();
        reject(new Error(event.data.error));
      }
      if (event.data.status === "complete") {
        detach();
        resolve(event.data);
      }
    }

    // A worker that fails to load or throws reports through the `error`
    // event rather than a message; without this listener the promise would
    // stay pending forever.
    function errorHandler(event) {
      detach();
      reject(new Error(event.message || "Whisper worker failed"));
    }

    whisperWorker.addEventListener("message", messageHandler);
    whisperWorker.addEventListener("error", errorHandler);

    whisperWorker.postMessage({
      weightsURL,
      modelID,
      tokenizerURL,
      configURL,
      mel_filtersURL,
      audioURL,
    });
  });
}
|
|
|
|
|
// URL of the clip currently loaded in the player; null until one is chosen.
let audioURL = null;

/**
 * Loads `src` into the #audio player, makes the player visible with controls,
 * enables the #detect button, and records `src` as the clip to transcribe.
 *
 * @param {string} src URL of the audio clip.
 */
function setAudio(src) {
  const player = document.querySelector("#audio");
  // Properties are assigned in the listed order: src, controls, hidden.
  Object.assign(player, { src, controls: true, hidden: false });

  const detectButton = document.querySelector("#detect");
  detectButton.disabled = false;

  audioURL = src;
}
|
|
|
// Each example button loads the clip named by its data-value attribute,
// resolved against AUDIO_BASE_URL.
for (const button of document.querySelectorAll("#audios-select > button")) {
  button.addEventListener("click", () => {
    setAudio(AUDIO_BASE_URL + button.dataset.value);
  });
}
|
|
|
// When a file is picked through the hidden file input, load the first one
// into the player via an object URL.
document.querySelector("#file-upload").addEventListener("change", (event) => {
  const { files } = event.target;
  if (files.length === 0) {
    return;
  }
  setAudio(URL.createObjectURL(files[0]));
});
|
|
|
const dropArea = document.querySelector("#drop-area");

/**
 * Returns the first URL contained in a `text/uri-list` payload, or "" when
 * there is none. The format may hold several line-separated URLs and comment
 * lines starting with "#", so the raw string cannot be used as a URL directly.
 *
 * @param {string} uriList Raw value of dataTransfer.getData("text/uri-list").
 * @returns {string} First non-empty, non-comment line, trimmed.
 */
function firstURLFromList(uriList) {
  const firstURL = uriList
    .split(/\r?\n/)
    .map((line) => line.trim())
    .find((line) => line !== "" && !line.startsWith("#"));
  return firstURL || "";
}

// Highlight the border while something is dragged over the area.
// preventDefault on these events is what marks the area as a drop target.
["dragenter", "dragover"].forEach((type) => {
  dropArea.addEventListener(type, (e) => {
    e.preventDefault();
    dropArea.classList.add("border-blue-700");
  });
});

dropArea.addEventListener("dragleave", (e) => {
  e.preventDefault();
  dropArea.classList.remove("border-blue-700");
});

// A dropped file takes precedence over a dropped link.
dropArea.addEventListener("drop", (e) => {
  e.preventDefault();
  dropArea.classList.remove("border-blue-700");

  const { files } = e.dataTransfer;
  if (files.length > 0) {
    setAudio(URL.createObjectURL(files[0]));
    return;
  }

  const url = firstURLFromList(e.dataTransfer.getData("text/uri-list"));
  if (url) {
    setAudio(url);
  }
});
|
|
|
|
|
// Transcribe the currently selected clip with the currently selected model.
document.querySelector("#detect").addEventListener("click", async () => {
  if (audioURL === null) {
    return;
  }

  const button = document.querySelector("#detect");
  const statusEl = document.querySelector("#output-status");
  const generationEl = document.querySelector("#output-generation");

  const modelID = modelEl.value;
  const { base_url, model, tokenizer, config } = MODELS[modelID];

  // Lock the button right away so a second click cannot queue another job
  // before the worker's first status message arrives.
  button.disabled = true;

  try {
    const result = await classifyAudio(
      base_url + model,
      modelID,
      base_url + tokenizer,
      base_url + config,
      "mel_filters.safetensors",
      audioURL,
      updateStatus
    );
    console.log("RESULT", result);

    // Join the text of every returned segment into one string.
    const text = result.output.map((segment) => segment.dr.text).join(" ");
    console.log(text);

    statusEl.hidden = true;
    generationEl.hidden = false;
    generationEl.textContent = text;
  } catch (error) {
    console.error(error);

    // updateStatus never sees a "complete" message on failure, so restore the
    // button here and tell the user what happened instead of leaving it
    // disabled with a stale progress label.
    button.disabled = false;
    button.textContent = "Transcribe Audio";
    generationEl.hidden = true;
    statusEl.hidden = false;
    statusEl.textContent = `Transcription failed: ${error.message}`;
  }
});
|
|
|
/**
 * Mirrors a worker status message onto the #detect button.
 *
 * "decoding" and "loading" disable the button and show the message text;
 * "complete" re-enables it with its default label. Any other status is ignored.
 *
 * @param {{status: string, message: string}} data Worker status message.
 */
function updateStatus(data) {
  const { status, message } = data;
  const button = document.querySelector("#detect");

  switch (status) {
    case "decoding":
    case "loading":
      button.disabled = true;
      button.textContent = message;
      break;
    case "complete":
      button.disabled = false;
      button.textContent = "Transcribe Audio";
      break;
    default:
      break;
  }
}
|
</script> |
|
</head> |
|
<body class="container max-w-4xl mx-auto p-4"> |
|
<main class="grid grid-cols-1 gap-8 relative"> |
|
<!-- Decorative candle emoji; hidden from assistive technology. -->
<span class="absolute text-5xl -ml-[1em]" aria-hidden="true"> 🕯️ </span>
<div>
  <h1 class="text-5xl font-bold">Candle Whisper</h1>
  <h2 class="text-2xl font-bold">Rust/WASM Demo</h2>
  <p class="max-w-lg">
    Transcribe audio in the browser using rust/wasm with an audio file.
    This demo uses the
    <!-- rel="noopener noreferrer" keeps the new tab from reaching back into this page. -->
    <a
      href="https://huggingface.co/openai/"
      target="_blank"
      rel="noopener noreferrer"
      class="underline hover:text-blue-500 hover:no-underline"
      >OpenAI Whisper models</a
    >
    and WASM runtime built with
    <a
      href="https://github.com/huggingface/candle/"
      target="_blank"
      rel="noopener noreferrer"
      class="underline hover:text-blue-500 hover:no-underline"
      >Candle</a
    >.
  </p>
</div>
|
|
|
<!-- Model picker; its <option> elements are appended by the module script. -->
<div>
  <label class="font-medium" for="model">Models Options: </label>
  <select class="border-2 border-gray-500 rounded-md font-light" id="model"></select>
</div>
|
|
|
<!-- Drop target, file picker and audio player for the clip to transcribe. -->
<div class="relative">
  <div
    id="drop-area"
    class="flex flex-col items-center justify-center border-2 border-gray-300 border-dashed rounded-xl relative h-48 w-full overflow-hidden">
    <div class="flex flex-col items-center justify-center space-y-1 text-center">
      <!-- Decorative icon; the label below carries the instructions. -->
      <svg
        width="25"
        height="25"
        viewBox="0 0 25 25"
        fill="none"
        xmlns="http://www.w3.org/2000/svg"
        aria-hidden="true">
        <path
          d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z"
          fill="#000" />
      </svg>
      <div class="flex text-sm text-gray-600">
        <label
          for="file-upload"
          class="relative cursor-pointer bg-white rounded-md font-medium text-blue-950 hover:text-blue-700">
          <span>Drag and drop your audio here</span>
          <span class="block text-xs">or</span>
          <span class="block text-xs">Click to upload</span>
        </label>
      </div>
      <!-- Visually hidden input; it is activated through the label above. -->
      <input
        id="file-upload"
        name="file-upload"
        type="file"
        accept="audio/*"
        class="sr-only" />
    </div>
    <!-- Stays hidden until the script loads a clip into it. -->
    <audio
      id="audio"
      hidden
      controls
      class="w-full p-2 select-none"></audio>
  </div>
</div>
|
<!--
  Example clips. The script binds a click handler to every button that is a
  direct child of #audios-select and appends its data-value to the audio base
  URL, so the buttons must stay direct children of that element.
  type="button" is explicit so the buttons never act as submit buttons.
-->
<div>
  <div class="flex flex-wrap gap-3 items-center" id="audios-select">
    <h3 class="font-medium">Examples:</h3>
    <button
      type="button"
      data-value="samples_jfk.wav"
      class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
      <span>jfk.wav</span>
      <span class="text-xs block"> (352 kB)</span>
    </button>
    <button
      type="button"
      data-value="samples_a13.wav"
      class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
      <span>a13.wav</span>
      <span class="text-xs block"> (960 kB)</span>
    </button>
    <button
      type="button"
      data-value="samples_mm0.wav"
      class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
      <span>mm0.wav</span>
      <span class="text-xs block new"> (957 kB)</span>
    </button>
    <button
      type="button"
      data-value="samples_gb0.wav"
      class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
      <span>gb0.wav </span>
      <span class="text-xs block">(4.08 MB)</span>
    </button>
    <button
      type="button"
      data-value="samples_gb1.wav"
      class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
      <span>gb1.wav </span>
      <span class="text-xs block">(6.36 MB)</span>
    </button>
    <button
      type="button"
      data-value="samples_hp0.wav"
      class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
      <span>hp0.wav </span>
      <span class="text-xs block">(8.75 MB)</span>
    </button>
  </div>
</div>
|
|
|
<!--
  Starts the transcription. Disabled until a clip is loaded; the script also
  rewrites its label and disabled state while the worker reports progress.
  type="button" is explicit so it never acts as a submit button.
-->
<div>
  <button
    type="button"
    id="detect"
    disabled
    class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 px-4 rounded disabled:bg-gray-300 disabled:cursor-not-allowed">
    Transcribe Audio
  </button>
</div>
|
<!-- Transcription result area. -->
<div>
  <h3 class="font-medium">Transcription:</h3>
  <!--
    aria-live="polite" makes assistive technology announce the text the script
    writes here once a transcription finishes; the container exists before the
    text is injected, as live regions require.
  -->
  <div
    class="min-h-[250px] bg-slate-100 text-gray-500 p-4 rounded-md flex flex-col gap-2"
    aria-live="polite">
    <!-- Receives the transcript; the script unhides it on success. -->
    <p hidden id="output-generation" class="grid-rows-2"></p>
    <!-- Placeholder shown until a result is available. -->
    <span id="output-status" class="m-auto font-light"
      >No transcription results yet</span
    >
  </div>
</div>
|
</main> |
|
</body> |
|
</html> |
|
|