Spaces:
No application file
No application file
File size: 851 Bytes
4cce556 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
from fastapi import FastAPI, UploadFile, File, HTTPException
import torchaudio
from speechbrain.inference.speaker import EncoderClassifier
import torch
import io
# Source identifier handed to EncoderClassifier.from_hparams.
_MODEL_SOURCE = "speechbrain/spkrec-ecapa-voxceleb"

# Application object on which the HTTP routes in this module are registered.
app = FastAPI()

# Built at import time so that every request reuses the same classifier
# instead of constructing a new one per call.
classifier = EncoderClassifier.from_hparams(
    source=_MODEL_SOURCE,
)
@app.post("/embed")
async def get_voice_embedding(file: UploadFile = File(...)):
    """Compute a speaker embedding for an uploaded audio file.

    The upload is decoded with torchaudio, downmixed to a single channel,
    resampled to 16 kHz when necessary, and passed to the module-level
    ``classifier``.

    Args:
        file: Uploaded audio; the filename must end in ``.wav``, ``.mp3`` or
            ``.flac`` (case-insensitive).

    Returns:
        A dict with one key, ``"embedding"``, holding the squeezed embedding
        tensor converted to a (nested) Python list.

    Raises:
        HTTPException: 400 when the filename is missing or has an unsupported
            extension, when the upload is empty, or when the audio cannot be
            decoded.
    """
    allowed_suffixes = (".wav", ".mp3", ".flac")
    # Sample rate the pretrained model expects, per the SpeechBrain model
    # card for spkrec-ecapa-voxceleb; encode_batch does not resample itself.
    target_sample_rate = 16000

    # UploadFile.filename may be None, and clients often send upper-case
    # extensions; normalise before checking so neither case produces a 500
    # or a spurious rejection.
    filename = (file.filename or "").lower()
    if not filename.endswith(allowed_suffixes):
        raise HTTPException(status_code=400, detail="Invalid file format")

    # Read audio bytes and load into tensor
    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Empty audio file")

    try:
        audio_tensor, sample_rate = torchaudio.load(io.BytesIO(audio_bytes))
    except Exception as exc:
        # The exception type depends on the torchaudio backend in use, so
        # catch broadly at this request boundary and report a client error
        # rather than letting it surface as a 500.
        raise HTTPException(
            status_code=400, detail="Could not decode audio file"
        ) from exc

    if audio_tensor.numel() == 0:
        raise HTTPException(status_code=400, detail="Empty audio file")

    # torchaudio.load returns (channels, samples) by default. encode_batch
    # treats the first dimension as the batch, so a stereo file would yield
    # one embedding per channel; average the channels to get one per file.
    if audio_tensor.dim() == 2 and audio_tensor.shape[0] > 1:
        audio_tensor = audio_tensor.mean(dim=0, keepdim=True)

    if sample_rate != target_sample_rate:
        audio_tensor = torchaudio.functional.resample(
            audio_tensor, sample_rate, target_sample_rate
        )

    # Compute embedding
    with torch.no_grad():
        embeddings = classifier.encode_batch(audio_tensor)

    return {
        "embedding": embeddings.squeeze().tolist()
    }
|