File size: 851 Bytes
4cce556
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from fastapi import FastAPI, UploadFile, File, HTTPException
import torchaudio
from speechbrain.inference.speaker import EncoderClassifier
import torch
import io

# ASGI application object; the route decorators in this module register on it.
app = FastAPI()

# Speaker encoder built a single time when the module is imported, so every
# request reuses the same instance instead of constructing it per call.
classifier = EncoderClassifier.from_hparams(
    source="speechbrain/spkrec-ecapa-voxceleb",
)

@app.post("/embed")
async def get_voice_embedding(file: UploadFile = File(...)):
    """Return a speaker embedding for an uploaded audio file.

    The upload is decoded with torchaudio, mixed down to a single channel,
    resampled to 16 kHz and passed to the module-level ``classifier``.

    Args:
        file: Uploaded audio. Its filename must end in ``.wav``, ``.mp3`` or
            ``.flac`` (compared case-insensitively).

    Returns:
        A dict with a single key, ``"embedding"``, holding the squeezed
        encoder output converted to Python lists.

    Raises:
        HTTPException: Status 400 when the extension is not accepted, the
            upload is empty, or the bytes cannot be decoded as audio.
    """
    # The filename is client-supplied and may be missing; treat that the same
    # as an unsupported extension instead of failing with AttributeError.
    filename = (file.filename or "").lower()
    if not filename.endswith((".wav", ".mp3", ".flac")):
        raise HTTPException(status_code=400, detail="Invalid file format")

    # Read audio bytes and load into tensor
    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Empty file")

    try:
        audio_tensor, sample_rate = torchaudio.load(io.BytesIO(audio_bytes))
    except (RuntimeError, ValueError, OSError) as err:
        # Undecodable input is a client error, not a server fault.
        raise HTTPException(
            status_code=400, detail="Could not decode audio"
        ) from err

    if audio_tensor.numel() == 0:
        raise HTTPException(status_code=400, detail="Empty file")

    # torchaudio.load yields (channels, time) while encode_batch reads the
    # first dimension as the batch, so a stereo file would be encoded as two
    # separate utterances. Average the channels into one mono signal.
    if audio_tensor.dim() == 2 and audio_tensor.size(0) > 1:
        audio_tensor = audio_tensor.mean(dim=0, keepdim=True)

    # The pretrained ECAPA VoxCeleb model expects 16 kHz input; encode_batch
    # does not resample on its own.
    target_rate = 16000
    if sample_rate != target_rate:
        audio_tensor = torchaudio.functional.resample(
            audio_tensor, sample_rate, target_rate
        )

    # Compute embedding
    with torch.no_grad():
        embeddings = classifier.encode_batch(audio_tensor)

    return {
        "embedding": embeddings.squeeze().tolist()
    }