# FastAPI speech-to-text service (wav2vec2) — Hugging Face Spaces app.
from fastapi import FastAPI, HTTPException, UploadFile, File
from pydantic import BaseModel
from multiprocessing import Process, Queue
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import torch
import io
import uvicorn
import soundfile as sf
app = FastAPI()
# Load the model and the processor once at import time, shared by all requests.
# NOTE(review): from_pretrained downloads the checkpoint on first run and
# blocks startup until it finishes — confirm this is acceptable for deployment.
model_name = "facebook/wav2vec2-large-960h-lv60"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForCTC.from_pretrained(model_name)
class TranscriptionRequest(BaseModel):
    """Request model carrying an uploaded file.

    NOTE(review): this model is never referenced by any route below — the
    endpoint takes ``File(...)`` directly. Presumably a leftover; verify
    before removing.
    """
    file: UploadFile
def transcribe_audio(file, queue):
    """Child-process worker: transcribe one uploaded audio file.

    Parameters
    ----------
    file : object with a ``.file`` attribute supporting ``read()`` (an
        UploadFile); the raw bytes are decoded with soundfile.
    queue : multiprocessing.Queue
        Receives exactly one item: the transcription string on success, or a
        string prefixed with ``"Error: "`` on failure.
    """
    try:
        # Decode the raw upload into a sample array.
        # NOTE(review): no resampling is done here — assumes the file is
        # already at the sample rate the model expects; confirm with callers.
        audio, _ = sf.read(io.BytesIO(file.file.read()))
        # Inference only: disable autograd so no gradient graph is built.
        with torch.no_grad():
            input_values = processor(audio, return_tensors="pt", padding="longest").input_values
            logits = model(input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)[0]
        queue.put(transcription)
    except Exception as e:
        # Report failures through the queue rather than crashing the child,
        # so the parent always gets an answer to queue.get().
        queue.put(f"Error: {str(e)}")
@app.post("/transcribe_audio")
def transcribe_endpoint(file: UploadFile = File(...)):
    """POST /transcribe_audio — transcribe an uploaded audio file.

    Fixes over the original:
    - The endpoint was also named ``transcribe_audio``, rebinding the module
      name and shadowing the worker, so ``Process(target=transcribe_audio)``
      spawned the endpoint itself and the queue never received a result.
      Renaming the endpoint function restores the intended worker target;
      the HTTP route is unchanged.
    - Declared ``def`` instead of ``async def`` so FastAPI runs it in its
      threadpool and the blocking ``p.join()`` cannot stall the event loop.

    Returns ``{"transcription": <text>}`` or raises HTTP 500 with the
    worker-reported error message.
    """
    queue = Queue()
    # NOTE(review): passing an UploadFile into a child process relies on
    # fork-style multiprocessing; it will not pickle under the "spawn" start
    # method — confirm the deployment platform, or pass raw bytes instead.
    p = Process(target=transcribe_audio, args=(file, queue))
    p.start()
    p.join()
    response = queue.get()
    # The worker signals failure with an exact "Error: " prefix; checking the
    # prefix (not substring) avoids false positives on real transcriptions.
    if response.startswith("Error:"):
        raise HTTPException(status_code=500, detail=response)
    return {"transcription": response}
if __name__ == "__main__":
    # Serve on all interfaces, port 7860 (the port this app is deployed on).
    uvicorn.run(app, host="0.0.0.0", port=7860)