File size: 1,412 Bytes
fc3b1a3
 
 
a003783
 
fc3b1a3
 
a003783
fc3b1a3
 
 
a003783
 
 
 
fc3b1a3
 
 
 
 
 
a003783
 
 
 
 
 
fc3b1a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2e04e29
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from fastapi import FastAPI, HTTPException, UploadFile, File
from pydantic import BaseModel
from multiprocessing import Process, Queue
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import torch
import io
import uvicorn
import soundfile as sf

app = FastAPI()

# Load the model and processor once at import time so every request reuses
# them (on fork-based multiprocessing start methods the worker processes
# inherit these module globals — TODO confirm the start method on the
# deployment platform; "spawn" would re-import and reload the model per child).
model_name = "facebook/wav2vec2-large-960h-lv60"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForCTC.from_pretrained(model_name)

class TranscriptionRequest(BaseModel):
    # NOTE(review): this model is not referenced anywhere in this file — the
    # /transcribe_audio route takes an UploadFile parameter directly. It looks
    # like dead code; candidate for removal. Also, declaring UploadFile as a
    # pydantic field may require arbitrary-types support depending on the
    # pydantic/FastAPI versions in use — confirm before relying on it.
    file: UploadFile

def _transcribe_worker(audio_bytes, queue):
    """Child-process worker: decode raw audio bytes and transcribe them.

    Takes the uploaded file's raw bytes (picklable, unlike UploadFile) and
    puts a ("ok", transcription) or ("error", message) tuple on the queue.
    """
    try:
        # soundfile infers the container format (wav/flac/ogg/...) from the bytes.
        audio, _ = sf.read(io.BytesIO(audio_bytes))
        input_values = processor(audio, return_tensors="pt", padding="longest").input_values
        # Inference only — skip autograd bookkeeping.
        with torch.no_grad():
            logits = model(input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        queue.put(("ok", processor.batch_decode(predicted_ids)[0]))
    except Exception as e:
        # Report the failure to the parent instead of dying silently.
        queue.put(("error", str(e)))

@app.post("/transcribe_audio")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with wav2vec2.

    Inference runs in a child process so a crash there cannot take down the
    server. Raises HTTP 500 with the error message on failure.

    BUGFIX: the original worker function was also named `transcribe_audio`,
    so this endpoint's definition shadowed it at module level and
    `Process(target=transcribe_audio, ...)` targeted the endpoint itself —
    the child never ran inference and `queue.get()` hung forever. The worker
    now has a distinct private name.
    """
    # Read the upload in the parent: UploadFile wraps an unpicklable
    # SpooledTemporaryFile and cannot cross the process boundary; bytes can.
    # (Also, UploadFile.read() is async and must be awaited.)
    audio_bytes = await file.read()
    queue = Queue()
    p = Process(target=_transcribe_worker, args=(audio_bytes, queue))
    p.start()
    p.join()
    # If the child died without enqueueing anything (OOM, signal), a bare
    # queue.get() would block forever.
    if queue.empty():
        raise HTTPException(status_code=500, detail="Transcription process died unexpectedly")
    status, payload = queue.get()
    # Explicit status sentinel instead of substring-matching "Error" against
    # the transcription text, which could misclassify legitimate speech.
    if status == "error":
        raise HTTPException(status_code=500, detail=payload)
    return {"transcription": payload}

# When executed as a script, serve the app on all interfaces at port 7860
# (the port Hugging Face Spaces expects).
if __name__ == "__main__":
    uvicorn.run(app, port=7860, host="0.0.0.0")