File size: 1,555 Bytes
a853668
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from fastapi import FastAPI, File, UploadFile, HTTPException
import nemo.collections.asr as nemo_asr
import torch
import shutil
import os
import uvicorn

# HTTP application object; the route handlers in this module register on it.
app = FastAPI()

# Run on the GPU when PyTorch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Fetch the pretrained ASR checkpoint by name. This happens once, at import
# time, so every request shares the same model instance.
model = nemo_asr.models.ASRModel.from_pretrained(
    "ai4bharat/indicconformer_stt_hi_hybrid_rnnt_large"
)
# Prepare the model for serving: freeze it for inference, move it to the
# selected device, then select the RNNT decoder.
model.freeze()
model = model.to(device)
model.cur_decoder = "rnnt"

# Directory where uploads are staged before transcription; created up front
# so request handlers can write into it without checking.
UPLOAD_FOLDER = "./uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)


@app.post("/transcribe/")
async def transcribe_audio(file: UploadFile = File(...), source_lang: str = "hi"):
    """Transcribe an uploaded audio file with the module-level ASR model.

    The upload is staged in ``UPLOAD_FOLDER`` under a server-generated name,
    passed to ``model.transcribe``, and deleted again whether or not the
    transcription succeeds.

    Args:
        file: The uploaded audio file (multipart form field ``file``).
        source_lang: Language ID forwarded to ``model.transcribe`` as
            ``language_id``. Defaults to ``"hi"``.

    Returns:
        A dict ``{"transcription": ...}`` holding the first element of the
        result returned by ``model.transcribe``.

    Raises:
        HTTPException: Status 500 if staging the file or transcribing fails;
            the detail carries the underlying error message.
    """
    import uuid  # local import: only this handler needs it

    # The client-supplied filename is untrusted: it may contain directory
    # components ("../../x"), be empty, or clash with a concurrent upload of
    # the same name. Use a random name and keep only the original extension
    # (presumably useful to the audio loader for format detection).
    suffix = os.path.splitext(os.path.basename(file.filename or ""))[1]
    file_path = os.path.join(UPLOAD_FOLDER, f"{uuid.uuid4().hex}{suffix}")

    try:
        # Save the uploaded audio file to disk
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Perform transcription using the provided language ID.
        # NOTE(review): this is a synchronous call inside an async handler,
        # so it occupies the event loop while it runs — confirm acceptable.
        transcription = model.transcribe(
            [file_path], batch_size=1, language_id=source_lang
        )[0]

        return {"transcription": transcription}

    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error during transcription: {str(e)}"
        ) from e

    finally:
        # Always remove the staged file, including when transcription failed;
        # it may not exist if opening it for writing was what failed.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            pass


# Script entry point: when this file is executed directly (rather than
# imported), serve the app with uvicorn on all interfaces at port 7860.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, port=7860, host="0.0.0.0")