"""FastAPI service exposing a speech-to-text endpoint backed by a NeMo ASR model."""

import contextlib
import os
import shutil
import tempfile

import nemo.collections.asr as nemo_asr
import torch
import uvicorn
from fastapi import FastAPI, File, HTTPException, UploadFile

app = FastAPI()

# Set the device (CPU or CUDA if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load and configure the ASR model once at import time so every request
# reuses the same instance.
model = nemo_asr.models.ASRModel.from_pretrained(
    "ai4bharat/indicconformer_stt_hi_hybrid_rnnt_large"
)
model.freeze()  # Set to inference mode
model = model.to(device)
model.cur_decoder = "rnnt"  # Use RNNT decoder

UPLOAD_FOLDER = "./uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)  # Create upload folder if it doesn't exist


@app.post("/transcribe/")
async def transcribe_audio(file: UploadFile = File(...), source_lang: str = "hi"):
    """Transcribe an uploaded audio file.

    The upload is written to a uniquely named temporary file inside
    ``UPLOAD_FOLDER``, passed to ``model.transcribe`` and removed again
    whether or not transcription succeeds.

    Args:
        file: The uploaded audio file.
        source_lang: Forwarded to ``model.transcribe`` as ``language_id``.

    Returns:
        A dict ``{"transcription": ...}`` holding the first element of the
        result returned by ``model.transcribe``.

    Raises:
        HTTPException: With status 500 if saving or transcribing fails.
    """
    # NOTE(review): model.transcribe is a blocking call inside an ``async def``
    # handler, so it occupies the event loop while it runs — confirm this
    # serialisation of requests is intended before changing it.
    file_path = None
    try:
        # Never build the on-disk path from the client-supplied filename: it is
        # untrusted and may contain ``..`` or an absolute path. Keep only the
        # extension and let mkstemp pick a unique name, which also prevents
        # concurrent uploads with the same filename from clobbering each other.
        suffix = os.path.splitext(os.path.basename(file.filename or ""))[1]
        fd, file_path = tempfile.mkstemp(suffix=suffix, dir=UPLOAD_FOLDER)
        with os.fdopen(fd, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Perform transcription using the provided language ID
        transcription = model.transcribe(
            [file_path], batch_size=1, language_id=source_lang
        )[0]

        return {"transcription": transcription}
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error during transcription: {str(e)}"
        ) from e
    finally:
        # Always clean up the uploaded file, including on failure. Removal is
        # best-effort so a cleanup error never masks the real outcome.
        if file_path is not None:
            with contextlib.suppress(OSError):
                os.remove(file_path)


# Start the server when this module is executed directly as a script
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)