Darshan committed on
Commit
a853668
·
1 Parent(s): 02fa6ef
Files changed (3) hide show
  1. Dockerfile +29 -0
  2. app.py +53 -0
  3. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Use a lightweight Python image
FROM python:3.9-slim

# Install system dependencies.
# --no-install-recommends keeps the layer small; ca-certificates is listed
# explicitly because the HTTPS git clone below depends on it.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates git ffmpeg wget && \
    rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy requirements on their own first so the dependency layer stays cached
# when only application code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install NeMo from the nemo-v2 branch of the AI4Bharat fork.
# A shallow clone of just that branch avoids downloading the full history.
RUN git clone --depth 1 --branch nemo-v2 https://github.com/AI4Bharat/NeMo.git && \
    cd NeMo && \
    bash reinstall.sh

# Copy all code to the working directory
COPY . .

# Expose the required port
EXPOSE 7860

# Run the FastAPI app with Uvicorn
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile, HTTPException
2
+ import nemo.collections.asr as nemo_asr
3
+ import torch
4
+ import shutil
5
+ import os
6
+ import uvicorn
7
+
# ASGI application object that the route decorators below attach to.
app = FastAPI()

# Prefer the GPU when torch reports CUDA support; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the pretrained ASR checkpoint once at import time and prepare it for
# inference on the selected device.
_PRETRAINED_NAME = "ai4bharat/indicconformer_stt_hi_hybrid_rnnt_large"
model = nemo_asr.models.ASRModel.from_pretrained(_PRETRAINED_NAME)
model.freeze()  # inference mode
model = model.to(device)
model.cur_decoder = "rnnt"  # decode with the RNNT decoder

# Directory that receives uploaded audio; created up front if it is missing.
UPLOAD_FOLDER = "./uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
23
+
24
+
@app.post("/transcribe/")
async def transcribe_audio(file: UploadFile = File(...), source_lang: str = "hi"):
    """Transcribe an uploaded audio file with the module-level ASR model.

    The upload is written to a uniquely named temporary file inside
    ``UPLOAD_FOLDER``, handed to ``model.transcribe``, and removed again
    whether or not transcription succeeds.

    Args:
        file: Audio file sent as multipart form data.
        source_lang: Language ID forwarded to ``model.transcribe`` as
            ``language_id``; defaults to ``"hi"``.

    Returns:
        A dict ``{"transcription": ...}`` holding the first element of the
        value returned by ``model.transcribe``.

    Raises:
        HTTPException: Status 500 if saving the upload or transcribing fails.
    """
    # Local import: tempfile is not among the module's top-level imports.
    import tempfile

    # The client-supplied filename is untrusted: joining it into the path would
    # allow traversal (e.g. "../../x") and lets concurrent uploads with the
    # same name clobber each other. Only a plain alphanumeric extension is
    # reused, in case the audio loader relies on it to detect the format.
    suffix = os.path.splitext(os.path.basename(file.filename or ""))[1]
    if not suffix[1:].isalnum():
        suffix = ""

    file_path = None
    try:
        # Save the uploaded audio under a unique server-chosen name
        with tempfile.NamedTemporaryFile(
            "wb", dir=UPLOAD_FOLDER, suffix=suffix, delete=False
        ) as buffer:
            file_path = buffer.name
            shutil.copyfileobj(file.file, buffer)

        # Perform transcription using the provided language ID
        transcription = model.transcribe(
            [file_path], batch_size=1, language_id=source_lang
        )[0]

        return {"transcription": transcription}

    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error during transcription: {str(e)}"
        ) from e

    finally:
        # Always clean up, including when transcription raised
        if file_path is not None and os.path.exists(file_path):
            os.remove(file_path)
47
+
48
+
# Start a Uvicorn server when this file is executed directly as a script
# (e.g. `python app.py`). Launching through the `uvicorn app:app` command
# imports the module instead and never reaches this branch.
if __name__ == "__main__":
    # uvicorn is already imported at module level, so no local import is needed.
    uvicorn.run(app, host="0.0.0.0", port=7860)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
fastapi
uvicorn
# Required by FastAPI to parse multipart/form-data uploads (File / UploadFile)
python-multipart
torch
ffmpeg-python