Yash Chauhan committed
Commit 9792cba · 1 Parent(s): 3b3ca65

[updated] code

Files changed (3)
  1. Dockerfile +18 -0
  2. app.py +50 -0
  3. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,18 @@
+ # Use an official Python image with CUDA support if using GPU
+ FROM nvidia/cuda:12.1.1-devel-ubuntu22.04
+
+ # Install system packages as root (apt-get fails once USER is switched); ffmpeg is needed to decode uploaded audio
+ RUN apt-get update && apt-get install -y \
+     python3-pip ffmpeg \
+     && rm -rf /var/lib/apt/lists/*
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+ COPY --chown=user . /app
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
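For local testing, the container can be built and run along these lines (the whisper-api image tag is illustrative and not part of this commit; --gpus all requires the NVIDIA Container Toolkit and can be dropped for a CPU-only run):

docker build -t whisper-api .
docker run --gpus all -p 7860:7860 whisper-api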
app.py ADDED
@@ -0,0 +1,50 @@
+ import tempfile
+
+ import numpy as np
+ import torch
+ from fastapi import FastAPI, File, UploadFile
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+
+ app = FastAPI()
+
+ # Device configuration
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+ # Load Whisper model
+ model_id = "openai/whisper-large-v3-turbo"
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(
+     model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
+ ).to(device)
+
+ processor = AutoProcessor.from_pretrained(model_id)
+
+ pipe = pipeline(
+     "automatic-speech-recognition",
+     model=model,
+     tokenizer=processor.tokenizer,
+     feature_extractor=processor.feature_extractor,
+     torch_dtype=torch_dtype,
+     device=device
+ )
+
+ @app.get("/")
+ async def root():
+     return {"message": "Welcome to Whisper API!"}
+
+ @app.post("/transcribe/")
+ async def transcribe_audio(file: UploadFile = File(...)):
+     try:
+         # Save the uploaded file temporarily
+         with tempfile.NamedTemporaryFile(delete=True, suffix=".wav") as temp_audio:
+             temp_audio.write(await file.read())
+             temp_audio.flush()
+
+             # Transcribe the audio
+             result = pipe(temp_audio.name, return_timestamps="word")
+
+             return {"transcription": result["chunks"]}
+
+     except Exception as e:
+         return {"error": str(e)}
+
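For reference, a minimal client sketch for the /transcribe/ endpoint, assuming the server above is reachable at http://localhost:7860 and that the requests package is available (neither is part of this commit):

import requests

# Upload a local WAV file to the endpoint and print the word-level chunks
with open("sample.wav", "rb") as f:  # sample.wav is a placeholder file name
    response = requests.post(
        "http://localhost:7860/transcribe/",
        files={"file": ("sample.wav", f, "audio/wav")},
    )
print(response.json())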
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ torch
+ transformers
+ fastapi
+ uvicorn
+ pydantic
+ numpy