# Spaces: Running
import logging
import os
import tempfile

import numpy as np
import torch
from fastapi import FastAPI, File, UploadFile
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
# Directory where downloaded model files should be cached.
CACHE_DIR = "/app/cache"

# Kept so that anything reading this variable later still sees it. Note that
# `transformers` has already been imported by this point and it reads its
# cache environment variables when it is first imported, so this assignment
# on its own does not redirect downloads. The cache directory is therefore
# also passed explicitly to `from_pretrained` below.
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR

app = FastAPI(
    title="Whisper API",
    redirect_slashes=False,
)

# Device configuration: use the first CUDA device with half precision when
# CUDA is available, otherwise fall back to the CPU with full precision.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Load Whisper model
model_id = "openai/whisper-large-v3-turbo"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
    cache_dir=CACHE_DIR,
).to(device)
processor = AutoProcessor.from_pretrained(model_id, cache_dir=CACHE_DIR)

# Speech-recognition pipeline shared by the request handlers.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)
async def root():
    """Return the static welcome message for the API."""
    # NOTE(review): no route decorator is visible in this view; confirm that
    # this handler is actually registered on `app`.
    return {"message": "Welcome to Whisper API!"}
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with word-level timestamps.

    The upload is written to a temporary file whose path is handed to the
    module-level ``pipe`` speech-recognition pipeline.

    Args:
        file: The uploaded audio file (required).

    Returns:
        ``{"transcription": chunks}``, where ``chunks`` is the ``"chunks"``
        entry of the pipeline result, or ``{"error": message}`` when the
        upload is empty or any step raises.
    """
    # NOTE(review): no route decorator is visible in this view; confirm that
    # this handler is actually registered on `app`.
    try:
        audio_bytes = await file.read()
        if not audio_bytes:
            # Fail fast rather than running the model on an empty file.
            return {"error": "Uploaded file is empty."}

        # The temporary file is deleted when the `with` block exits, so the
        # pipeline must run while the file is still open.
        with tempfile.NamedTemporaryFile(delete=True, suffix=".wav") as temp_audio:
            temp_audio.write(audio_bytes)
            # Make sure the bytes are on disk before the file is read by name.
            temp_audio.flush()
            # NOTE(review): this is a synchronous call inside an async
            # handler; confirm that blocking here is acceptable.
            result = pipe(temp_audio.name, return_timestamps="word")

        return {"transcription": result["chunks"]}
    except Exception as e:
        # Request boundary: keep the existing error payload for clients, but
        # record the traceback so failures are not silently swallowed.
        logging.getLogger(__name__).exception("Transcription failed")
        return {"error": str(e)}