"""FastAPI service exposing an automatic-speech-recognition endpoint backed by
OpenAI Whisper (large-v3) via the HuggingFace `transformers` pipeline."""

import time
from typing import Union

import torch
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from transformers import pipeline
from uvicorn.config import LOGGING_CONFIG

app = FastAPI(docs_url="/api/docs")

# Fully-open CORS policy — acceptable only if this service is not exposed
# publicly; NOTE(review): allow_origins=["*"] with allow_credentials=True is
# rejected by browsers per the CORS spec — confirm intended deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)

# Run on GPU with half precision when available; fall back to CPU/float32.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

BATCH_SIZE = 8

# Loaded once at startup; model download/load is expensive.
pipe = pipeline(
    "automatic-speech-recognition",
    "openai/whisper-large-v3",
    torch_dtype=torch_dtype,
    device=device,
)


@app.get("/device")
def getDevice():
    """Return the compute device string the pipeline runs on ("cuda:0" or "cpu")."""
    start_time = time.time()
    print(f"Time took to process the request and return response is {time.time() - start_time} sec")
    return device


@app.get("/transcribe")
def transcribe(inputs, task):
    """Transcribe (or translate) an audio input with Whisper.

    Parameters:
        inputs: audio source accepted by the ASR pipeline — presumably a file
            path or URL passed as a query parameter; TODO confirm caller usage.
        task: Whisper generation task, e.g. "transcribe" or "translate".

    Returns:
        The transcribed text.

    Raises:
        HTTPException: 400 when no audio input was supplied.
    """
    start_time = time.time()
    if inputs is None:
        # BUG FIX: original code raised a plain string, which is a TypeError in
        # Python 3; surface the intended message as a proper 400 client error.
        raise HTTPException(
            status_code=400,
            detail="No audio file submitted! Please upload or record an audio file before submitting your request.",
        )
    text = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )["text"]
    print(f"Time took to process the request and return response is {time.time() - start_time} sec")
    return text