File size: 1,512 Bytes
8ae43fc
 
d193f9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ae43fc
 
 
d193f9f
 
 
 
 
8ae43fc
 
d193f9f
 
 
 
 
 
8ae43fc
 
d193f9f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import time
from typing import Union

import torch
from fastapi import FastAPI
from fastapi import HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from transformers import pipeline
from uvicorn.config import LOGGING_CONFIG

# ASGI application object; docs_url is set explicitly to /api/docs.
app = FastAPI(docs_url="/api/docs")

# CORS policy: any origin, any method and any header are accepted, and
# credentials are allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is a
# very permissive setup — confirm it is intended before exposing this
# service beyond a trusted network.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)

# Use the first CUDA device with half precision when CUDA is available;
# otherwise fall back to the CPU with full precision.
if torch.cuda.is_available():
    device = "cuda:0"
    torch_dtype = torch.float16
else:
    device = "cpu"
    torch_dtype = torch.float32

# Batch size passed to the speech-recognition pipeline on each call.
BATCH_SIZE = 8


# Speech-recognition pipeline for the "openai/whisper-large-v3" checkpoint,
# created once at module import and reused by the request handlers.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large-v3",
    device=device,
    torch_dtype=torch_dtype,
)


@app.get("/device")
def getDevice():
    """Return the device string chosen at module import.

    The elapsed handling time is printed to stdout before returning.
    """
    began_at = time.time()
    elapsed = time.time() - began_at
    print(
        "Time took to process the request and return response is {} sec".format(
            elapsed
        )
    )
    return device


@app.get("/transcribe")
def transcribe(inputs, task):
    """Run the speech-recognition pipeline on ``inputs`` and return its text.

    Args:
        inputs: Audio reference handed straight to the pipeline. It arrives
            as a query parameter, so presumably a path or URL — TODO confirm
            against the client.
        task: Forwarded to generation as ``generate_kwargs["task"]``.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        HTTPException: status 400 when ``inputs`` is missing or empty.
    """
    start_time = time.time()

    # Raise a proper HTTP error: raising a bare string is a TypeError in
    # Python 3 and would surface as a 500 without the message. The
    # truthiness test also rejects an empty query value, not only None.
    if not inputs:
        raise HTTPException(
            status_code=400,
            detail="No audio file submitted! Please upload or record an audio file before submitting your request.",
        )

    result = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    text = result["text"]

    print("Time took to process the request and return response is {} sec".format(
        time.time() - start_time))
    return text