import time
from fastapi import APIRouter, Depends, HTTPException, status
from faster_whisper import WhisperModel

import os

from libs.convert_to_audio import convert_to_audio
from libs.header_api_auth import get_api_key

router = APIRouter(prefix="/get-transcript", tags=["transcript"])
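
# This router is meant to be included in the main FastAPI app, e.g.
# (the application module path shown here is an assumption):
#
#   from fastapi import FastAPI
#   from routers.get_transcript import router as transcript_router
#
#   app = FastAPI()
#   app.include_router(transcript_router)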

# model_size options used here: distil-large-v2, distil-large-v3
@router.get("/")
def get_transcript(
    audio_path: str,
    model_size: str = "distil-large-v3",
    api_key: str = Depends(get_api_key),
):
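    """Transcribe the audio file at `audio_path` with faster-whisper (CPU, INT8).

    Returns the full transcript, a list of timestamped sentence segments,
    and the elapsed transcription time in seconds.
    """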
    # Run on GPU with FP16
    # model = WhisperModel(model_size, device="cuda", compute_type="float16")

    # or run on GPU with INT8
    # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
    # or run on CPU with INT8
    # model_run = WhisperModel(model_size, device="cpu", compute_type="int8")


    print(f"model>>>: {model_size}")

    output_audio_folder = "./cached/audio"  # only used by the commented-out conversion steps below

    # if not os.path.exists(output_audio_folder):
    #     os.makedirs(output_audio_folder)

    # output_file = f"{output_audio_folder}/{audio_path.split('/')[-1].split('.')[0]}.mp3"

    st = time.time()

    # convert_to_audio(audio_path.strip(), output_file)

    try:
        model_run = WhisperModel(model_size, device="cpu", compute_type="int8")
        segments, info = model_run.transcribe(
            audio_path,
            beam_size=16,
            language="en",
            condition_on_previous_text=False,
        )
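        # `segments` is a lazy generator: the actual decoding runs while the
        # loop below iterates over it, so it counts toward elapsed_time but is
        # not covered by this try/except. beam_size=16 trades speed for accuracy
        # (the library default is 5); condition_on_previous_text=False keeps one
        # segment's errors from propagating into the next.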

        # if os.path.exists(output_file):
        #     os.remove(output_file)
    except Exception as error:
        # if os.path.exists(output_file):
        #     os.remove(output_file)
        # A failed model load or transcription is a server-side problem, not an
        # authorization failure, so report it as a 500 rather than a 403.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"error>>>: {error}",
        )

    text = ""
    list_sentences = []

    for segment in segments:
        text += segment.text
        list_sentences.append({
            "start_time": segment.start,
            "end_time": segment.end,
            "text": segment.text,
        })

    et = time.time()
    elapsed_time = et - st

    return {
        "text": text,
        "list_sentence": list_sentences,
        "elapsed_time": round(elapsed_time, 2),
    }

#     time.sleep(5)

#     return {
#     "text": " She has a dimble on her left cheek, it adds charm to her facial features. The dimple is a genetic trait that she inherited from her mother. She's always been proud of it. People compliment her on it. She can't help but smile wider.",
#     "list_sentence": [
#         {
#             "start_time": 0.0,
#             "end_time": 8.0,
#             "text": " She has a dimble on her left cheek, it adds charm to her facial features."
#         },
#         {
#             "start_time": 8.0,
#             "end_time": 16.0,
#             "text": " The dimple is a genetic trait that she inherited from her mother."
#         },
#         {
#             "start_time": 16.0,
#             "end_time": 20.0,
#             "text": " She's always been proud of it."
#         },
#         {
#             "start_time": 20.0,
#             "end_time": 24.0,
#             "text": " People compliment her on it."
#         },
#         {
#             "start_time": 24.0,
#             "end_time": 28.0,
#             "text": " She can't help but smile wider."
#         }
#     ]
# }
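
# Example request (illustrative; the header name that get_api_key expects is an
# assumption and may differ in libs/header_api_auth.py):
#
#   curl -G "http://localhost:8000/get-transcript/" \
#        --data-urlencode "audio_path=/path/to/audio.wav" \
#        --data-urlencode "model_size=distil-large-v3" \
#        -H "api-key: <your-key>"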