File size: 1,842 Bytes
a9e849e
c5bb903
 
 
a9e849e
 
c5bb903
 
3c36fb5
 
c5bb903
3c36fb5
c5bb903
 
 
 
 
a9e849e
c5bb903
 
a9e849e
 
 
 
 
3c36fb5
 
 
 
 
a9e849e
c5bb903
4fef98a
c5bb903
 
a9e849e
 
 
c5bb903
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
import time
from fastapi import APIRouter, Depends, HTTPException, status

from libs.convert_to_audio import convert_to_audio
from libs.transformer.get_transcript import get_transcript_gpu
from libs.transformer.get_transcript_2 import get_transcribe_transformers

from langchain_community.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader

from libs.header_api_auth import get_api_key
from libs.transformer.youtube_download import download_youtube


# Router for the transformer-based transcript endpoint(s) declared in this
# module: routes registered on it are served under the
# "/get-transcript-transformer" prefix and carry the "transcript" tag.
router = APIRouter(prefix="/get-transcript-transformer", tags=["transcript"])

def _converted_audio_path(audio_path: str, output_folder: str) -> str:
    """Return the ``.mp3`` path inside *output_folder* used for *audio_path*.

    Only the final extension of the source's base name is dropped, so
    ``talk.part1.wav`` maps to ``talk.part1.mp3`` instead of being truncated
    at the first dot (which made distinct sources share one cache file).
    """
    stem = os.path.splitext(os.path.basename(audio_path.strip()))[0]
    return f"{output_folder}/{stem}.mp3"


@router.get("/")
def get_transcript(audio_path: str, model_size: str = "distil-whisper/distil-small.en", api_key: str = Depends(get_api_key)):
    """Transcribe an audio source and return its text with per-chunk timing.

    Args:
        audio_path: Source to transcribe. When it contains
            ``https://www.youtube.com`` it is handed to ``download_youtube``;
            otherwise it is passed to ``convert_to_audio``, which writes an
            ``.mp3`` under ``./cached/audio``.
        model_size: Model identifier forwarded to
            ``get_transcribe_transformers``.
        api_key: Resolved by the ``get_api_key`` dependency; not used in the
            body.

    Returns:
        A dict with ``text`` (the full transcript), ``list_sentence`` (one
        entry per chunk with ``start_time``, ``end_time`` and ``text``) and
        ``elapsed_time`` (seconds, rounded to two decimals).

    Raises:
        HTTPException: With status 403 when transcription fails; the
            underlying error message is included in ``detail``.
    """
    # Monotonic clock: the elapsed time cannot go negative on wall-clock jumps.
    started = time.perf_counter()

    output_audio_folder = "./cached/audio"
    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(output_audio_folder, exist_ok=True)

    output_file = None
    try:
        if "https://www.youtube.com" in audio_path:
            output_file = download_youtube(audio_path)
        else:
            output_file = _converted_audio_path(audio_path, output_audio_folder)
            convert_to_audio(audio_path.strip(), output_file)

        try:
            text, chunks = get_transcribe_transformers(output_file, model_size)
        except Exception as error:
            # NOTE(review): 403 is kept for client compatibility, but a
            # transcription failure looks like a server-side error (5xx) --
            # confirm with API consumers before changing it.
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=f"error>>>: {error}") from error
    finally:
        # Always drop the working audio file, including a partial file left
        # behind by a failed conversion.
        if output_file:
            try:
                os.remove(output_file)
            except FileNotFoundError:
                pass

    sentences = [
        {
            "start_time": chunk.get("timestamp")[0],
            "end_time": chunk.get("timestamp")[1],
            "text": chunk.get("text"),
        }
        for chunk in chunks
    ]

    return {
        "text": text,
        'list_sentence': sentences,
        'elapsed_time': round(time.perf_counter() - started, 2),
    }