Spaces:
Sleeping
Sleeping
File size: 1,842 Bytes
a9e849e c5bb903 a9e849e c5bb903 3c36fb5 c5bb903 3c36fb5 c5bb903 a9e849e c5bb903 a9e849e 3c36fb5 a9e849e c5bb903 4fef98a c5bb903 a9e849e c5bb903 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import os
import time
from fastapi import APIRouter, Depends, HTTPException, status
from libs.convert_to_audio import convert_to_audio
from libs.transformer.get_transcript import get_transcript_gpu
from libs.transformer.get_transcript_2 import get_transcribe_transformers
from langchain_community.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
from libs.header_api_auth import get_api_key
from libs.transformer.youtube_download import download_youtube
# Router for the transformer-based transcript endpoint(s); routes registered
# on it are served under the "/get-transcript-transformer" prefix and carry
# the "transcript" tag.
router = APIRouter(
    prefix="/get-transcript-transformer",
    tags=["transcript"],
)
@router.get("/")
def get_transcript(audio_path: str, model_size: str = "distil-whisper/distil-small.en", api_key: str = Depends(get_api_key)):
    """Transcribe an audio source and return its text with per-chunk timings.

    If ``audio_path`` contains ``https://www.youtube.com`` it is handed to
    ``download_youtube``; otherwise it is passed to ``convert_to_audio``,
    which writes an ``.mp3`` into ``./cached/audio``. The resulting file is
    transcribed with ``get_transcribe_transformers`` and is always removed
    afterwards, whether or not processing succeeded.

    Args:
        audio_path: YouTube URL or path/URL of the media to transcribe.
            Surrounding whitespace is ignored.
        model_size: Model identifier forwarded to
            ``get_transcribe_transformers``.
        api_key: Value resolved by the ``get_api_key`` dependency; not used
            in the body (presumably it only gates access -- confirm against
            ``libs.header_api_auth``).

    Returns:
        A dict with ``text`` (the full transcript), ``list_sentence`` (one
        ``{"start_time", "end_time", "text"}`` entry per chunk) and
        ``elapsed_time`` (seconds spent in this handler, rounded to 2 places).

    Raises:
        HTTPException: with status 403 when ``get_transcribe_transformers``
            raises any exception.
    """
    started = time.perf_counter()
    source = audio_path.strip()

    output_audio_folder = "./cached/audio"
    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(output_audio_folder, exist_ok=True)

    output_file = None
    try:
        if "https://www.youtube.com" in source:
            output_file = download_youtube(source)
        else:
            # Build the stem outside the f-string: reusing the same quote
            # character inside an f-string is a SyntaxError before Python 3.12.
            stem = source.split("/")[-1].split(".")[0]
            output_file = f"{output_audio_folder}/{stem}.mp3"
            convert_to_audio(source, output_file)

        try:
            text, chunks = get_transcribe_transformers(output_file, model_size)
        except Exception as error:
            # NOTE(review): 403 is kept for backward compatibility with
            # existing clients; a 5xx status would describe this failure better.
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail=f"error>>>: {error}",
            ) from error
    finally:
        # Also runs when download/conversion fails, so a partially written
        # file does not linger in the cache folder.
        if output_file and os.path.exists(output_file):
            os.remove(output_file)

    sentences = []
    for chunk in chunks:
        # Tolerate a chunk without a timestamp instead of raising TypeError.
        timestamp = chunk.get("timestamp") or (None, None)
        sentences.append({
            "start_time": timestamp[0],
            "end_time": timestamp[1],
            "text": chunk.get("text"),
        })

    elapsed_time = time.perf_counter() - started
    return {
        "text": text,
        "list_sentence": sentences,
        "elapsed_time": round(elapsed_time, 2),
    }
|