learningai's picture
Update utils.py
66b0cbb
raw
history blame
1.9 kB
import os
import subprocess
from datetime import datetime
from pathlib import Path
from typing import Optional

import whisper

import config
from logger import logging
def get_video_from_yt(video_url : str, save_file_dir : str, video_name : str="yt_audio") -> Optional[str]:
    """
    Download the audio track of a YouTube video as a .wav file using yt-dlp.

    The output file is named ``<video_name>_<ddmmYYYY__HHMMSS>.wav`` and is
    placed in ``save_file_dir``, which is created if it does not exist.

    Args:
        video_url: URL of the video to download.
        save_file_dir: Directory in which the audio file is saved.
        video_name: Prefix of the output file name.

    Returns:
        The path of the downloaded .wav file as a string, or None if the
        download failed (yt-dlp missing, non-zero exit, no output file, or
        any other exception, which is logged rather than raised).
    """
    logging.info(f"Attempting youtube video download for URL : {video_url}")
    try:
        # create directory if not exists
        os.makedirs(save_file_dir, exist_ok=True)
        timestamp = datetime.now().strftime('%d%m%Y__%H%M%S')
        filepath = f"{save_file_dir}/{video_name}_{timestamp}"

        # Pass the arguments as a list (no shell) so that characters such as
        # '&', ';' or spaces in the URL or path are never interpreted by a
        # shell. The "--" marks the end of options so a URL starting with "-"
        # cannot be mistaken for a yt-dlp flag.
        command = [
            "yt-dlp",
            "--quiet",
            "-o", filepath,
            "-x",
            "--audio-format", "wav",
            "--",
            video_url,
        ]
        completed = subprocess.run(command, check=False)
        if completed.returncode != 0:
            logging.info(f"yt-dlp exited with return code {completed.returncode}")

        # The existence of the .wav file is the success criterion.
        audio_file = f"{filepath}.wav"
        if os.path.exists(audio_file):
            logging.info(f"Download successful. Audio file path : {audio_file}")
            return audio_file

        logging.info("Download unsuccessful. Please check logs.")
        return None
    except Exception as e:
        # Best-effort contract: callers get None instead of an exception
        # (this also covers yt-dlp not being installed).
        logging.info("Download unsuccessful.")
        logging.exception(e)
        return None
def get_text_from_audio(audio_path : str) -> Optional[str]:
    """
    Transcribe an audio file and return the recognised text.

    Loads the Whisper 'tiny' model (with ``config.MODEL_DIR`` passed as the
    model download root) on every call and runs it on ``audio_path``.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The value stored under the 'text' key of the transcription result,
        or None if anything goes wrong (the exception is logged, not raised).
    """
    logging.info(f"Attempting to extract text from : {audio_path}")
    try:
        transcriber = whisper.load_model(
            name='tiny',
            download_root=config.MODEL_DIR,
        )
        transcription = transcriber.transcribe(audio_path)
        logging.info("Extraction successful.")
        return transcription['text']
    except Exception as err:
        # Best-effort contract: report the failure and hand back None.
        logging.info("Extraction failed.")
        logging.exception(err)
        return None
# Example usage:
# URL = "https://www.youtube.com/watch?v=iO5LjrQaN9s"
# save_file_dir = "audio_files"
# video_path = get_video_from_yt(URL, save_file_dir)
# if video_path:
#     print(get_text_from_audio(video_path))