faster-whisper-readme / downloader_manager.py
ManBib's picture
added possibility for video processing via link
96d549d
raw
history blame
1.94 kB
import logging
import os
import tempfile
from io import BytesIO
import requests
from moviepy.editor import VideoFileClip
from tqdm import tqdm
def _download_video_bytes(link: str, timeout: float) -> bytes:
    """Stream the resource at *link* into memory and return its raw bytes."""
    # Using the response as a context manager releases the connection even
    # when the transfer is interrupted part-way through.
    with requests.get(link, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        # A missing Content-Length header yields 0, i.e. an unknown total.
        total_size = int(response.headers.get('content-length', 0))
        buffer = BytesIO()
        with tqdm(total=total_size, unit='B', unit_scale=True, desc="Downloading...") as bar:
            for chunk in response.iter_content(chunk_size=1024):
                bar.update(len(chunk))
                buffer.write(chunk)
    return buffer.getvalue()


def _extract_audio_bytes(video_bytes: bytes) -> bytes:
    """Return the AAC-encoded audio track of the MP4 data in *video_bytes*."""
    # VideoFileClip is given a filesystem path, so the data is staged on disk.
    # A temporary directory guarantees both files are removed on every exit
    # path, and lets the audio be written to a path that no other handle
    # currently holds open.
    with tempfile.TemporaryDirectory() as work_dir:
        video_path = os.path.join(work_dir, "video.mp4")
        audio_path = os.path.join(work_dir, "audio.aac")

        with open(video_path, "wb") as video_file:
            video_file.write(video_bytes)

        with VideoFileClip(video_path) as clip:
            if clip.audio is None:
                raise ValueError("the video has no audio track")
            clip.audio.write_audiofile(audio_path, codec='aac')

        with open(audio_path, "rb") as audio_file:
            return audio_file.read()


def download_mp4_and_extract_audio(link: str, timeout: float = 30.0):
    """Download an MP4 file from a link and return its video and audio bytes.

    Args:
        link: URL of the MP4 file to download.
        timeout: Timeout in seconds passed to ``requests.get``. Without one,
            a stalled server would block this call indefinitely.

    Returns:
        A ``(video_bytes, audio_bytes)`` tuple on success, where
        ``audio_bytes`` is the AAC-encoded audio track of the video.
        ``(None, None)`` if the download or the audio extraction fails;
        the failure is logged rather than raised.
    """
    logging.info("Starting the download of the MP4 file...")
    try:
        video_bytes = _download_video_bytes(link, timeout)
        logging.info("Extracting audio from video...")
        audio_bytes = _extract_audio_bytes(video_bytes)
    except requests.exceptions.HTTPError as e:
        logging.error("HTTP Error: %s", e)
    except Exception as e:
        # Deliberate best-effort boundary: callers rely on (None, None)
        # instead of an exception for any failure.
        logging.error("Failed to download MP4 and extract audio: %s", e)
    else:
        logging.info("Download and audio extraction completed")
        return video_bytes, audio_bytes
    return None, None