# video-qa / utils.py
# Thao Pham - First commit (d50ce1c)
# File size at time of capture: 3.36 kB
import os
from io import StringIO, BytesIO
from typing import Iterator, TextIO, List, Dict, Any, Optional, Sequence, Union
import base64
import glob
from tqdm import tqdm
from pytubefix import YouTube, Stream
import cv2
import json
# Taken from the course: https://www.deeplearning.ai/short-courses/multimodal-rag-chat-with-videos/
def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int=-1) -> str:
    """Render transcript segments as subtitle text in the requested format.

    Args:
        segments: Iterator of segment dicts. They are passed through untouched
            to the subtitle writer, so their schema is whatever that writer
            expects.
        format: Either ``'vtt'`` or ``'srt'``.
        maxLineWidth: Forwarded to the writer as its ``maxLineWidth`` argument.

    Returns:
        Everything the selected writer wrote, as one string.

    Raises:
        ValueError: If ``format`` is neither ``'vtt'`` nor ``'srt'``.
            (``ValueError`` is a subclass of ``Exception``, so callers that
            catch ``Exception`` keep working.)
    """
    # NOTE(review): write_vtt / write_srt are not defined or imported in the
    # visible part of this file -- confirm they are provided elsewhere,
    # otherwise the two supported formats fail with NameError.
    if format == 'vtt':
        writer = write_vtt
    elif format == 'srt':
        writer = write_srt
    else:
        # Reject unsupported formats before allocating the buffer.
        raise ValueError(f"Unknown format {format}")

    segmentStream = StringIO()
    writer(segments, file=segmentStream, maxLineWidth=maxLineWidth)
    # getvalue() returns the full buffer regardless of the current position.
    return segmentStream.getvalue()
def download_video(video_url, path='/tmp/'):
    """Return a local path for a video, downloading it from YouTube if needed.

    Resolution order:
      1. If ``video_url`` does not start with ``'http'`` it is treated as a
         file name and joined onto ``path`` (existence is not checked).
      2. If ``path`` already contains any ``*.mp4`` file, the first glob match
         is returned and YouTube is not contacted.
      3. Otherwise a progressive mp4 stream is selected (720p preferred, else
         the highest resolution available) and downloaded into ``path``
         unless a file with the stream's default name is already there.

    Args:
        video_url: A URL starting with ``'http'`` or a local file name.
        path: Directory used both as cache and as download target.

    Returns:
        Path of the local video file.

    Raises:
        RuntimeError: If no progressive mp4 stream is available for the URL.
    """
    print(f'Getting video information for {video_url}')
    if not video_url.startswith('http'):
        return os.path.join(path, video_url)

    # NOTE(review): this returns *any* mp4 found in `path`, which is not
    # necessarily the video behind `video_url`, and glob order is arbitrary.
    # Kept as-is because callers may rely on it to avoid network access.
    cached = glob.glob(os.path.join(path, '*.mp4'))
    if cached:
        return cached[0]

    def progress_callback(stream: Stream, data_chunk: bytes, bytes_remaining: int) -> None:
        # `pbar` is looked up when the callback fires; it is bound by the
        # `with` statement below before the download starts.
        pbar.update(len(data_chunk))

    yt = YouTube(video_url, on_progress_callback=progress_callback)
    stream = yt.streams.filter(progressive=True, file_extension='mp4', res='720p').desc().first()
    if stream is None:
        stream = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
    if stream is None:
        # Previously this surfaced as an AttributeError on `None.default_filename`.
        raise RuntimeError(f'No progressive mp4 stream available for {video_url}')

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(path, exist_ok=True)
    filepath = os.path.join(path, stream.default_filename)
    if not os.path.exists(filepath):
        print('Downloading video from YouTube...')
        # The context manager closes the progress bar even if the download fails.
        with tqdm(desc='Downloading video from YouTube', total=stream.filesize, unit="bytes") as pbar:
            stream.download(path)
    return filepath
# A helper function that converts a time written as a string in WebVTT timestamp format into a time in milliseconds
def str2time(strtime):
    """Convert a WebVTT-style timestamp string into milliseconds.

    Accepts ``HH:MM:SS(.fff)`` and the short form ``MM:SS(.fff)`` in which
    the hours field is omitted. Surrounding double quotes are ignored.

    Args:
        strtime: Timestamp string, e.g. ``"00:01:30.500"``.

    Returns:
        The timestamp as a float number of milliseconds.

    Raises:
        ValueError: If a field is not numeric or the string does not have
            two or three colon-separated fields.
    """
    # strip character " if exists
    fields = [float(c) for c in strtime.strip('"').split(':')]
    if len(fields) == 2:
        # Short form without hours: treat as 0 hours.
        fields.insert(0, 0.0)
    if len(fields) != 3:
        raise ValueError(f"Expected a timestamp like HH:MM:SS.mmm, got {strtime!r}")
    hrs, mins, seconds = fields
    # get the corresponding time as total seconds
    total_seconds = hrs * 60**2 + mins * 60 + seconds
    total_milliseconds = total_seconds * 1000
    return total_milliseconds
# Resizes an image while maintaining its aspect ratio
def maintain_aspect_ratio_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize an image to a target width or height, keeping its aspect ratio.

    Args:
        image: Object exposing ``.shape``; the first two entries are read as
            ``(height, width)`` (presumably an OpenCV/NumPy image -- confirm
            against callers).
        width: Target width in pixels. When given, it takes precedence and
            ``height`` is ignored.
        height: Target height in pixels, used only when ``width`` is None.
        inter: Interpolation flag passed to ``cv2.resize``.

    Returns:
        The original ``image`` object when both ``width`` and ``height`` are
        None; otherwise the result of ``cv2.resize``.
    """
    # Grab the image size
    (h, w) = image.shape[:2]

    # Return original image if no need to resize
    if width is None and height is None:
        return image

    if width is None:
        # Scale by the height ratio and derive the matching width
        r = height / float(h)
        # Clamp to 1 px: int() truncation could otherwise yield a 0-sized
        # dimension for extreme aspect ratios, which cv2.resize rejects.
        dim = (max(1, int(w * r)), height)
    else:
        # Scale by the width ratio and derive the matching height
        r = width / float(w)
        dim = (width, max(1, int(h * r)))

    # Return the resized image
    return cv2.resize(image, dim, interpolation=inter)
def load_json_file(file_path):
    """Read a JSON file and return its parsed contents.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; don't depend on the platform's default
    # encoding (which differs on e.g. Windows).
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)