|
from moviepy.editor import VideoFileClip |
|
import os |
|
import requests |
|
from ai_config_faiss import timestamp_to_seconds |
|
from pytube import YouTube |
|
import yt_dlp |
|
|
|
|
|
# Directory (relative to the current working directory) where generated clips
# are written. The trailing slash matters: the value is used as a plain string
# prefix when clip file names are built.
CLIP_DIR = "clip/"

# exist_ok=True replaces the separate os.path.exists() check, which could race
# with another process creating the directory between the check and the call.
os.makedirs(CLIP_DIR, exist_ok=True)
|
|
|
|
|
def get_youtube_video(cache_dir, yt_id):
    """Return the local MP4 path for a YouTube video, downloading it if needed.

    The file is looked up (and stored) as ``<cache_dir>/<yt_id>.mp4``. If it
    is not already present, each downloader is tried in turn (yt-dlp, then
    pytube, then the Cobalt API) until one reports success.

    Args:
        cache_dir: Directory holding cached downloads. A trailing path
            separator is accepted but no longer required.
        yt_id: YouTube video ID (the ``v=`` query parameter).

    Returns:
        The path of the cached/downloaded file, or ``None`` when every
        downloader failed.
    """
    yt_url = f"https://www.youtube.com/watch?v={yt_id}"
    # os.path.join only inserts a separator when cache_dir lacks one, so
    # callers passing "cache/" get the same path as before while callers
    # passing "cache" no longer end up with "cache<id>.mp4".
    download_file = os.path.join(cache_dir, yt_id + ".mp4")

    if os.path.exists(download_file):
        print(f"{yt_url} already cached.")
        return download_file

    # Ordered fallbacks: the first downloader that returns True wins.
    for downloader in (try_yt_dlp_download, try_pytube_download, try_cobalt_api):
        if downloader(yt_url, download_file):
            return download_file

    return None
|
|
|
|
|
def try_cobalt_api(yt_url, download_file, timeout=30):
    """Download a video through the Cobalt API and save it to ``download_file``.

    A JSON request is POSTed to the Cobalt endpoint; when the reply has
    ``status == 'success'`` and a ``url`` field, that URL is streamed to disk.

    Args:
        yt_url: Full URL of the video to fetch.
        download_file: Destination path for the downloaded video.
        timeout: Timeout in seconds passed to each HTTP request, so a stalled
            server cannot hang the caller indefinitely.

    Returns:
        ``True`` if the video was written to ``download_file``, ``False`` on
        any API, network, JSON-decoding or file-system error (the error is
        printed, never raised).
    """
    cobalt_api_url = "https://api.cobalt.tools/api/json"
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
    }
    payload = {
        "url": yt_url,
        "vCodec": "h264",
        "vQuality": "720",
        "aFormat": "mp3",
        "isAudioOnly": False,
    }
    # Stream into a sibling temp file first so that an interrupted transfer
    # never leaves a truncated file at download_file (which a later
    # "already cached" existence check would mistake for a complete video).
    partial_file = download_file + ".part"

    try:
        response = requests.post(
            cobalt_api_url, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        data = response.json()

        # NOTE(review): only status 'success' is accepted, as in the original;
        # confirm against the Cobalt API docs whether other statuses can also
        # carry a usable 'url'.
        usable = (isinstance(data, dict)
                  and data.get('status') == 'success'
                  and 'url' in data)
        if not usable:
            text = (data.get('text', 'Unknown error')
                    if isinstance(data, dict) else 'Unknown error')
            print(f"Cobalt API Error: {text}")
            return False

        # stream=True + iter_content keeps memory use bounded instead of
        # loading the whole video into RAM via .content.
        with requests.get(data['url'], stream=True,
                          timeout=timeout) as video_response:
            video_response.raise_for_status()
            with open(partial_file, 'wb') as file:
                for chunk in video_response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        file.write(chunk)

        os.replace(partial_file, download_file)
    except (requests.exceptions.RequestException, ValueError, OSError) as e:
        print(f"Cobalt API Error: Unable to process the YouTube URL. {str(e)}")
        # Best-effort cleanup of a half-written temp file.
        try:
            os.remove(partial_file)
        except OSError:
            pass
        return False

    print(f"Video downloaded successfully using Cobalt API: "
          f"{download_file}")
    return True
|
|
|
|
|
def try_pytube_download(yt_url, download_file):
    """Download a video with pytube and save it to ``download_file``.

    The stream chosen is the first progressive MP4 stream after ordering by
    resolution, descending.

    Args:
        yt_url: Full URL of the YouTube video.
        download_file: Destination path, passed to pytube as ``filename``.

    Returns:
        ``True`` on success, ``False`` if no matching stream exists or pytube
        raised (the error is printed, never raised).
    """
    try:
        yt = YouTube(yt_url)
        progressive_mp4 = yt.streams.filter(
            progressive=True, file_extension='mp4')
        video = progressive_mp4.order_by('resolution').desc().first()

        # first() yields None when the filter matched nothing; report that
        # explicitly instead of failing with an opaque AttributeError.
        if video is None:
            print("Pytube Error: Unable to download the YouTube video. "
                  "No progressive MP4 stream available.")
            return False

        video.download(filename=download_file)
        print(f"Video downloaded successfully using pytube: {download_file}")
        return True
    except Exception as e:
        # Deliberately broad: this is a best-effort fallback and the caller
        # moves on to the next downloader when it fails.
        print(f"Pytube Error: Unable to download the YouTube video. {str(e)}")
        return False
|
|
|
|
|
def try_yt_dlp_download(yt_url, download_file):
    """Download a video with yt-dlp and save it to ``download_file``.

    Args:
        yt_url: Full URL of the YouTube video.
        download_file: Destination path, used verbatim as yt-dlp's ``outtmpl``.

    Returns:
        ``True`` if yt-dlp finished and ``download_file`` exists, ``False``
        otherwise (the error is printed, never raised).
    """
    ydl_opts = {
        # The previous selector filtered on ext=h264 / ext=mp3, but ``ext`` is
        # the container extension (mp4, webm, m4a, ...), so those alternatives
        # could never match and the selector always fell through to ``best``.
        # Select a single pre-muxed file (no merge step required): prefer
        # H.264-in-MP4 so the content matches the .mp4 destination, then any
        # MP4, then whatever is best.
        'format': 'best[ext=mp4][vcodec^=avc1]/best[ext=mp4]/best',
        'outtmpl': download_file,
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([yt_url])

        # Guard against yt-dlp finishing without producing the expected path,
        # so callers never receive a path that does not exist.
        if not os.path.exists(download_file):
            print("yt-dlp Error: Unable to download the YouTube video. "
                  f"Expected output file not found: {download_file}")
            return False

        print(f"Video downloaded successfully using yt-dlp: {download_file}")
        return True
    except Exception as e:
        # Deliberately broad: this is a best-effort downloader and the caller
        # falls back to the next one on failure.
        print(f"yt-dlp Error: Unable to download the YouTube video. {str(e)}")
        return False
|
|
|
|
|
def main():
    """Smoke test: download one hard-coded YouTube video and report the result."""
    # get_youtube_video requires (cache_dir, yt_id); the previous one-argument
    # call raised TypeError. CLIP_DIR is used as the cache directory because
    # it is created at import time and already ends with a path separator.
    video = get_youtube_video(CLIP_DIR, "tCDvOQI3pco")
    if video:
        print(f"Downloaded video: {video}")
    else:
        print("Failed to download video.")
|
|
|
|
|
def generate_clips(cache_dir, info):
    """Cut one video clip per transcript entry and record its path on the entry.

    The source video is obtained through ``get_youtube_video``. For every
    transcript entry the start/end timestamps are converted to seconds,
    padded by one second on each side (clamped to the video bounds), and the
    resulting span is written to ``CLIP_DIR`` unless that file already exists.

    Args:
        cache_dir: Directory forwarded to ``get_youtube_video``.
        info: Mapping with ``info['metadata']['youtube_id']`` and
            ``info['transcript']``; each transcript entry provides
            ``entry['metadata']['start_timestamp']`` and
            ``entry['metadata']['end_timestamp']``.

    Returns:
        ``info['transcript']``. When the video was available, each entry's
        ``metadata['download']`` has been set to its clip path (entries are
        mutated in place); when the download failed the transcript is
        returned untouched.
    """
    yt_id = info['metadata']['youtube_id']
    download_file = get_youtube_video(cache_dir, yt_id)
    transcript = info['transcript']

    if not download_file:
        print(f"Failed to download video for YouTube ID: {yt_id}")
        return transcript

    video = VideoFileClip(download_file)
    try:
        duration = video.duration

        for entry in transcript:
            metadata = entry['metadata']
            start_time = timestamp_to_seconds(metadata['start_timestamp'])
            end_time = timestamp_to_seconds(metadata['end_timestamp'])

            # Pad the span by one second on each side without leaving the
            # video; an end time of 0 means "run to the end of the video".
            start_time = max(0, start_time - 1)
            end_time = min(duration, end_time + 1) if end_time != 0 else duration

            output_filename = (
                f"{CLIP_DIR}{yt_id}-"
                f"{start_time}-{end_time}.mp4"
            )
            metadata['download'] = output_filename

            # Clips are keyed by their time span, so an existing file can be
            # reused as-is.
            if os.path.exists(output_filename):
                continue

            clip = video.subclip(start_time, end_time)
            clip.write_videofile(
                output_filename, codec="libx264", audio_codec="aac")
            print(f"Generated clip: {output_filename}")
    finally:
        # Release the source video even if extraction or encoding raised.
        video.close()

    return transcript
|
|
|
|
|
# Run the download smoke test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|