File size: 4,716 Bytes
bea81c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f32772f
bea81c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9344ba5
 
bea81c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2127ae4
bea81c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a23cd86
 
 
 
bea81c7
 
 
 
 
2127ae4
 
 
bea81c7
 
 
 
 
 
 
 
 
 
 
2127ae4
 
bea81c7
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
from moviepy.editor import VideoFileClip
import os
import requests
from ai_config_faiss import timestamp_to_seconds
from pytube import YouTube
import yt_dlp


# Directory that generate_clips() writes its output clips into.
CLIP_DIR = "clip/"
# exist_ok=True avoids the check-then-create race if the directory appears
# between the existence test and the makedirs call.
os.makedirs(CLIP_DIR, exist_ok=True)


def get_youtube_video(cache_dir, yt_id):
    """Return the local path of a YouTube video, downloading it if needed.

    The video is stored as ``<yt_id>.mp4`` inside ``cache_dir``. An existing
    file is reused; otherwise the download backends are tried in order
    (yt-dlp, pytube, Cobalt API) until one reports success.

    Args:
        cache_dir: Directory holding cached downloads. A trailing path
            separator is optional.
        yt_id: YouTube video ID.

    Returns:
        The path of the cached/downloaded file, or None if every backend
        failed.
    """
    yt_url = f"https://www.youtube.com/watch?v={yt_id}"
    # os.path.join tolerates cache_dir with or without a trailing separator;
    # plain concatenation silently produced "<cache_dir><yt_id>.mp4".
    download_file = os.path.join(cache_dir, f"{yt_id}.mp4")

    if os.path.exists(download_file):
        print(f"{yt_url} already cached.")
        return download_file

    # Make sure the backends have somewhere to write to.
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)

    # Backends in order of preference; each returns True on success.
    downloaders = (
        try_yt_dlp_download,
        try_pytube_download,
        try_cobalt_api,
    )
    for downloader in downloaders:
        if downloader(yt_url, download_file):
            return download_file

    return None


def try_cobalt_api(yt_url, download_file, timeout=30):
    """Download a YouTube video through the Cobalt API.

    Posts the video URL to the Cobalt JSON endpoint and, if the reply has
    status ``success`` and a ``url``, streams that URL to ``download_file``.

    Args:
        yt_url: Full YouTube watch URL.
        download_file: Destination path of the video file.
        timeout: Timeout in seconds applied to each HTTP request.

    Returns:
        True if the video was written to ``download_file``, False otherwise.
    """
    cobalt_api_url = "https://api.cobalt.tools/api/json"
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json"
    }
    payload = {
        "url": yt_url,
        "vCodec": "h264",
        "vQuality": "720",
        "aFormat": "mp3",
        "isAudioOnly": False
    }
    # Download into a side file first so an interrupted transfer never
    # leaves a truncated file that looks like a valid cached video.
    partial_file = download_file + ".part"

    try:
        response = requests.post(
            cobalt_api_url, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        data = response.json()

        if data.get('status') != 'success' or 'url' not in data:
            print(f"Cobalt API Error: {data.get('text', 'Unknown error')}")
            return False

        # Stream in chunks instead of buffering the whole video in memory.
        with requests.get(
                data['url'], stream=True, timeout=timeout) as video_response:
            video_response.raise_for_status()
            with open(partial_file, 'wb') as file:
                for chunk in video_response.iter_content(
                        chunk_size=1024 * 1024):
                    file.write(chunk)

        os.replace(partial_file, download_file)
    except (requests.exceptions.RequestException, OSError, ValueError) as e:
        # ValueError covers JSON decoding failures on older requests
        # versions; OSError covers local file-system failures.
        if os.path.exists(partial_file):
            try:
                os.remove(partial_file)
            except OSError:
                pass  # best-effort cleanup; the failure is reported below
        print(f"Cobalt API Error: Unable to process the YouTube URL. {str(e)}")
        return False

    print(f"Video downloaded successfully using Cobalt API: "
          f"{download_file}")
    return True


def try_pytube_download(yt_url, download_file):
    """Download a YouTube video with pytube.

    Picks the highest-resolution progressive mp4 stream and saves it to
    ``download_file``.

    Args:
        yt_url: Full YouTube watch URL.
        download_file: Destination path of the video file.

    Returns:
        True on success, False if no suitable stream exists or pytube raised.
    """
    try:
        yt = YouTube(yt_url)
        progressive_mp4 = yt.streams.filter(
            progressive=True, file_extension='mp4')
        video = progressive_mp4.order_by('resolution').desc().first()

        # first() yields None when the filter matched nothing; report that
        # explicitly rather than via an AttributeError on None.
        if video is None:
            print("Pytube Error: Unable to download the YouTube video. "
                  "No progressive mp4 stream available.")
            return False

        video.download(filename=download_file)
        print(f"Video downloaded successfully using pytube: {download_file}")
        return True
    except Exception as e:
        # Deliberately broad: this is a best-effort fallback and any failure
        # should let the caller move on to the next backend.
        print(f"Pytube Error: Unable to download the YouTube video. {str(e)}")
        return False


def try_yt_dlp_download(yt_url, download_file):
    """Download a YouTube video with yt-dlp.

    Args:
        yt_url: Full YouTube watch URL.
        download_file: Destination path (used verbatim as the output
            template).

    Returns:
        True if yt-dlp finished and ``download_file`` exists, False
        otherwise.
    """
    import shutil

    # In yt-dlp selectors ``ext`` is the file extension (mp4, m4a, webm...),
    # so the former ``ext=h264`` / ``ext=mp3`` filters could never match.
    # Select H.264 by codec and mp4/m4a by container instead.
    if shutil.which("ffmpeg"):
        video_format = (
            'bestvideo[vcodec^=avc1]+bestaudio[ext=m4a]/best[ext=mp4]/best')
    else:
        # Separate video+audio streams need ffmpeg to be merged; without it
        # only pre-merged formats produce a usable single file.
        video_format = 'best[ext=mp4]/best'

    ydl_opts = {
        'format': video_format,
        'outtmpl': download_file,
        'merge_output_format': 'mp4',
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([yt_url])
    except Exception as e:
        # Deliberately broad: any failure should fall through to the next
        # download backend.
        print(f"yt-dlp Error: Unable to download the YouTube video. {str(e)}")
        return False

    # Guard against a run that "succeeded" without producing the target file.
    if not os.path.exists(download_file):
        print("yt-dlp Error: Unable to download the YouTube video. "
              f"Expected output file is missing: {download_file}")
        return False

    print(f"Video downloaded successfully using yt-dlp: {download_file}")
    return True


def main():
    """Download a sample video into CLIP_DIR and report the outcome."""
    # get_youtube_video requires (cache_dir, yt_id); the previous
    # single-argument call raised TypeError.
    video = get_youtube_video(CLIP_DIR, "tCDvOQI3pco")
    if video:
        print(f"Downloaded video: {video}")
    else:
        print("Failed to download video.")


def generate_clips(cache_dir, info):
    """Cut one clip per transcript entry out of the entry's YouTube video.

    The source video is fetched through ``get_youtube_video``. Each clip is
    padded by one second on both sides (clamped to the video bounds) and
    written to ``CLIP_DIR``; clips that already exist on disk are not
    regenerated.

    Args:
        cache_dir: Directory used to cache the downloaded source video.
        info: Dict with ``info['metadata']['youtube_id']`` and
            ``info['transcript']``, a list of entries whose ``metadata``
            carries ``start_timestamp`` and ``end_timestamp``.

    Returns:
        ``info['transcript']``. When the video was available, every entry's
        ``metadata['download']`` is set to the clip path; when the download
        failed the transcript is returned unmodified.
    """
    yt_id = info['metadata']['youtube_id']
    download_file = get_youtube_video(cache_dir, yt_id)
    transcript = info['transcript']

    if not download_file:
        print(f"Failed to download video for YouTube ID: {yt_id}")
        return transcript

    video = VideoFileClip(download_file)
    try:
        for entry in transcript:
            metadata = entry['metadata']
            start_time = timestamp_to_seconds(metadata['start_timestamp'])
            end_time = timestamp_to_seconds(metadata['end_timestamp'])

            # Start 1 second earlier, but not before 0.
            start_time = max(0, start_time - 1)
            # An end of 0 is treated as "until the end of the video";
            # otherwise end 1 second later, capped at the video duration.
            if end_time != 0:
                end_time = min(video.duration, end_time + 1)
            else:
                end_time = video.duration

            output_filename = (
                f"{CLIP_DIR}{yt_id}-"
                f"{start_time}-{end_time}.mp4"
            )
            metadata['download'] = output_filename

            # Reuse clips generated by an earlier run.
            if os.path.exists(output_filename):
                continue

            clip = video.subclip(start_time, end_time)
            clip.write_videofile(
                output_filename, codec="libx264", audio_codec="aac")

            print(f"Generated clip: {output_filename}")
    finally:
        # Release the video's resources even if a clip fails to render.
        video.close()

    return transcript


# Run the sample download only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()