codewithdark committed on
Commit 9b70717 · verified · 1 Parent(s): c3e3d75

Upload 9 files

utility/audio_generator.py ADDED
@@ -0,0 +1,32 @@
+ import edge_tts
+ import os
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+ async def generate_audio(text, outputFilename):
+     """Generate audio from text using edge_tts
+
+     Args:
+         text (str): Text to convert to speech
+         outputFilename (str): Path to save the audio file
+
+     Raises:
+         Exception: If audio generation fails
+     """
+     try:
+         # Ensure the output directory exists (fall back to the current
+         # directory when outputFilename has no directory component)
+         os.makedirs(os.path.dirname(outputFilename) or ".", exist_ok=True)
+
+         logger.info(f"Generating audio for text length: {len(text)}")
+         # en-US-GuyNeural is a US English male neural voice
+         communicate = edge_tts.Communicate(text, "en-US-GuyNeural")
+         await communicate.save(outputFilename)
+
+         if not os.path.exists(outputFilename):
+             raise Exception(f"Failed to create audio file at {outputFilename}")
+
+         logger.info(f"Successfully generated audio at {outputFilename}")
+     except Exception as e:
+         logger.error(f"Error generating audio: {str(e)}")
+         raise Exception(f"Audio generation failed: {str(e)}")
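
Usage note: generate_audio is a coroutine, so callers drive it with asyncio (a minimal sketch, not part of this commit; the output path is an illustrative example):

    import asyncio
    from utility.audio_generator import generate_audio

    # "audio_tts/output.mp3" is a hypothetical path used for illustration
    asyncio.run(generate_audio("Bananas are berries, but strawberries aren't.", "audio_tts/output.mp3"))
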
utility/background_video_generator.py ADDED
@@ -0,0 +1,71 @@
+ import os
+ import requests
+ from utility.utils import log_response, LOG_TYPE_PEXEL
+
+ PEXELS_API_KEY = os.environ.get('PEXELS_KEY')
+
+ def search_videos(query_string, orientation_landscape=True):
+     url = "https://api.pexels.com/videos/search"
+     headers = {
+         "Authorization": PEXELS_API_KEY,
+         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+     }
+     params = {
+         "query": query_string,
+         "orientation": "landscape" if orientation_landscape else "portrait",
+         "per_page": 15
+     }
+
+     response = requests.get(url, headers=headers, params=params)
+     json_data = response.json()
+     log_response(LOG_TYPE_PEXEL, query_string, json_data)
+
+     return json_data
+
+
+ def getBestVideo(query_string, orientation_landscape=True, used_vids=None):
+     # Avoid a mutable default argument, which would persist across calls
+     if used_vids is None:
+         used_vids = []
+     vids = search_videos(query_string, orientation_landscape)
+     videos = vids['videos']  # Extract the videos list from the JSON response
+
+     # Keep only videos of at least 1920x1080 (landscape) or 1080x1920 (portrait)
+     # whose aspect ratio is exactly 16:9 (or 9:16)
+     if orientation_landscape:
+         filtered_videos = [video for video in videos if video['width'] >= 1920 and video['height'] >= 1080 and video['width']/video['height'] == 16/9]
+     else:
+         filtered_videos = [video for video in videos if video['width'] >= 1080 and video['height'] >= 1920 and video['height']/video['width'] == 16/9]
+
+     # Sort the filtered videos by how close their duration is to 15 seconds
+     sorted_videos = sorted(filtered_videos, key=lambda x: abs(15 - int(x['duration'])))
+
+     # Return the first matching video file URL that has not been used yet
+     for video in sorted_videos:
+         for video_file in video['video_files']:
+             if orientation_landscape:
+                 if video_file['width'] == 1920 and video_file['height'] == 1080:
+                     if not (video_file['link'].split('.hd')[0] in used_vids):
+                         return video_file['link']
+             else:
+                 if video_file['width'] == 1080 and video_file['height'] == 1920:
+                     if not (video_file['link'].split('.hd')[0] in used_vids):
+                         return video_file['link']
+     print("NO LINKS found for this round of search with query:", query_string)
+     return None
+
+
+ def generate_video_url(timed_video_searches, video_server):
+     timed_video_urls = []
+     if video_server == "pexel":
+         used_links = []
+         for (t1, t2), search_terms in timed_video_searches:
+             url = ""
+             for query in search_terms:
+                 url = getBestVideo(query, orientation_landscape=True, used_vids=used_links)
+                 if url:
+                     used_links.append(url.split('.hd')[0])
+                     break
+             timed_video_urls.append([[t1, t2], url])
+     elif video_server == "stable_diffusion":
+         # get_images_for_video is expected from a stable-diffusion backend;
+         # it is not defined or imported in this module
+         timed_video_urls = get_images_for_video(timed_video_searches)
+
+     return timed_video_urls
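
Usage note: a minimal sketch of driving this module (assumes the PEXELS_KEY environment variable is set; the timed searches below are made-up examples):

    from utility.background_video_generator import generate_video_url

    # Each entry pairs a (start, end) span in seconds with candidate queries
    timed_searches = [
        [(0, 4), ["cheetah running", "fastest animal"]],
        [(4, 8), ["Great Wall of China", "iconic landmark"]],
    ]
    # Returns [[[t1, t2], url], ...]; url may be None/empty if nothing matched
    urls = generate_video_url(timed_searches, "pexel")
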
utility/conf.py ADDED
@@ -0,0 +1,14 @@
+ import os
+ import logging
+ from pathlib import Path
+
+ logger = logging.getLogger(__name__)
+
+ # ImageMagick configuration
+ IMAGEMAGICK_BINARY = r"C:\Program Files\ImageMagick-7.1.1-Q16-HDRI\magick.exe"
+
+ # Validate ImageMagick path
+ if not Path(IMAGEMAGICK_BINARY).exists():
+     error_msg = f"ImageMagick not found at {IMAGEMAGICK_BINARY}. Please install ImageMagick and update the path."
+     logger.error(error_msg)
+     raise FileNotFoundError(error_msg)
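
Usage note: the binary path above is Windows-specific. A portable variant could resolve it at import time instead (a sketch only, not what this commit does; it assumes the magick executable is on PATH on non-Windows systems):

    import os
    import shutil

    IMAGEMAGICK_BINARY = (
        r"C:\Program Files\ImageMagick-7.1.1-Q16-HDRI\magick.exe"
        if os.name == "nt"
        else shutil.which("magick") or "magick"  # assumption: ImageMagick is installed
    )
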
utility/render_engine.py ADDED
@@ -0,0 +1,187 @@
+ import time
+ import os
+ import tempfile
+ import zipfile
+ import platform
+ import subprocess
+ import logging
+ from pathlib import Path
+ from moviepy.editor import (AudioFileClip, CompositeVideoClip, CompositeAudioClip, ImageClip,
+                             TextClip, VideoFileClip)
+ from moviepy.audio.fx.audio_loop import audio_loop
+ from moviepy.audio.fx.audio_normalize import audio_normalize
+ import requests
+
+ logger = logging.getLogger(__name__)
+
+
+ def download_file(url, filename):
+     with open(filename, 'wb') as f:
+         headers = {
+             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+         }
+         response = requests.get(url, headers=headers)
+         f.write(response.content)
+
+ def search_program(program_name):
+     try:
+         search_cmd = "where" if platform.system() == "Windows" else "which"
+         return subprocess.check_output([search_cmd, program_name]).decode().strip()
+     except subprocess.CalledProcessError:
+         return None
+
+ def get_program_path(program_name):
+     program_path = search_program(program_name)
+     return program_path
+
+ def get_output_media(audio_file_path, timed_captions, background_video_data, video_server):
+     """Generate the final video with audio and captions
+
+     Args:
+         audio_file_path (str): Path to the audio file
+         timed_captions (list): List of timed captions
+         background_video_data (list): List of ((t1, t2), video_url) pairs
+         video_server (str): Name of the video source (e.g. 'pexel')
+
+     Returns:
+         str: Path to the output video file
+
+     Raises:
+         Exception: If video rendering fails
+     """
+     OUTPUT_FILE_NAME = "rendered_video.mp4"
+     from utility.conf import IMAGEMAGICK_BINARY
+     from moviepy.config import change_settings
+
+     try:
+         # Validate input files
+         if not Path(audio_file_path).exists():
+             raise FileNotFoundError(f"Audio file not found at {audio_file_path}")
+
+         try:
+             change_settings({"IMAGEMAGICK_BINARY": IMAGEMAGICK_BINARY})
+             logger.info(f"Using ImageMagick from: {IMAGEMAGICK_BINARY}")
+         except Exception as e:
+             logger.error(f"Error configuring ImageMagick: {str(e)}")
+             raise Exception(f"ImageMagick configuration failed: {str(e)}")
+     except Exception as e:
+         logger.error(f"Error in initial setup: {str(e)}")
+         raise Exception(f"Initial setup failed: {str(e)}")
+
+     visual_clips = []
+     temp_files = []  # Track downloaded files so they can be cleaned up later
+     for (t1, t2), video_url in background_video_data:
+         try:
+             # Download the video file to a temporary location
+             video_filename = tempfile.NamedTemporaryFile(delete=False).name
+             temp_files.append(video_filename)
+             logger.info(f"Downloading video from {video_url}")
+             download_file(video_url, video_filename)
+
+             if not Path(video_filename).exists():
+                 raise FileNotFoundError(f"Failed to download video from {video_url}")
+
+             # Create a VideoFileClip from the downloaded file
+             video_clip = VideoFileClip(video_filename)
+             if video_clip is None:
+                 raise ValueError(f"Failed to create video clip from {video_filename}")
+
+             video_clip = video_clip.set_start(t1)
+             video_clip = video_clip.set_end(t2)
+             visual_clips.append(video_clip)
+             logger.info(f"Added video clip from {video_url} ({t1}-{t2}s)")
+
+         except Exception as e:
+             logger.error(f"Error processing video {video_url}: {str(e)}")
+             raise Exception(f"Failed to process video {video_url}: {str(e)}")
+
+     audio_clips = []
+     try:
+         # Verify the audio file exists and is valid
+         if not os.path.exists(audio_file_path):
+             raise FileNotFoundError(f"Audio file not found: {audio_file_path}")
+
+         audio_file_clip = AudioFileClip(audio_file_path)
+         if audio_file_clip is None:
+             raise ValueError(f"Failed to create audio clip from {audio_file_path}")
+
+         # Normalize audio volume
+         audio_file_clip = audio_normalize(audio_file_clip)
+
+         # Verify audio duration
+         if audio_file_clip.duration <= 0:
+             raise ValueError("Audio file has zero or negative duration")
+
+         audio_clips.append(audio_file_clip)
+         logger.info(f"Added audio clip from {audio_file_path} (duration: {audio_file_clip.duration:.2f}s)")
+
+     except Exception as e:
+         logger.error(f"Error processing audio: {str(e)}")
+         raise Exception(f"Failed to process audio: {str(e)}")
+
+     for (t1, t2), text in timed_captions:
+         try:
+             # Caption style: bold white text with a black outline
+             text_clip = TextClip(
+                 txt=text,
+                 fontsize=70,
+                 font="Arial-Bold",
+                 color="white",
+                 stroke_width=2,
+                 stroke_color="black",
+                 method="label"
+             )
+             # Show the caption at the bottom-center of the frame
+             text_clip = text_clip.set_start(t1).set_end(t2).set_position(('center', 'bottom'))
+             visual_clips.append(text_clip)
+             logger.info(f"Added text clip: {text} ({t1}-{t2}s)")
+         except Exception as e:
+             logger.error(f"Error creating text clip: {str(e)}")
+             raise Exception(f"Failed to create text clip: {str(e)}")
+
+     try:
+         if not visual_clips:
+             raise ValueError("No visual clips available for rendering")
+
+         video = CompositeVideoClip(visual_clips)
+
+         if audio_clips:
+             audio = CompositeAudioClip(audio_clips)
+             # Extend the last clip if needed so the video covers the full audio
+             if video.duration < audio.duration:
+                 last_clip = visual_clips[-1]
+                 extended_clip = last_clip.set_end(audio.duration)
+                 visual_clips[-1] = extended_clip
+                 video = CompositeVideoClip(visual_clips)
+
+             video = video.set_duration(audio.duration)
+             video = video.set_audio(audio)
+             logger.info(f"Audio synchronized with video (duration: {video.duration:.2f}s)")
+
+         logger.info(f"Rendering final video to {OUTPUT_FILE_NAME}")
+         video.write_videofile(OUTPUT_FILE_NAME, codec='libx264', audio_codec='aac', fps=25, preset='veryfast')
+
+         # Clean up the downloaded temporary files (tracked during download;
+         # creating fresh NamedTemporaryFile names here would miss them)
+         for video_filename in temp_files:
+             if Path(video_filename).exists():
+                 os.remove(video_filename)
+                 logger.info(f"Cleaned up temporary file: {video_filename}")
+
+         if not Path(OUTPUT_FILE_NAME).exists():
+             raise FileNotFoundError(f"Failed to create output video at {OUTPUT_FILE_NAME}")
+
+         logger.info(f"Successfully rendered video at {OUTPUT_FILE_NAME}")
+         return OUTPUT_FILE_NAME
+
+     except Exception as e:
+         logger.error(f"Error rendering video: {str(e)}")
+         raise Exception(f"Video rendering failed: {str(e)}")
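
Usage note: a sketch of calling the renderer directly (the paths, timings, caption text, and URL are hypothetical examples):

    from utility.render_engine import get_output_media

    captions = [((0, 2.5), "Bananas are berries,"), ((2.5, 5.0), "but strawberries aren't.")]
    videos = [[(0, 5.0), "https://example.com/clip-1920x1080.mp4"]]  # e.g. output of generate_video_url
    output_path = get_output_media("audio_tts/output.mp3", captions, videos, "pexel")
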
utility/script_generator.py ADDED
@@ -0,0 +1,60 @@
+ import os
+ import json
+
+ from groq import Groq
+
+ model = "mixtral-8x7b-32768"
+ client = Groq(
+     api_key=os.environ.get("GROQ_API_KEY"),
+ )
+
+ def generate_script(topic):
+     prompt = (
+         """You are a seasoned content writer for a YouTube Shorts channel, specializing in facts videos.
+ Your facts shorts are concise, each lasting less than 50 seconds (approximately 140 words).
+ They are incredibly engaging and original. When a user requests a specific type of facts short, you will create it.
+
+ For instance, if the user asks for:
+ Weird facts
+ You would produce content like this:
+
+ Weird facts you don't know:
+ - Bananas are berries, but strawberries aren't.
+ - A single cloud can weigh over a million pounds.
+ - There's a species of jellyfish that is biologically immortal.
+ - Honey never spoils; archaeologists have found pots of honey in ancient Egyptian tombs that are over 3,000 years old and still edible.
+ - The shortest war in history was between Britain and Zanzibar on August 27, 1896. Zanzibar surrendered after 38 minutes.
+ - Octopuses have three hearts and blue blood.
+
+ You are now tasked with creating the best short script based on the user's requested type of 'facts'.
+
+ Keep it brief, highly interesting, and unique.
+
+ Strictly output the script in a JSON format like below, and only provide a parsable JSON object with the key 'script'.
+
+ # Output
+ {"script": "Here is the script ..."}
+ """
+     )
+
+     response = client.chat.completions.create(
+         model=model,
+         messages=[
+             {"role": "system", "content": prompt},
+             {"role": "user", "content": topic}
+         ]
+     )
+     content = response.choices[0].message.content
+     try:
+         # Basic cleanup of common JSON formatting issues
+         content = content.strip()
+         # Parse the JSON directly
+         response_dict = json.loads(content)
+         script = response_dict["script"]
+     except Exception as e:
+         print(f"Error parsing script: {e}")
+         print("Raw content:", content)
+         script = "Failed to generate script. Please try again."
+
+     return script
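
Usage note: a one-call sketch (assumes GROQ_API_KEY is set in the environment):

    from utility.script_generator import generate_script

    script = generate_script("Weird facts")  # the topic is sent as the user message
    print(script)
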
utility/text_audio_cleaner.py ADDED
@@ -0,0 +1,44 @@
+ import re
+ import os
+ from pydub import AudioSegment
+
+ def clean_text(text):
+     """
+     Removes symbols and cleans the input text.
+     """
+     text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
+     text = text.strip()  # Remove leading/trailing whitespace
+     return text
+
+ def process_audio(audio_path, output_path):
+     """
+     Cleans and normalizes the audio.
+     """
+     try:
+         audio = AudioSegment.from_file(audio_path)
+         # Simple normalization (you can add more sophisticated methods)
+         normalized_audio = audio.normalize()
+         normalized_audio.export(output_path, format="wav")
+         return output_path
+     except Exception as e:
+         print(f"Error processing audio: {e}")
+         return None
+
+ def convert_to_wav(input_file):
+     """
+     Convert any audio file to WAV format.
+     """
+     try:
+         # Load the audio file
+         audio = AudioSegment.from_file(input_file)
+
+         # Define the output WAV file path
+         output_wav = os.path.splitext(input_file)[0] + ".wav"
+
+         # Export the audio to WAV format
+         audio.export(output_wav, format="wav")
+
+         return output_wav
+     except Exception as e:
+         print(f"Error converting to WAV: {e}")
+         return None
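
Usage note: these helpers are independent of the rest of the pipeline (a small sketch; "input.mp3" is a hypothetical file):

    from utility.text_audio_cleaner import clean_text, convert_to_wav, process_audio

    print(clean_text("Hello, world!"))         # -> "Hello world"
    wav_path = convert_to_wav("input.mp3")     # writes input.wav next to the source file
    process_audio(wav_path, "normalized.wav")  # exports a volume-normalized WAV
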
utility/timed_captions_generator.py ADDED
@@ -0,0 +1,71 @@
+ import whisper_timestamped as whisper
+ from whisper_timestamped import load_model, transcribe_timestamped
+ import re
+
+ def generate_timed_captions(audio_filename, model_size="base"):
+     WHISPER_MODEL = load_model(model_size)
+
+     gen = transcribe_timestamped(WHISPER_MODEL, audio_filename, verbose=False, fp16=False)
+
+     return getCaptionsWithTime(gen)
+
+ def splitWordsBySize(words, maxCaptionSize):
+     # Greedily pack words into captions no longer than maxCaptionSize characters
+     halfCaptionSize = maxCaptionSize / 2
+     captions = []
+     while words:
+         caption = words[0]
+         words = words[1:]
+         while words and len(caption + ' ' + words[0]) <= maxCaptionSize:
+             caption += ' ' + words[0]
+             words = words[1:]
+             if len(caption) >= halfCaptionSize and words:
+                 break
+         captions.append(caption)
+     return captions
+
+ def getTimestampMapping(whisper_analysis):
+     # Map (start, end) character positions in the transcript to word end times
+     index = 0
+     locationToTimestamp = {}
+     for segment in whisper_analysis['segments']:
+         for word in segment['words']:
+             newIndex = index + len(word['text']) + 1
+             locationToTimestamp[(index, newIndex)] = word['end']
+             index = newIndex
+     return locationToTimestamp
+
+ def cleanWord(word):
+     # Strip everything except word characters, whitespace, hyphens, underscores and quotes
+     return re.sub(r'[^\w\s\-_"\']', '', word)
+
+ def interpolateTimeFromDict(word_position, d):
+     # Return the timestamp of the position range containing word_position
+     for key, value in d.items():
+         if key[0] <= word_position <= key[1]:
+             return value
+     return None
+
+ def getCaptionsWithTime(whisper_analysis, maxCaptionSize=15, considerPunctuation=False):
+     wordLocationToTime = getTimestampMapping(whisper_analysis)
+     position = 0
+     start_time = 0
+     CaptionsPairs = []
+     text = whisper_analysis['text']
+
+     if considerPunctuation:
+         sentences = re.split(r'(?<=[.!?]) +', text)
+         words = [word for sentence in sentences for word in splitWordsBySize(sentence.split(), maxCaptionSize)]
+     else:
+         words = text.split()
+         words = [cleanWord(word) for word in splitWordsBySize(words, maxCaptionSize)]
+
+     for word in words:
+         position += len(word) + 1
+         end_time = interpolateTimeFromDict(position, wordLocationToTime)
+         if end_time and word:
+             CaptionsPairs.append(((start_time, end_time), word))
+             start_time = end_time
+
+     return CaptionsPairs
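
Usage note: typical use (the audio path is an example; the first call downloads the Whisper "base" model if it is not cached):

    from utility.timed_captions_generator import generate_timed_captions

    # Returns pairs of ((start, end), caption), e.g. ((0.0, 1.4), "Bananas are")
    captions = generate_timed_captions("audio_tts/output.mp3")
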
utility/utils.py ADDED
@@ -0,0 +1,34 @@
+ import os
+ from datetime import datetime
+ import json
+
+ # Log types
+ LOG_TYPE_GPT = "GPT"
+ LOG_TYPE_PEXEL = "PEXEL"
+
+ # Log directory paths
+ DIRECTORY_LOG_GPT = ".logs/gpt_logs"
+ DIRECTORY_LOG_PEXEL = ".logs/pexel_logs"
+
+ # Method to log responses from pexel and openai
+ def log_response(log_type, query, response):
+     log_entry = {
+         "query": query,
+         "response": response,
+         "timestamp": datetime.now().isoformat()
+     }
+     if log_type == LOG_TYPE_GPT:
+         if not os.path.exists(DIRECTORY_LOG_GPT):
+             os.makedirs(DIRECTORY_LOG_GPT)
+         filename = '{}_gpt3.txt'.format(datetime.now().strftime("%Y%m%d_%H%M%S"))
+         filepath = os.path.join(DIRECTORY_LOG_GPT, filename)
+         with open(filepath, "w") as outfile:
+             outfile.write(json.dumps(log_entry) + '\n')
+
+     if log_type == LOG_TYPE_PEXEL:
+         if not os.path.exists(DIRECTORY_LOG_PEXEL):
+             os.makedirs(DIRECTORY_LOG_PEXEL)
+         filename = '{}_pexel.txt'.format(datetime.now().strftime("%Y%m%d_%H%M%S"))
+         filepath = os.path.join(DIRECTORY_LOG_PEXEL, filename)
+         with open(filepath, "w") as outfile:
+             outfile.write(json.dumps(log_entry) + '\n')
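
Usage note: for example (writes a timestamped JSON file under .logs/pexel_logs/; the query and response values are illustrative only):

    from utility.utils import log_response, LOG_TYPE_PEXEL

    log_response(LOG_TYPE_PEXEL, "cheetah running", {"videos": []})
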
utility/video_search_query_generator.py ADDED
@@ -0,0 +1,117 @@
+ import os
+ import json
+ import re
+ from datetime import datetime
+ from utility.utils import log_response, LOG_TYPE_GPT
+
+ from groq import Groq
+
+ model = "llama3-70b-8192"
+ client = Groq(
+     api_key=os.environ.get("GROQ_API_KEY"),
+ )
+
+ log_directory = ".logs/gpt_logs"
+
+ prompt = """# Instructions
+
+ Given the following video script and timed captions, extract three visually concrete and specific keywords for each time segment that can be used to search for background videos. The keywords should be short and capture the main essence of the sentence. They can be synonyms or related terms. If a caption is vague or general, consider the next timed caption for more context. If a keyword is a single word, try to return a two-word keyword that is visually concrete. If a time frame contains two or more important pieces of information, divide it into shorter time frames with one keyword each. Ensure that the time periods are strictly consecutive and cover the entire length of the video. Each keyword should cover between 2-4 seconds. The output should be in JSON format, like this: [[[t1, t2], ["keyword1", "keyword2", "keyword3"]], [[t2, t3], ["keyword4", "keyword5", "keyword6"]], ...]. Please handle all edge cases, such as overlapping time segments, vague or general captions, and single-word keywords.
+
+ For example, if the caption is 'The cheetah is the fastest land animal, capable of running at speeds up to 75 mph', the keywords should include 'cheetah running', 'fastest animal', and '75 mph'. Similarly, for 'The Great Wall of China is one of the most iconic landmarks in the world', the keywords should be 'Great Wall of China', 'iconic landmark', and 'China landmark'.
+
+ Important Guidelines:
+
+ Use only English in your text queries.
+ Each search string must depict something visual.
+ The depictions have to be extremely visually concrete, like rainy street, or cat sleeping.
+ 'emotional moment' <= BAD, because it doesn't depict something visually.
+ 'crying child' <= GOOD, because it depicts something visual.
+ The list must always contain the most relevant and appropriate query searches.
+ ['Car', 'Car driving', 'Car racing', 'Car parked'] <= BAD, because it's 4 strings.
+ ['Fast car'] <= GOOD, because it's 1 string.
+ ['Un chien', 'une voiture rapide', 'une maison rouge'] <= BAD, because the text query is NOT in English.
+
+ Note: Your response should be the response only and no extra text or data.
+ """
+
+ def fix_json(json_str):
+     # Replace typographical apostrophes with straight quotes
+     json_str = json_str.replace("’", "'")
+     # Replace any incorrect quotes (e.g., curly or mixed quotes) with straight double quotes
+     json_str = json_str.replace("“", "\"").replace("”", "\"").replace("‘", "\"").replace("’", "\"")
+     # Add escaping for quotes within the strings
+     json_str = json_str.replace('"you didn"t"', '"you didn\'t"')
+     return json_str
+
+ def getVideoSearchQueriesTimed(script, captions_timed):
+     end = captions_timed[-1][0][1]
+     try:
+         out = [[[0, 0], ""]]
+         # Retry until the returned segments cover the full caption timeline
+         while out[-1][0][1] != end:
+             content = call_OpenAI(script, captions_timed).replace("'", '"')
+             try:
+                 out = json.loads(content)
+             except Exception as e:
+                 print("content: \n", content, "\n\n")
+                 print(e)
+                 content = fix_json(content.replace("```json", "").replace("```", ""))
+                 out = json.loads(content)
+         return out
+     except Exception as e:
+         print("error in response", e)
+
+     return None
+
+ def call_OpenAI(script, captions_timed):
+     user_content = """Script: {}
+ Timed Captions:{}
+ """.format(script, "".join(map(str, captions_timed)))
+     print("Content", user_content)
+
+     response = client.chat.completions.create(
+         model=model,
+         temperature=1,
+         messages=[
+             {"role": "system", "content": prompt},
+             {"role": "user", "content": user_content}
+         ]
+     )
+
+     text = response.choices[0].message.content.strip()
+     text = re.sub(r'\s+', ' ', text)
+     print("Text", text)
+     log_response(LOG_TYPE_GPT, script, text)
+     return text
+
+ def merge_empty_intervals(segments):
+     if segments is None:
+         return []
+
+     merged = []
+     i = 0
+     while i < len(segments):
+         interval, url = segments[i]
+         if url is None:
+             # Find consecutive None intervals
+             j = i + 1
+             while j < len(segments) and segments[j][1] is None:
+                 j += 1
+
+             # Merge consecutive None intervals with the previous valid URL
+             if i > 0:
+                 prev_interval, prev_url = merged[-1]
+                 if prev_url is not None and prev_interval[1] == interval[0]:
+                     merged[-1] = [[prev_interval[0], segments[j-1][0][1]], prev_url]
+                 else:
+                     merged.append([interval, prev_url])
+             else:
+                 merged.append([interval, None])
+
+             i = j
+         else:
+             merged.append([interval, url])
+             i += 1
+
+     return merged
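
Taken together, the nine files form a script-to-video pipeline. One plausible way to chain them (a sketch only: this commit ships no entry point, and the audio path is an illustrative example):

    import asyncio
    from utility.script_generator import generate_script
    from utility.audio_generator import generate_audio
    from utility.timed_captions_generator import generate_timed_captions
    from utility.video_search_query_generator import getVideoSearchQueriesTimed, merge_empty_intervals
    from utility.background_video_generator import generate_video_url
    from utility.render_engine import get_output_media

    script = generate_script("Weird facts")
    asyncio.run(generate_audio(script, "audio_tts/output.mp3"))
    captions = generate_timed_captions("audio_tts/output.mp3")
    queries = getVideoSearchQueriesTimed(script, captions)
    videos = merge_empty_intervals(generate_video_url(queries, "pexel"))
    final_video = get_output_media("audio_tts/output.mp3", captions, videos, "pexel")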