"""Streamlit app that adds an AI-generated voiceover to an uploaded video.

Pipeline: sample frames from the video, ask an OpenAI vision model to write a
voiceover script from those frames, synthesize the script with the OpenAI
text-to-speech endpoint, and merge the resulting audio back onto the video.

Configuration is read from ``.env.local``: ``PASSWORD`` gates access to the
app and ``OPENAI_API_KEY`` authenticates the OpenAI requests.
"""

import base64
import hmac
import io
import os
import tempfile

import cv2
import openai
import requests
import streamlit as st
from dotenv import load_dotenv
from moviepy.editor import AudioFileClip, VideoFileClip

# Load environment variables from .env.local
load_dotenv('.env.local')


def _remove_quietly(path):
    """Delete *path*, ignoring the error if it is missing or cannot be removed."""
    try:
        os.unlink(path)
    except OSError:
        # Best-effort cleanup of temporary files: a file that was never
        # created (or is already gone) must not mask the real outcome.
        pass


def check_password():
    """Render the password prompt and report whether access is granted.

    Returns:
        True when the entered password equals the ``PASSWORD`` environment
        variable, False otherwise (including when ``PASSWORD`` is unset or
        empty, in which case an error is shown).
    """
    correct_password = os.getenv('PASSWORD')
    # Treat an empty PASSWORD like a missing one: otherwise the initially
    # empty text box would match it and every visitor would be let in.
    if not correct_password:
        st.error("Password is not set in .env.local")
        return False

    user_password = st.text_input("Enter the password to proceed", type="password")
    # Constant-time comparison; encode first because compare_digest only
    # accepts ASCII-only str values.
    if hmac.compare_digest(user_password.encode("utf-8"), correct_password.encode("utf-8")):
        return True

    if st.button("Check Password"):
        st.error("Incorrect password")
    return False


def video_to_frames(video_file, frame_sampling_rate=1):
    """Copy an uploaded video to a temp file and sample JPEG frames from it.

    Args:
        video_file: File-like object exposing ``read()`` that returns the
            video bytes.
        frame_sampling_rate: Seconds of video between two sampled frames.

    Returns:
        A tuple ``(frames, video_filename, video_duration)`` where *frames* is
        a list of base64-encoded JPEG strings, *video_filename* is the path of
        the temporary copy (the caller is responsible for deleting it) and
        *video_duration* is the clip duration reported by moviepy.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmpfile:
        tmpfile.write(video_file.read())
        video_filename = tmpfile.name

    try:
        # moviepy is only used for metadata; close it as soon as it is read.
        video_clip = VideoFileClip(video_filename)
        try:
            video_duration = video_clip.duration
            fps = video_clip.fps
        finally:
            video_clip.close()

        # Clamp to 1 so a small fps * rate product cannot become a zero modulus.
        frames_to_skip = max(1, int(fps * frame_sampling_rate))

        base64_frames = []
        video = cv2.VideoCapture(video_filename)
        try:
            current_frame = 0
            while video.isOpened():
                success, frame = video.read()
                if not success:
                    break
                if current_frame % frames_to_skip == 0:
                    encoded, buffer = cv2.imencode('.jpg', frame)
                    if encoded:
                        base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
                current_frame += 1
        finally:
            video.release()
    except Exception:
        # The caller never receives the path on failure, so remove it here.
        _remove_quietly(video_filename)
        raise

    print(f"{len(base64_frames)} frames read at a sampling rate of {frame_sampling_rate} second(s) per frame.")
    return base64_frames, video_filename, video_duration


def frames_to_story(base64Frames, prompt, api_key, frame_step=50):
    """Ask the vision model to write a voiceover script for the frames.

    Args:
        base64Frames: List of base64-encoded JPEG frames.
        prompt: Instruction text sent ahead of the images.
        api_key: OpenAI API key.
        frame_step: Send every ``frame_step``-th frame of *base64Frames*
            (starting with the first). Must be a positive integer.

    Returns:
        The text content of the first completion choice.

    Raises:
        ValueError: If *frame_step* is less than 1.
    """
    if frame_step < 1:
        raise ValueError("frame_step must be a positive integer")

    image_parts = [{"image": frame, "resize": 768} for frame in base64Frames[0::frame_step]]
    prompt_messages = [
        {
            "role": "user",
            "content": [prompt, *image_parts],
        },
    ]
    params = {
        "model": "gpt-4-vision-preview",
        "messages": prompt_messages,
        "api_key": api_key,
        "headers": {"Openai-Version": "2020-11-07"},
        "max_tokens": 700,
    }

    result = openai.ChatCompletion.create(**params)
    story = result.choices[0].message.content
    print(story)
    return story


def text_to_audio(text, api_key, voice):
    """Synthesize *text* with the OpenAI speech endpoint.

    Args:
        text: Script to be spoken.
        api_key: OpenAI API key used as the bearer token.
        voice: Voice identifier passed through to the API.

    Returns:
        A tuple ``(audio_filename, audio_bytes_io)``: the path of a temporary
        file holding the audio (the caller is responsible for deleting it) and
        an ``io.BytesIO`` with the same bytes, positioned at the start.

    Raises:
        Exception: If the API responds with a status code other than 200.
    """
    response = requests.post(
        "https://api.openai.com/v1/audio/speech",
        headers={
            "Authorization": f"Bearer {api_key}",
        },
        json={
            "model": "tts-1",
            "input": text,
            "voice": voice,
        },
    )

    if response.status_code != 200:
        raise Exception(
            f"Request failed with status code {response.status_code}: {response.text}"
        )

    # The request is not streamed, so the whole body is already in memory.
    audio_bytes = response.content
    audio_bytes_io = io.BytesIO(audio_bytes)

    # No response_format is requested, so the API returns its default (MP3);
    # name the file accordingly instead of ".wav".
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmpfile:
        tmpfile.write(audio_bytes)
        audio_filename = tmpfile.name

    return audio_filename, audio_bytes_io


def merge_audio_video(video_filename, audio_filename, output_filename):
    """Write a copy of the video with the given audio as its soundtrack.

    Both streams are trimmed to the shorter of the two durations.

    Args:
        video_filename: Path of the source video.
        audio_filename: Path of the audio to lay over the video.
        output_filename: Path the merged video is written to.

    Returns:
        *output_filename*.
    """
    print("Merging audio and video ...")

    video_clip = VideoFileClip(video_filename)
    try:
        audio_clip = AudioFileClip(audio_filename)
        try:
            # Trim both to the shortest duration so neither outlasts the other.
            min_duration = min(video_clip.duration, audio_clip.duration)
            trimmed_video = video_clip.subclip(0, min_duration)
            trimmed_audio = audio_clip.subclip(0, min_duration)
            final_clip = trimmed_video.set_audio(trimmed_audio)

            final_clip.write_videofile(output_filename, codec='libx264', audio_codec="aac")
        finally:
            audio_clip.close()
    finally:
        video_clip.close()

    return output_filename


def main():
    """Build the Streamlit page and run the voiceover pipeline on demand."""
    st.set_page_config(page_title="AI Voiceover", page_icon="🔮")
    st.title("Pixio Video to Voiceover 🎥🔮")

    if not check_password():
        return

    openai_key = os.getenv('OPENAI_API_KEY')
    if not openai_key:
        st.error("OpenAI API key is not set in .env.local")
        return

    uploaded_file = st.file_uploader("Select a video file", type=["mp4", "avi"])

    # Display label -> voice identifier sent to the speech API.
    voice_options = {
        'Echo (Male)': 'echo',
        'Fable (Male)': 'fable',
        'Onyx (Male)': 'onyx',
        'Nova (Female)': 'nova',
        'Shimmer (Female)': 'shimmer',
        'Alloy (Female)': 'alloy',
    }
    option = st.selectbox('Choose the voice you want', list(voice_options.keys()))
    voice = voice_options[option]

    duration_options = list(range(10, 121, 10))  # 10 to 120 seconds, in 10 second intervals
    selected_duration = st.selectbox('Select the desired video duration (seconds)', duration_options)

    # Dropdown for script generator type
    script_type_options = {
        'Product Tutorial': 'Product Tutorial',
        'TikTok': 'TikTok',
        'YouTube Short': 'YouTube Short',
        'Website Tutorial': 'Website Tutorial',
        'General Info': 'General Info',
    }
    selected_script_type = st.selectbox('Choose the script generator type', list(script_type_options.keys()))

    # Incorporating the selected script type and duration into the prompt
    dynamic_prompt_intro = (
        f"Script type: {selected_script_type}. "
        f"Generate a voiceover script that is approximately {selected_duration} seconds long, "
        f"tailored to the content and format of a {selected_script_type.lower()}."
    )
    prompt = st.text_area("Edit the voiceover script prompt as needed:", value=dynamic_prompt_intro, height=300)

    if uploaded_file is not None and st.button("START PROCESSING", type="primary"):
        # Every temp file is registered as soon as its path is known so the
        # finally-clause removes it on success, early return, or error.
        temp_files = []
        try:
            with st.spinner("Video is being processed..."):
                base64_frames, video_filename, video_duration = video_to_frames(
                    uploaded_file, frame_sampling_rate=1
                )
                temp_files.append(video_filename)

                if video_duration > selected_duration:
                    st.error(f"The video exceeds the selected duration of {selected_duration} seconds.")
                    return

                text = frames_to_story(base64_frames, prompt, openai_key)
                st.write(text)

                audio_filename, _audio_bytes_io = text_to_audio(text, openai_key, voice)
                temp_files.append(audio_filename)

                output_video_filename = os.path.splitext(video_filename)[0] + "_output.mp4"
                # Registered before merging so a partially written output is removed too.
                temp_files.append(output_video_filename)
                final_video_filename = merge_audio_video(video_filename, audio_filename, output_video_filename)

                st.video(final_video_filename)
        finally:
            for path in temp_files:
                _remove_quietly(path)


if __name__ == "__main__":
    main()