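"""Streamlit app: upload a short video, have GPT-4 Vision draft a voiceover script,
synthesize the narration with OpenAI text-to-speech, and merge it back into the video."""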
import base64
import io
import os
import tempfile

import cv2
import openai
import requests
import streamlit as st
from dotenv import load_dotenv
from moviepy.editor import VideoFileClip, AudioFileClip

# Load environment variables from .env.local
load_dotenv('.env.local')
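
# The app reads two variables from .env.local; a minimal example of that file
# (placeholder values only) would look like:
#
#   PASSWORD=choose-an-app-password
#   OPENAI_API_KEY=sk-...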

def check_password():
    """Gate the app behind the PASSWORD value from .env.local; return True only for a correct entry."""
    correct_password = os.getenv('PASSWORD')
    if correct_password is None:
        st.error("Password is not set in .env.local")
        return False
    user_password = st.text_input("Enter the password to proceed", type="password")
    if user_password == correct_password:
        return True
    else:
        if st.button("Check Password"):
            st.error("Incorrect password")
        return False

def video_to_frames(video_file, frame_sampling_rate=1):
    """Write the uploaded video to a temp file and return base64-encoded JPEG frames
    sampled every `frame_sampling_rate` seconds, plus the temp filename and duration."""
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmpfile:
        tmpfile.write(video_file.read())
        video_filename = tmpfile.name
    video_clip = VideoFileClip(video_filename)
    video_duration = video_clip.duration
    fps = video_clip.fps
    video_clip.close()
    # Keep one frame every `frame_sampling_rate` seconds; never skip fewer than one frame
    frames_to_skip = max(1, int(fps * frame_sampling_rate))
    video = cv2.VideoCapture(video_filename)
    base64Frames = []
    current_frame = 0
    while video.isOpened():
        success, frame = video.read()
        if not success:
            break
        if current_frame % frames_to_skip == 0:
            _, buffer = cv2.imencode('.jpg', frame)
            base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
        current_frame += 1
    video.release()
    print(f"{len(base64Frames)} frames read at a sampling rate of {frame_sampling_rate} second(s) per frame.")
    return base64Frames, video_filename, video_duration
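
# Rough illustration of the sampling math (assumed figures, not measured): a 60-second
# clip at 30 fps with frame_sampling_rate=1 gives frames_to_skip = 30, so roughly one
# frame per second is kept, i.e. about 60 encoded frames in total.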

def frames_to_story(base64Frames, prompt, api_key):
    """Send the prompt plus a subsample of frames to GPT-4 Vision and return the generated script."""
    PROMPT_MESSAGES = [
        {
            "role": "user",
            "content": [
                prompt,
                # Forward only every 50th sampled frame, with a 768-pixel resize hint, to keep the request small
                *map(lambda x: {"image": x, "resize": 768}, base64Frames[0::50]),
            ],
        },
    ]
    params = {
        "model": "gpt-4-vision-preview",
        "messages": PROMPT_MESSAGES,
        "api_key": api_key,
        "headers": {"Openai-Version": "2020-11-07"},
        "max_tokens": 700,
    }
    # Uses the legacy pre-1.0 openai SDK interface (openai.ChatCompletion)
    result = openai.ChatCompletion.create(**params)
    print(result.choices[0].message.content)
    return result.choices[0].message.content
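
# Note on frame volume (illustrative numbers): with one sampled frame per second, the
# slice base64Frames[0::50] keeps only every 50th of those frames, so a 60-second video
# contributes just one or two images to the vision request.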

def text_to_audio(text, api_key, voice):
    """Convert the script to speech with OpenAI's text-to-speech endpoint and return
    both a temp file path and an in-memory copy of the audio."""
    response = requests.post(
        "https://api.openai.com/v1/audio/speech",
        headers={
            "Authorization": f"Bearer {api_key}",
        },
        json={
            "model": "tts-1",
            "input": text,
            "voice": voice,
        },
    )
    if response.status_code != 200:
        raise Exception(f"Text-to-speech request failed with status code {response.status_code}: {response.text}")
    # Keep one in-memory copy and one on-disk copy; the endpoint returns MP3 audio by default
    audio_bytes_io = io.BytesIO(response.content)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmpfile:
        tmpfile.write(response.content)
        audio_filename = tmpfile.name
    return audio_filename, audio_bytes_io
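
# The in-memory copy can be previewed directly in Streamlit if desired; an illustrative
# snippet (not wired into this app) would be:
#   audio_filename, audio_bytes_io = text_to_audio("Hello world", openai_key, "nova")
#   st.audio(audio_bytes_io, format="audio/mp3")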

def merge_audio_video(video_filename, audio_filename, output_filename):
    print("Merging audio and video ...")
    # Load the video file
    video_clip = VideoFileClip(video_filename)
    # Load the audio file
    audio_clip = AudioFileClip(audio_filename)
    # Determine the shortest duration between audio and video
    min_duration = min(video_clip.duration, audio_clip.duration)
    # Set the audio of the video clip as the audio file, trimming to the shortest duration
    video_clip = video_clip.subclip(0, min_duration)
    audio_clip = audio_clip.subclip(0, min_duration)
    final_clip = video_clip.set_audio(audio_clip)
    # Write the result to a file
    final_clip.write_videofile(output_filename, codec='libx264', audio_codec="aac")
    # Close the clips
    video_clip.close()
    audio_clip.close()
    return output_filename

def main():
    st.set_page_config(page_title="AI Voiceover", page_icon="🔮")
    st.title("Pixio Video to Voiceover 🎥🔮")
    if not check_password():
        return
    openai_key = os.getenv('OPENAI_API_KEY')
    if not openai_key:
        st.error("OpenAI API key is not set in .env.local")
        return
    uploaded_file = st.file_uploader("Select a video file", type=["mp4", "avi"])
    voice_options = {
        'Echo (Male)': 'echo',
        'Fable (Male)': 'fable',
        'Onyx (Male)': 'onyx',
        'Nova (Female)': 'nova',
        'Shimmer (Female)': 'shimmer',
        'Alloy (Female)': 'alloy'
    }
    option = st.selectbox('Choose the voice you want', list(voice_options.keys()))
    selected_voice = voice_options[option]
    duration_options = list(range(10, 121, 10))  # 10 to 120 seconds, in 10-second steps
    selected_duration = st.selectbox('Select the desired video duration (seconds)', duration_options)
    # Dropdown for the script generator type
    script_type_options = {
        'Product Tutorial': 'Product Tutorial',
        'TikTok': 'TikTok',
        'YouTube Short': 'YouTube Short',
        'Website Tutorial': 'Website Tutorial',
        'General Info': 'General Info'
    }
    selected_script_type = st.selectbox('Choose the script generator type', list(script_type_options.keys()))
    # Incorporate the selected script type and duration into the default prompt
    dynamic_prompt_intro = f"Script type: {selected_script_type}. Generate a voiceover script that is approximately {selected_duration} seconds long, tailored to the content and format of a {selected_script_type.lower()}."
    prompt = st.text_area("Edit the voiceover script prompt as needed:", value=dynamic_prompt_intro, height=300)
    if uploaded_file is not None and st.button("START PROCESSING", type="primary"):
        with st.spinner("Video is being processed..."):
            base64Frames, video_filename, video_duration = video_to_frames(uploaded_file, frame_sampling_rate=1)
            if video_duration > selected_duration:
                st.error(f"The video exceeds the selected duration of {selected_duration} seconds.")
                return
            text = frames_to_story(base64Frames, prompt, openai_key)
            st.write(text)
            audio_filename, audio_bytes_io = text_to_audio(text, openai_key, selected_voice)
            output_video_filename = os.path.splitext(video_filename)[0] + "_output.mp4"
            final_video_filename = merge_audio_video(video_filename, audio_filename, output_video_filename)
            st.video(final_video_filename)
            # Clean up the temporary files
            os.unlink(video_filename)
            os.unlink(audio_filename)
            os.unlink(final_video_filename)


if __name__ == "__main__":
    main()