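"""AI Movie Maker: extract named entities from the input text, generate one
latent-diffusion image per entity, narrate the text with SpeechT5, and merge
the images and narration into a single video with moviepy."""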
import gradio as gr
from PIL import Image
from pydub import AudioSegment
from moviepy.editor import ImageSequenceClip, VideoFileClip, AudioFileClip
import numpy as np
import os
from mutagen.mp3 import MP3
import soundfile as sf
from dotenv import load_dotenv
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
import torch
import tempfile
# Load environment variables
load_dotenv()
HF_TOKEN = os.getenv("API_KEY")
def cleanup_temp_files():
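    """Remove leftover intermediate audio/video files from the temp directory."""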
temp_files = [
os.path.join(tempfile.gettempdir(), 'speech_output.flac'),
os.path.join(tempfile.gettempdir(), 'audio.mp3'),
os.path.join(tempfile.gettempdir(), 'my_vid_tmp.mp4'),
os.path.join(tempfile.gettempdir(), 'mergedvideo.mp4')
]
for file in temp_files:
if os.path.exists(file):
try:
os.remove(file)
            except OSError:
                pass
def resize(img_list):
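    """Open each image path, resize to 256x256, and return numpy arrays for moviepy."""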
resize_img_list = []
for item in img_list:
im = Image.open(item)
imResize = im.resize((256, 256), Image.LANCZOS)
resize_img_list.append(np.array(imResize))
return resize_img_list
def text2speech(text):
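    """Synthesize speech for `text` with SpeechT5, write it to a FLAC file, and return the path."""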
try:
        # AutoModel would load the bare SpeechT5 encoder-decoder, which has no
        # generate_speech method; the TTS head plus a HiFi-GAN vocoder are
        # needed to produce an actual waveform.
        processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
        model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
        vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
        inputs = processor(text=text, return_tensors="pt")
        # A zero x-vector yields a neutral (if flat) voice; note the config
        # attribute is speaker_embedding_dim, not speaker_embedding_size.
        speaker_embeddings = torch.zeros((1, model.config.speaker_embedding_dim))
        speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
        output_path = os.path.join(tempfile.gettempdir(), "speech_output.flac")
        sf.write(output_path, speech.numpy(), samplerate=16000)  # SpeechT5 outputs 16 kHz audio
return output_path
except Exception as e:
print(f"Error in text2speech: {str(e)}")
raise
def merge_audio_video(entities_num, resize_img_list, text_input):
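    """Narrate `text_input`, pace the image sequence to the narration length, and mux both into one MP4."""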
try:
speech = text2speech(text_input)
wav_audio = AudioSegment.from_file(speech, "flac")
audio_path = os.path.join(tempfile.gettempdir(), "audio.mp3")
wav_audio.export(audio_path, format="mp3")
        # Pace the slideshow so the images span the narration; clamp the audio
        # length to avoid dividing by zero on clips shorter than one second.
        audio_length = max(int(MP3(audio_path).info.length), 1)
        fps = max(entities_num / audio_length, 1.0)  # keep fps at least 1
        fps = round(fps, 5)
temp_video = os.path.join(tempfile.gettempdir(), "my_vid_tmp.mp4")
clip = ImageSequenceClip(resize_img_list, fps=fps)
clip.write_videofile(temp_video, codec='libx264', fps=fps)
videoclip = VideoFileClip(temp_video)
audioclip = AudioFileClip(audio_path)
mergedclip = videoclip.set_audio(audioclip)
output_path = os.path.join(tempfile.gettempdir(), "mergedvideo.mp4")
mergedclip.write_videofile(output_path)
# Clean up clips
videoclip.close()
audioclip.close()
mergedclip.close()
return output_path
except Exception as e:
print(f"Error in merge_audio_video: {str(e)}")
raise
finally:
cleanup_temp_files()
# Load remote model/Space clients once, outside the Blocks context;
# HF_TOKEN (read from the environment above) authenticates the calls.
ner = gr.load("huggingface/flair/ner-english-ontonotes-large", hf_token=HF_TOKEN)
latentdiffusion = gr.load("spaces/multimodalart/latentdiffusion", hf_token=HF_TOKEN)
def engine(text_input):
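    """Full pipeline: extract entities, generate one image per entity, then merge images with narration."""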
try:
entities = ner(text_input)
entities = [tupl for tupl in entities if None not in tupl]
entities_num = len(entities)
if entities_num == 0:
raise ValueError("No entities found in the input text")
img_list = []
for ent in entities:
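            # Positional args follow the multimodalart/latentdiffusion Space's
            # API (prompt, steps, width, height, num_images, guidance scale);
            # the parameter order is assumed from the call, not verified.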
img = latentdiffusion(ent[0], '50', '256', '256', '1', 10)[0]
img_list.append(img)
resize_img_list = resize(img_list)
output_path = merge_audio_video(entities_num, resize_img_list, text_input)
return output_path
except Exception as e:
print(f"Error in engine: {str(e)}")
raise gr.Error(f"An error occurred: {str(e)}")
finally:
cleanup_temp_files()
with gr.Blocks() as app:
gr.Markdown("# AI Pipeline Multi Model 🎭🎞️🍿 Movie Maker 🎬 🧠 🎨")
gr.Markdown("<div>🎭🎞️🍿 AI Movie Maker - Comedy 🎬 🧠 🎨</div>")
text_input = gr.Textbox(lines=5, label="Input Text")
output_video = gr.Video(label='Final Merged Video')
examples = gr.Examples(
examples=[
["Two space marines take up arms to save the planet from an alien invasion. These two dashing strong men play a comedic role in the science fiction movie of the future where even Barnaby bunny is willing to join their wacky gang of space marines to save the planet with good looks and comedy."]
],
inputs=text_input
)
submit_button = gr.Button("Generate Video")
submit_button.click(fn=engine, inputs=text_input, outputs=output_video)
gr.Markdown("<br><div></div>")
app.launch(
debug=True,
share=True, # Enable sharing
server_name="0.0.0.0", # Listen on all interfaces
server_port=7860 # Specify port
)