wower99's picture
video generation feature v1 is functional
a46fd4b
raw
history blame
9.91 kB
import requests
import constants
import os
from PIL import Image
from gradio_client import Client
import moviepy.editor as mp
from moviepy.video.VideoClip import ImageClip
from moviepy.editor import AudioFileClip
from structured_output_extractor import StructuredOutputExtractor
from pydantic import BaseModel, Field
from typing import List
import tempfile
import os
def clean_response(result):
    """Strip the AutomaticSpeechRecognitionOutput wrapper from a whisper response.

    Temporary fix: the predict endpoint returns the repr of an
    AutomaticSpeechRecognitionOutput, e.g.
        AutomaticSpeechRecognitionOutput(text=" sometimes life ...", chunks=None)
    ideally the response should have started from "sometimes..." directly.
    This extracts only the transcript between 'text="' and '", chunks=None'.

    Args:
        result: Raw response string from the ASR endpoint.

    Returns:
        The extracted transcript, or the input unchanged when the wrapper
        markers are not present (input is already clean).
    """
    print("\n\nStarted Cleaning Response")
    start_marker = 'text="'
    end_marker = '", chunks=None'
    start_pos = result.find(start_marker)
    end_pos = result.find(end_marker)
    # Guard: find() returns -1 when a marker is absent; the original
    # unconditional slicing then produced garbage. Return input as-is.
    if start_pos == -1 or end_pos == -1:
        print("Wrapper markers not found; returning result unchanged")
        return result
    cleaned_result = result[start_pos + len(start_marker):end_pos]
    print("Returning Cleaned Result: ", cleaned_result)
    return cleaned_result
def get_translation(text: str):
    """Translate *text* via the remote translation endpoint.

    Args:
        text: Source text to translate.

    Returns:
        The translated string on success, or a dict of the form
        {"error_occured": <message>} on HTTP error or network failure.
    """
    print('\n\nTranslating text: ', text, type(text))
    # Input payload
    data = {"text_input": text}
    # Headers for authentication
    headers = {"Authorization": f"Bearer {constants.HF_TOKEN}"}
    try:
        # Make a POST request to the translation service
        response = requests.post(constants.TRANSLATION_ENDPOINT, json=data, headers=headers)
        # Process response
        if response.status_code == 200:
            response_data = response.json()
            print("Returning Translation")
            return response_data.get("output", "No output found.")
        else:
            print("Some Error Occured During Translation Request")
            print(response)
            print(f"Error: {response.status_code}, {response.text}")
            return {"error_occured": response.text}
    except Exception as e:
        print(f"An exception occurred: {e}")
        # Return the message string, not the exception object, so the
        # dict stays JSON-serializable (consistent with the HTTP branch).
        return {"error_occured": str(e)}
def old_get_image_prompts(text_input):
    """Request image prompts for *text_input* from the remote prompt-generation endpoint.

    Args:
        text_input: Text to derive image prompts from.

    Returns:
        Parsed JSON response on success, or {"error": <message>} on failure.
    """
    endpoint = f"{constants.PROMPT_GENERATION_ENDPOINT}"
    request_headers = {
        "Authorization": f"Bearer {constants.HF_TOKEN}",  # Replace with your token
        "Content-Type": "application/json",  # Optional, ensures JSON payload
    }
    body = {"text_input": text_input}
    try:
        print("making post request for image prompts", endpoint)
        response = requests.post(endpoint, json=body, headers=request_headers)
        # Raise an exception for HTTP errors, then hand back the parsed JSON
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error during request: {e}")
        return {"error": str(e)}
def segments_to_chunks(segments):
    """Flatten transcript segments to a list of their "text" fields.

    Args:
        segments: Iterable of segment dicts (each may carry a "text" key).

    Returns:
        List with one entry per segment (None where "text" is missing).
    """
    return [segment.get("text") for segment in segments]
def get_image_prompts(text_input : List):
    """Generate one detailed image prompt per transcript chunk.

    Uses StructuredOutputExtractor with a pydantic schema so the LLM
    output is guaranteed to be a list of prompt strings.

    Args:
        text_input: List of transcript chunk strings.

    Returns:
        Dict form of the response model, i.e. {"image_prompts": [...]}.
    """
    # Response schema the extractor must conform to
    class ImagePromptResponseSchema(BaseModel):
        image_prompts: List[str] = Field(
            description="List of detailed image prompts, Each Image Prompt Per Chunk"
        )

    schema_extractor = StructuredOutputExtractor(response_schema=ImagePromptResponseSchema)
    total_chunks = len(text_input)
    joined_chunks = "chunk: " + "\nchunk: ".join(text_input)
    prompt = f"""ROLE: You are a Highly Experienced Image Prompt Sythesizer
    TASK: Generate {total_chunks} image prompts, Each per chunk\n\n {joined_chunks}"""
    extraction = schema_extractor.extract(prompt)
    # returns dictionary version of the pydantic model
    return extraction.model_dump()
def generate_image(prompt, path='test_image.png'):
    """Generate one image for *prompt* via the Gradio space and save it at *path*.

    Args:
        prompt: Text prompt for image generation.
        path: Local file path the generated image is saved to.

    Returns:
        The raw predict() result (file path / URL) on success, or
        {"error": <message>} on failure.
    """
    try:
        # Authenticated Gradio client for the image-generation space
        gradio_client = Client(constants.IMAGE_GENERATION_SPACE_NAME, hf_token=constants.HF_TOKEN)
        # Make the API request
        result = gradio_client.predict(
            param_0=prompt,  # Text prompt for image generation
            api_name="/predict"
        )
        # Persist a copy of the generated image at the requested path
        Image.open(result).save(path)
        return result
    except Exception as e:
        print(f"Error during image generation: {e}")
        return {"error": str(e)}
def generate_images(image_prompts, folder_name='test_folder'):
    """Lazily generate one image per prompt, yielding (prompt, image_path) pairs.

    Images are written into a temp folder named *folder_name* as
    0.png, 1.png, ... in prompt order.

    Args:
        image_prompts: Iterable of text prompts.
        folder_name: Name of the tmp_dir sub-folder to write into.
    """
    target_folder = tmp_folder(folder_name)
    for idx, image_prompt in enumerate(image_prompts):
        print(idx, image_prompt)
        saved_path = generate_image(prompt=image_prompt, path=f"{target_folder}/{idx}.png")
        yield image_prompt, saved_path
def tmp_folder(folder_name: str) -> str:
    """Create (if needed) and return a named working folder under ./tmp_dir.

    Args:
        folder_name: Name of the sub-folder to create.

    Returns:
        Path to the ready-to-use folder.
    """
    # Use the current working directory (not tempfile) so generated
    # assets stay in a predictable, accessible location.
    base_tmp_path = os.path.join(os.getcwd(), "tmp_dir")
    folder_path = os.path.join(base_tmp_path, folder_name)
    # exist_ok=True creates base and leaf in one call and avoids the
    # check-then-create race of os.path.exists() + os.makedirs().
    os.makedirs(folder_path, exist_ok=True)
    print(f"Temporary folder '{folder_name}' is ready at {folder_path}.")
    return folder_path
def old_generate_video(audio_file, images, segments):
    """Deprecated: build a video from still images timed to transcript segments.

    Superseded by generate_video(), which additionally letterboxes
    frames to a 16:9 canvas.

    Args:
        audio_file: Uploaded file-like object with .name and .read().
        images: List of image file paths, one per segment (last reused).
        segments: List of dicts with "start"/"end" times in seconds.

    Returns:
        Path of the written mp4 on success, None on failure.
    """
    print(f"images: {images}")
    print(f"segments: {segments}")
    print(f"audio file: {audio_file.name}")
    temp_audio_path = None
    try:
        # Persist the uploaded audio so moviepy can open it by filename
        file_extension = os.path.splitext(audio_file.name)[1]
        temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=f"{file_extension}")
        temp_audio_path.write(audio_file.read())
        temp_audio_path.close()

        # Load the audio file using MoviePy
        audio = mp.AudioFileClip(temp_audio_path.name)

        # Create one still-image clip per segment, timed to the transcript
        video_clips = []
        for i, segment in enumerate(segments):
            start_time = segment["start"]
            end_time = segment["end"]
            # Clamp the index so extra segments reuse the last image
            image_path = images[min(i, len(images) - 1)]
            image_clip = ImageClip(image_path, duration=end_time - start_time)
            image_clip = image_clip.set_start(start_time).set_end(end_time)
            video_clips.append(image_clip)

        # Concatenate the image clips and attach the audio track
        video = mp.concatenate_videoclips(video_clips, method="compose")
        video = video.set_audio(audio)

        video_path = os.path.join(tempfile.gettempdir(), "generated_video.mp4")
        video.write_videofile(video_path, fps=24, codec="libx264", audio_codec="aac")
        return video_path
    except Exception as e:
        print(f"Error generating video: {e}")
        return None
    finally:
        # Clean up the temp audio file even when rendering fails
        # (the original only removed it on the success path — a leak).
        if temp_audio_path is not None and os.path.exists(temp_audio_path.name):
            os.remove(temp_audio_path.name)
from moviepy.editor import *
def generate_video(audio_file, images, segments):
    """Build a 1920x1080 video from still images timed to transcript segments.

    Each segment shows its matching image (the last image is reused if
    segments outnumber images), scaled and letterboxed on black to a
    16:9 frame, with the uploaded audio as the soundtrack.

    Args:
        audio_file: Uploaded file-like object with .name and .read().
        images: List of image file paths, one per segment (last reused).
        segments: List of dicts with "start"/"end" times in seconds.

    Returns:
        Path of the written mp4 on success, None on failure.
    """
    print(f"images: {images}")
    print(f"segments: {segments}")
    print(f"audio file: {audio_file.name}")
    temp_audio_path = None
    try:
        # Persist the uploaded audio so moviepy can open it by filename
        file_extension = os.path.splitext(audio_file.name)[1]
        temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=f"{file_extension}")
        temp_audio_path.write(audio_file.read())
        temp_audio_path.close()

        # Load the audio file using MoviePy
        audio = AudioFileClip(temp_audio_path.name)

        # YouTube-like dimensions (16:9 aspect ratio)
        frame_width = 1920
        frame_height = 1080

        # Create one still-image clip per segment, timed to the transcript
        video_clips = []
        for i, segment in enumerate(segments):
            start_time = segment["start"]
            end_time = segment["end"]
            # Clamp the index so extra segments reuse the last image
            image_path = images[min(i, len(images) - 1)]
            image_clip = ImageClip(image_path, duration=end_time - start_time)
            # Scale to frame height, then letterbox onto a black 16:9 canvas
            image_clip = image_clip.resize(height=frame_height).on_color(
                size=(frame_width, frame_height),
                color=(0, 0, 0),  # Black background
                pos="center"      # Center the image
            )
            image_clip = image_clip.set_start(start_time).set_end(end_time)
            video_clips.append(image_clip)

        # Concatenate the image clips and attach the audio track
        video = concatenate_videoclips(video_clips, method="compose")
        video = video.set_audio(audio)

        video_path = os.path.join(tempfile.gettempdir(), "generated_video.mp4")
        video.write_videofile(video_path, fps=24, codec="libx264", audio_codec="aac")
        return video_path
    except Exception as e:
        print(f"Error generating video: {e}")
        return None
    finally:
        # Clean up the temp audio file even when rendering fails
        # (the original only removed it on the success path — a leak).
        if temp_audio_path is not None and os.path.exists(temp_audio_path.name):
            os.remove(temp_audio_path.name)
# Example usage:
if __name__ == "__main__":
    # generate_images is a generator: it must be iterated for any work
    # to happen (the original call created the generator and discarded
    # it, so no images were ever produced).
    for prompt, image_path in generate_images(["a guy in jungle", "a waterfall", "greenery"]):
        print(f"Generated {image_path} for prompt: {prompt}")