|
import os
import tempfile
from typing import List

import requests
from PIL import Image
from gradio_client import Client
from moviepy.editor import AudioFileClip, ImageClip, concatenate_videoclips
from pydantic import BaseModel, Field

import constants
from structured_output_extractor import StructuredOutputExtractor


def get_summarization(text: str):
    print("\n\nSummarizing text:", text, type(text))

    data = {"text_input": text}
    headers = {"Authorization": f"Bearer {constants.HF_TOKEN}"}

    try:
        response = requests.post(constants.SUMMARIZATION_ENDPOINT, json=data, headers=headers)

        if response.status_code == 200:
            response_data = response.json()
            print("Returning summarization")
            return response_data.get("output", "No output found.")
        else:
            print("An error occurred during the summarization request")
            print(f"Error: {response.status_code}, {response.text}")
            return {"error_occurred": response.text}
    except Exception as e:
        print(f"An exception occurred: {e}")
        return {"error_occurred": str(e)}
|
|
|
|
|
def segments_to_chunks(segments):
    # Keep only the text of each transcription segment.
    return [segment.get("text") for segment in segments]
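
# Illustrative input/output, assuming Whisper-style segments with
# "start"/"end"/"text" keys (the shape generate_video below also relies on):
#
#     segments_to_chunks([
#         {"start": 0.0, "end": 2.5, "text": "A guy went to the jungle."},
#         {"start": 2.5, "end": 4.1, "text": "He saw a lion."},
#     ])
#     # -> ["A guy went to the jungle.", "He saw a lion."]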
|
|
|
|
|
def get_image_prompts(text_input: List[str], summary):
    print(f"summary: {summary}")

    class ImagePromptResponseSchema(BaseModel):
        image_prompts: List[str] = Field(
            description="List of detailed image prompts, one image prompt per chunk"
        )

    extractor = StructuredOutputExtractor(response_schema=ImagePromptResponseSchema)
    chunks_count = len(text_input)
    chunks = "chunk: " + "\nchunk: ".join(text_input)
    prompt = f"""
ROLE: You are a highly experienced image prompt synthesizer.

SYSTEM PROMPT: Given the overall summary and all chunks of the text:
1. Use the summary and the combined context of all chunks; read in sequence, the chunks form the full script.
2. **Identify the theme** and setting of the complete text.
3. For each chunk, read the chunk and the summary, then create a simple, focused, context-aware image prompt based on key visual elements from both.
4. Keep the image style as 3D (MUST BE FOLLOWED).
5. Negatives: hyper-realistic, real-life humans.
6. Ensure a consistent theme across the whole script.

### Example
summary: this text is a story of a guy who went to the jungle and met a lion
**Chunks**:
1. A guy went to the jungle.
2. He saw a lion.

**Combined Context**:
"A man ventured into a jungle and encountered a lion."

**Prompts**:
- **Chunk 1**: "[style: 3D| theme: dark jungle] A man walking into a dense, green jungle, with tall trees and sunlight filtering through the leaves."
- **Chunk 2**: "[style: 3D| theme: dark jungle] In a jungle clearing, a lion stands majestically, its golden mane glowing in the soft sunlight as it watches the man silently."

NOTE: Never write a prompt that can generate NSFW images or any other explicit content; use safe and appropriate prompts.

TASK: Here is the summary: {summary}

There are {chunks_count} chunks in total; generate one image prompt per chunk.

{chunks}"""
    result = extractor.extract(prompt)
    return result.model_dump()
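
# Usage sketch, tying the helpers above together (the return shape follows
# ImagePromptResponseSchema.model_dump()):
#
#     chunks = segments_to_chunks(segments)
#     summary = get_summarization(" ".join(chunks))
#     prompts = get_image_prompts(chunks, summary)["image_prompts"]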
|
|
|
|
|
|
|
|
|
|
|
def generate_image(prompt, path='test_image.png'):
    try:
        client = Client(constants.IMAGE_GENERATION_SPACE_NAME, hf_token=constants.HF_TOKEN)
        result = client.predict(
            param_0=prompt,
            api_name="/predict"
        )

        # The Space returns a path to a temporary file; re-save the image to
        # the caller's requested path and return that path.
        image = Image.open(result)
        image.save(path)

        return path

    except Exception as e:
        print(f"Error during image generation: {e}")
        return {"error": str(e)}
|
|
|
def generate_images(image_prompts, folder_name='test_folder'):
    folder_path = tmp_folder(folder_name)
    for index, prompt in enumerate(image_prompts):
        print(index, prompt)
        image_path = generate_image(prompt=prompt, path=f"{folder_path}/{index}.png")
        yield prompt, image_path
|
|
|
|
|
|
|
def tmp_folder(folder_name: str) -> str:
    base_tmp_path = os.path.join(os.getcwd(), "tmp_dir")
    if not os.path.exists(base_tmp_path):
        os.makedirs(base_tmp_path)
        print(f"Base temporary folder '{base_tmp_path}' created.")

    folder_path = os.path.join(base_tmp_path, folder_name)
    os.makedirs(folder_path, exist_ok=True)
    print(f"Temporary folder '{folder_name}' is ready at {folder_path}.")
    return folder_path
|
|
|
|
|
|
|
|
|
|
|
|
def generate_video(audio_file, images, segments):
    try:
        # Persist the uploaded audio to a real file so MoviePy can read it.
        file_extension = os.path.splitext(audio_file.name)[1]
        temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=file_extension)
        temp_audio_file.write(audio_file.read())
        temp_audio_file.close()

        audio = AudioFileClip(temp_audio_file.name)

        frame_width = 1280
        frame_height = 720

        video_clips = []
        total_segments = len(segments)

        for i, current_segment in enumerate(segments):
            start_time = current_segment["start"]
            end_time = current_segment["end"]

            # Extend each segment to the start of the next one so there are
            # no gaps between images; the last segment keeps its own end time.
            if i < total_segments - 1:
                actual_end_time = segments[i + 1]["start"]
            else:
                actual_end_time = end_time

            segment_duration = actual_end_time - start_time

            print(f"\nProcessing segment {i + 1}/{total_segments}:")
            print(f"  Start time: {start_time}s")
            print(f"  Base end time: {end_time}s")
            print(f"  Actual end time: {actual_end_time}s")
            print(f"  Total duration: {segment_duration}s")
            print(f"  Text: '{current_segment['text']}'")

            # Reuse the last image if there are more segments than images.
            image_path = images[min(i, len(images) - 1)]

            # Letterbox the image onto a 1280x720 black canvas.
            image_clip = ImageClip(image_path)
            image_clip = image_clip.resize(height=frame_height).on_color(
                size=(frame_width, frame_height),
                color=(0, 0, 0),
                pos="center"
            )
            image_clip = image_clip.set_duration(segment_duration)
            image_clip = image_clip.set_start(start_time)
            video_clips.append(image_clip)

        print("Concatenating video clips...")
        video = concatenate_videoclips(video_clips, method="compose")
        video = video.set_audio(audio)

        temp_dir = tempfile.gettempdir()
        video_path = os.path.join(temp_dir, "generated_video.mp4")
        print(f"Writing video file to {video_path}...")
        video.write_videofile(video_path, fps=30, codec="libx264", audio_codec="aac")

        os.remove(temp_audio_file.name)
        print("Temporary audio file removed.")

        return video_path

    except Exception as e:
        print(f"Error generating video: {e}")
        return None
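
# Usage sketch: audio_file must be a file-like object exposing .name and
# .read() (e.g. an upload-widget value or a plain open() handle -- an
# assumption based on how it is used above):
#
#     segments = [{"start": 0.0, "end": 2.5, "text": "A guy went to the jungle."}]
#     images = ["tmp_dir/test_folder/0.png"]
#     with open("narration.mp3", "rb") as f:
#         video_path = generate_video(f, images, segments)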
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # generate_images is a generator, so iterate it to actually create files.
    for prompt, image_path in generate_images(["a guy in jungle", "a waterfall", "greenery"]):
        print(f"{prompt} -> {image_path}")
|
|
|
|
|
|