audio-to-video-generator

Running

File size: 8,895 Bytes

c14d84c
 
 
 
 
a46fd4b
 
 
 
 
 
 
 
c14d84c
 
4e4c3a4
 
c14d84c
125913a
c14d84c
 
 
 
 
 
4e4c3a4
c14d84c
 
 
4e4c3a4
c14d84c
 
4e4c3a4
125913a
c14d84c
125913a
c14d84c
 
125913a
c14d84c
e952cc2
a46fd4b
 
 
 
 
 
 
4e4c3a4
 
a46fd4b
 
 
 
 
 
 
 
 
4e4c3a4
 
 
745e3b9
4e4c3a4
28f3522
4e4c3a4
acede7f
784ffea
cdc14fc
5a4ef7f
745e3b9
 
 
4e4c3a4
745e3b9
 
 
 
 
 
 
 
5a4ef7f
 
745e3b9
 
 
4e4c3a4
a46fd4b
 
 
 
c14d84c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
661f7c4
 
c14d84c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a46fd4b
e952cc2
a46fd4b
 
e952cc2
 
 
a46fd4b
 
 
 
 
 
 
 
 
 
 
 
 
e952cc2
 
 
a46fd4b
 
e952cc2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a46fd4b
 
e952cc2
a46fd4b
e952cc2
 
a46fd4b
 
 
 
 
 
e952cc2
 
 
 
 
a46fd4b
 
 
e952cc2
a46fd4b
 
 
 
 
 
 
 
e952cc2
 
a46fd4b
 
 
e952cc2
a46fd4b
 
 
 
 
e952cc2
 
 
 
 
c14d84c
 
 
 
 
a46fd4b

import requests
import constants
import os
from PIL import Image
from gradio_client import Client
import moviepy.editor as mp
from moviepy.video.VideoClip import ImageClip
from moviepy.editor import AudioFileClip
from structured_output_extractor import StructuredOutputExtractor
from pydantic import BaseModel, Field
from typing import List
import tempfile
import os


def get_summarization(text: str, timeout: float = 300.0):
    """Request a summary of ``text`` from the configured summarization endpoint.

    Sends ``{"text_input": text}`` as a JSON body in an HTTP POST to
    ``constants.SUMMARIZATION_ENDPOINT``, authenticated with a bearer token
    built from ``constants.HF_TOKEN``.

    Args:
        text: The text to summarize.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled endpoint;
            a timeout surfaces as the exception-path error dict below.

    Returns:
        On HTTP 200, the ``"output"`` field of the JSON response, or the
        string ``"No output found."`` when that field is absent.
        On any other status code, ``{"error_occured": <response body text>}``.
        If the request or JSON decoding raises,
        ``{"error_occured": <the exception instance>}``.
    """
    print('\n\nSummarizing text: ', text, type(text))
    payload = {"text_input": text}

    # Bearer-token authentication header.
    headers = {"Authorization": f"Bearer {constants.HF_TOKEN}"}

    try:
        # POST the payload; the timeout keeps a dead endpoint from hanging the caller.
        response = requests.post(
            constants.SUMMARIZATION_ENDPOINT,
            json=payload,
            headers=headers,
            timeout=timeout,
        )
        if response.status_code != 200:
            print("Some Error Occured During Summarization Request")
            print(response)
            print(f"Error: {response.status_code}, {response.text}")
            return {"error_occured" : response.text}

        response_data = response.json()
        print("Returning Summarization")
        return response_data.get("output", "No output found.")
    except Exception as e:
        # Boundary handler: callers receive an error dict instead of an exception.
        print(f"An exception occurred: {e}")
        return {"error_occured" : e}
    
 
def segments_to_chunks(segments):
    """Return the ``"text"`` value of every segment, in order.

    Each segment is read with ``.get("text")``, so a segment without that key
    contributes ``None`` to the result.
    """
    return [entry.get("text") for entry in segments]
    

def get_image_prompts(text_input: List, summary):
    """Ask the structured-output extractor for one image prompt per text chunk.

    Args:
        text_input: List of chunk strings; they are joined into the prompt,
            each prefixed with ``"chunk: "``.
        summary: Overall summary of the text, interpolated into the prompt.

    Returns:
        The extractor's result converted with ``model_dump()``; the response
        schema declares a single ``image_prompts`` list of strings.
    """
    print(f"summary: {summary}")

    # Response schema handed to the extractor: a flat list of prompt strings.
    class ImagePromptResponseSchema(BaseModel):
        image_prompts: List[str] = Field(
            description="List of detailed image prompts, Each Image Prompt Per Chunk"
        )

    prompt_extractor = StructuredOutputExtractor(
        response_schema=ImagePromptResponseSchema
    )

    chunks_count = len(text_input)
    chunks = "chunk: " + "\nchunk: ".join(text_input)

    # The prompt text below is sent verbatim to the model; keep it unchanged.
    prompt = f"""
    
ROLE: You are a Highly Experienced Image Prompt Sythesizer 

SYSTEM PROMPT:  Given the Overall Summary and All Chunks of the Text
1. Use Summary and Combined context of all chunks because if you read all chunks in a sequence it is the script
3. **Identify the theme** and setting of the complete text
4. For each chunk read the chunk and its summary, then create a simple, focused Context-aware image prompt based on key visual elements from both
5. Keep Image Style as 3D (MUST BE FOLLOWED)
6. Negatives: Hyper-Realistic, Real Life Human
7. Ensure that concsistent theme follows over the whole script 


### Example  
summary: this text is a story of guy who went to jungle and a lion
**Chunks**:  
1. A guy went to the jungle.  
2. He saw a lion.  

**Combined Context**:  
"A man ventured into a jungle and encountered a lion."  

**Prompts**:  
- **Chunk 1**: "[style: 3D| theme: dark jungle] A man walking into a dense, green jungle, with tall trees and sunlight filtering through the leaves."  
- **Chunk 2**: "[style: 3D| theme: dark jungle] In a jungle clearing, a lion stands majestically, its golden mane glowing in the soft sunlight as it watches the man silently."  

NOTE: Never write a prompt that can generate NSFW images, or any other explicit content, use safe and appropriate prompts

TASK:  Here is the summary: {summary}\n\n and \n\n Total of {chunks_count} chunks, Generate an Image Prompt Each per chunk\n\n {chunks}"""

    # Convert the pydantic model returned by the extractor into a plain dict.
    return prompt_extractor.extract(prompt).model_dump()
    
    



def generate_image(prompt, path='test_image.png'):
    """Generate an image for ``prompt`` via the configured Gradio Space and save a copy.

    A ``Client`` is created for ``constants.IMAGE_GENERATION_SPACE_NAME`` using
    ``constants.HF_TOKEN``, its ``/predict`` endpoint is called with the prompt,
    and the returned value is opened with PIL and saved to ``path``.

    Args:
        prompt: Text prompt passed to the Space as ``param_0``.
        path: Where the copy of the generated image is written.

    Returns:
        The value returned by ``client.predict`` on success (not ``path``), or
        ``{"error": <message>}`` if any step raises.
    """
    try:
        # Authenticated client for the image-generation Space.
        space_client = Client(
            constants.IMAGE_GENERATION_SPACE_NAME,
            hf_token=constants.HF_TOKEN,
        )

        # Remote call; the result is something PIL can open.
        generated = space_client.predict(param_0=prompt, api_name="/predict")

        # Persist a copy at the requested location.
        Image.open(generated).save(path)
    except Exception as e:
        print(f"Error during image generation: {e}")
        return {"error": str(e)}

    return generated
    
def generate_images(image_prompts, folder_name='test_folder'):
    """Lazily generate one image per prompt.

    This is a generator function: the output folder is not prepared and no
    image is requested until the returned generator is iterated.

    Args:
        image_prompts: Iterable of text prompts.
        folder_name: Name of the sub-folder (resolved by ``tmp_folder``) that
            receives the images, saved as ``<index>.png``.

    Yields:
        ``(prompt, result)`` pairs, where ``result`` is whatever
        ``generate_image`` returned for that prompt.
    """
    output_dir = tmp_folder(folder_name)
    for position, image_prompt in enumerate(image_prompts):
        print(position, image_prompt)
        destination = f"{output_dir}/{position}.png"
        yield image_prompt, generate_image(prompt=image_prompt, path=destination)
    


def tmp_folder(folder_name: str) -> str:
    """Ensure ``<cwd>/tmp_dir/<folder_name>`` exists and return its path.

    The base ``tmp_dir`` directory is created (and reported) when it is
    missing; the requested sub-folder is then created if needed.

    Args:
        folder_name: Name of the sub-folder inside ``tmp_dir``.

    Returns:
        The path of the sub-folder.
    """
    # Base directory lives under the current working directory.
    root_dir = os.path.join(os.getcwd(), "tmp_dir")

    root_missing = not os.path.exists(root_dir)
    if root_missing:
        os.makedirs(root_dir)
        print(f"Base temporary folder '{root_dir}' created.")

    # Sub-folder creation is a no-op when it already exists.
    target_dir = os.path.join(root_dir, folder_name)
    os.makedirs(target_dir, exist_ok=True)

    print(f"Temporary folder '{folder_name}' is ready at {target_dir}.")

    return target_dir



from moviepy.editor import *


import os
import tempfile
from moviepy.editor import AudioFileClip, ImageClip, concatenate_videoclips


def generate_video(audio_file, images, segments):
    """Build a 1280x720 slideshow video showing one image per segment over the audio.

    The audio is copied to a temporary file so MoviePy can open it by path.
    Each segment becomes an image clip lasting from the segment's start until
    the next segment's start (the last one lasts until its own end). The clips
    are concatenated, the audio is attached, and the result is written as
    ``generated_video.mp4`` in the system temp directory.

    Args:
        audio_file: Object exposing ``.name`` (only its extension is used) and
            ``.read()`` returning bytes.
        images: Indexable collection of image sources accepted by
            ``ImageClip``. When there are fewer images than segments the last
            image is reused for the remaining segments.
        segments: Sequence of dicts with ``"start"``, ``"end"`` and ``"text"``
            keys; ``start``/``end`` are treated as seconds.

    Returns:
        Path of the written video file, or ``None`` if any step raised.
    """
    temp_audio_name = None
    audio = None
    try:
        # Save the uploaded audio to a temporary file, keeping its extension.
        file_extension = os.path.splitext(audio_file.name)[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as temp_audio:
            temp_audio_name = temp_audio.name
            temp_audio.write(audio_file.read())

        # Load the audio file using MoviePy
        audio = AudioFileClip(temp_audio_name)

        # Define YouTube-like dimensions (16:9 aspect ratio)
        frame_width = 1280
        frame_height = 720

        video_clips = []
        total_segments = len(segments)

        for i, current_segment in enumerate(segments):
            start_time = current_segment["start"]
            end_time = current_segment["end"]

            # Stretch each clip over any silent gap up to the next segment so
            # the slideshow has no holes; the last segment keeps its own end.
            if i < total_segments - 1:
                actual_end_time = segments[i + 1]["start"]
            else:
                actual_end_time = end_time

            segment_duration = actual_end_time - start_time

            print(f"\nProcessing segment {i + 1}/{total_segments}:")
            print(f"  Start time: {start_time}s")
            print(f"  Base end time: {end_time}s")
            print(f"  Actual end time: {actual_end_time}s")
            print(f"  Total duration: {segment_duration}s")
            print(f"  Text: '{current_segment['text']}'")

            # Reuse the last image when there are more segments than images.
            image_path = images[min(i, len(images) - 1)]

            image_clip = ImageClip(image_path)

            # Scale to the frame height and pad onto a black 16:9 canvas.
            image_clip = image_clip.resize(height=frame_height).on_color(
                size=(frame_width, frame_height),
                color=(0, 0, 0),  # Black background
                pos="center"      # Center the image
            )

            image_clip = image_clip.set_duration(segment_duration)
            # NOTE(review): concatenate_videoclips appears to lay clips out
            # back-to-back from t=0, so this start time is presumably
            # overridden; if the first segment does not start at 0 the images
            # may lead the audio by that offset — confirm.
            image_clip = image_clip.set_start(start_time)

            video_clips.append(image_clip)

        # Concatenate all the image clips to form the video
        print("Concatenating video clips...")
        video = concatenate_videoclips(video_clips, method="compose")

        # Add the audio to the video
        video = video.set_audio(audio)

        # Save the video to a temporary file
        temp_dir = tempfile.gettempdir()
        video_path = os.path.join(temp_dir, "generated_video.mp4")
        print(f"Writing video file to {video_path}...")
        video.write_videofile(video_path, fps=30, codec="libx264", audio_codec="aac")

        return video_path

    except Exception as e:
        print(f"Error generating video: {e}")
        return None

    finally:
        # Cleanup runs on success and failure alike. Close the audio reader
        # first so the temp file is no longer held open when it is deleted.
        if audio is not None:
            try:
                audio.close()
            except Exception as e:
                print(f"Error closing audio clip: {e}")
        if temp_audio_name is not None:
            try:
                os.remove(temp_audio_name)
                print("Temporary audio file removed.")
            except OSError as e:
                # A failed delete must not discard an already written video.
                print(f"Could not remove temporary audio file: {e}")






# Example usage:
if __name__ == "__main__":
    # generate_images is a generator function: nothing is requested or saved
    # until it is iterated, so materialize it to actually run the example.
    result = list(generate_images(["a guy in jungle", "a waterfall", "greenery"]))