File size: 9,228 Bytes
c14d84c
 
 
 
 
a46fd4b
 
 
 
 
 
 
 
c14d84c
 
 
125913a
c14d84c
 
 
 
 
 
 
 
 
125913a
c14d84c
 
 
 
125913a
c14d84c
125913a
c14d84c
 
 
 
 
 
125913a
c14d84c
 
 
125913a
c14d84c
 
125913a
 
c14d84c
125913a
c14d84c
 
125913a
c14d84c
e952cc2
a46fd4b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
745e3b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a46fd4b
 
 
 
c14d84c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
661f7c4
 
c14d84c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a46fd4b
e952cc2
a46fd4b
 
e952cc2
 
 
a46fd4b
 
 
 
 
 
 
 
 
 
 
 
 
e952cc2
 
 
a46fd4b
 
e952cc2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a46fd4b
 
e952cc2
a46fd4b
e952cc2
 
a46fd4b
 
 
 
 
 
e952cc2
 
 
 
 
a46fd4b
 
 
e952cc2
a46fd4b
 
 
 
 
 
 
 
e952cc2
 
a46fd4b
 
 
e952cc2
a46fd4b
 
 
 
 
e952cc2
 
 
 
 
c14d84c
 
 
 
 
a46fd4b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
import requests
import constants
import os
from PIL import Image
from gradio_client import Client
import moviepy.editor as mp
from moviepy.video.VideoClip import ImageClip
from moviepy.editor import AudioFileClip
from structured_output_extractor import StructuredOutputExtractor
from pydantic import BaseModel, Field
from typing import List
import tempfile
import os


def clean_response(result):
    print("\n\nStarted Cleaning Response")
    """A temporary fix to the output of predict which returns output of openai-whisper-large-v3-turbo as string
    but it outputs: AutomaticSpeechRecognitionOutput(text=" sometimes life   <- like this the class name still remains
    in the response, ideally which should have started from "sometimes..." as in the given example  """
    # Use find() to get the position of the start and end of the text
    start_pos = result.find('text="') + len('text="')  # Start after 'text="'
    end_pos = result.find('", chunks=None')  # End before '", chunks=None'

    # Extract the text using slicing
    cleaned_result = result[start_pos:end_pos]
    print("Returning Cleaned Result: ", cleaned_result)
    return cleaned_result


def get_translation(text: str):
    print('\n\nTranslating text: ', text, type(text))
    # Input payload
    data = {"text_input": text}

    # Headers for authentication
    headers = {"Authorization": f"Bearer {constants.HF_TOKEN}"}

    try:
        # Make a GET request
        response = requests.post(constants.TRANSLATION_ENDPOINT, json=data, headers=headers)
        # Process response
        if response.status_code == 200:
            response_data = response.json()
            print("Returning Translation")
            return response_data.get("output", "No output found.")
        else:
            print("Some Error Occured During Translation Request")
            print(response)
            print(f"Error: {response.status_code}, {response.text}")
            return {"error_occured" : response.text}
    except Exception as e:
        print(f"An exception occurred: {e}")
        return {"error_occured" : e}
    
 
def segments_to_chunks(segments):
    chunks = []
    for segment in segments:
        chunks.append(segment.get("text"))
    return chunks
    

def get_image_prompts(text_input : List):
        # Example Pydantic model (e.g., Movie)
    class ImagePromptResponseSchema(BaseModel):
        image_prompts: List[str] = Field(
            description="List of detailed image prompts, Each Image Prompt Per Chunk"
        )

    extractor = StructuredOutputExtractor(response_schema=ImagePromptResponseSchema)
    chunks_count = len(text_input)
    chunks = "chunk: " + "\nchunk: ".join(text_input)
    prompt = f"""ROLE: You are a Highly Experienced Image Prompt Sythesizer 

SYSTEM PROMPT:  

1. **Combine all chunks** to understand the complete context.  
2. **Identify the theme** and setting of the combined context.  
3. For each chunk, **generate a simple, context-aware image prompt** that fits the overall picture.  
   - Keep it clear and vivid, adding small details to enhance the visual.  


### Example  

**Chunks**:  
1. A guy went to the jungle.  
2. He saw a lion.  

**Combined Context**:  
"A man ventured into a jungle and encountered a lion."  

**Prompts**:  
- **Chunk 1**: "A man walking into a dense, green jungle, with tall trees and sunlight filtering through the leaves."  
- **Chunk 2**: "In a jungle clearing, a lion stands majestically, its golden mane glowing in the soft sunlight as it watches the man silently."  

NOTE: Never write a prompt that can generate NSFW images, or any other explicit content, use safe and appropriate prompts

TASK:  Generate {chunks_count} image prompts, Each per chunk\n\n {chunks}"""
    result = extractor.extract(prompt)
    return result.model_dump()   # returns dictionary version pydantic model
    
    



def generate_image(prompt, path='test_image.png'):
    try:
        # Initialize the Gradio Client with Hugging Face token
        client = Client(constants.IMAGE_GENERATION_SPACE_NAME, hf_token=constants.HF_TOKEN)

        # Make the API request
        result = client.predict(
            param_0=prompt,  # Text prompt for image generation
            api_name="/predict"
        )

        image = Image.open(result)
        image.save(path)

        # Return the result (which includes the URL or file path)
        return result

    except Exception as e:
        print(f"Error during image generation: {e}")
        return {"error": str(e)}
    
def generate_images(image_prompts, folder_name='test_folder'):
    folder_path = tmp_folder(folder_name)
    for index, prompt in enumerate(image_prompts):
        print(index, prompt)
        image_path = generate_image(prompt=prompt, path=f"{folder_path}/{index}.png")
        yield prompt, image_path
    


def tmp_folder(folder_name: str) -> str:
    # Use the current working directory or any other accessible path for temp folders
    base_tmp_path = os.path.join(os.getcwd(), "tmp_dir")  # Change this to any path you prefer
    
    # Ensure that the base temp folder exists
    if not os.path.exists(base_tmp_path):
        os.makedirs(base_tmp_path)
        print(f"Base temporary folder '{base_tmp_path}' created.")
    
    # Define the path for the specific temporary folder
    folder_path = os.path.join(base_tmp_path, folder_name)
    
    # Create the specific temporary folder if it doesn't exist
    os.makedirs(folder_path, exist_ok=True)
    
    print(f"Temporary folder '{folder_name}' is ready at {folder_path}.")
    
    return folder_path



from moviepy.editor import *


import os
import tempfile
from moviepy.editor import AudioFileClip, ImageClip, concatenate_videoclips


def generate_video(audio_file, images, segments):
    try:
        # Save the uploaded audio file to a temporary location
        file_extension = os.path.splitext(audio_file.name)[1]
        temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=f"{file_extension}")
        temp_audio_path.write(audio_file.read())
        temp_audio_path.close()

        # Load the audio file using MoviePy
        audio = AudioFileClip(temp_audio_path.name)

        # Define YouTube-like dimensions (16:9 aspect ratio)
        frame_width = 1280
        frame_height = 720

        video_clips = []
        total_segments = len(segments)

        for i, current_segment in enumerate(segments):
            start_time = current_segment["start"]
            end_time = current_segment["end"]
            
            # Calculate the actual duration including any gap until the next segment
            if i < total_segments - 1:
                # If there's a next segment, extend until it starts
                next_segment = segments[i + 1]
                actual_end_time = next_segment["start"]
            else:
                # For the last segment, use its end time
                actual_end_time = end_time
            
            # Calculate total duration including any gap
            segment_duration = actual_end_time - start_time
            
            print(f"\nProcessing segment {i + 1}/{total_segments}:")
            print(f"  Start time: {start_time}s")
            print(f"  Base end time: {end_time}s")
            print(f"  Actual end time: {actual_end_time}s")
            print(f"  Total duration: {segment_duration}s")
            print(f"  Text: '{current_segment['text']}'")
            
            # Ensure the image index is within bounds
            image_path = images[min(i, len(images) - 1)]
            
            # Create an ImageClip for the current segment
            image_clip = ImageClip(image_path)
            
            # Resize and pad the image to fit a 16:9 aspect ratio
            image_clip = image_clip.resize(height=frame_height).on_color(
                size=(frame_width, frame_height),
                color=(0, 0, 0),  # Black background
                pos="center"      # Center the image
            )
            
            # Set the duration and start time for the clip
            image_clip = image_clip.set_duration(segment_duration)
            image_clip = image_clip.set_start(start_time)  # Set the start time explicitly
            
            video_clips.append(image_clip)

        # Concatenate all the image clips to form the video
        print("Concatenating video clips...")
        video = concatenate_videoclips(video_clips, method="compose")

        # Add the audio to the video
        video = video.set_audio(audio)

        # Save the video to a temporary file
        temp_dir = tempfile.gettempdir()
        video_path = os.path.join(temp_dir, "generated_video.mp4")
        print(f"Writing video file to {video_path}...")
        video.write_videofile(video_path, fps=30, codec="libx264", audio_codec="aac")

        # Clean up the temporary audio file
        os.remove(temp_audio_path.name)
        print("Temporary audio file removed.")

        return video_path

    except Exception as e:
        print(f"Error generating video: {e}")
        return None






# Example usage:
if __name__ == "__main__":
    result = generate_images(["a guy in jungle", "a waterfall","greenery"])