|
import os
import tempfile
from typing import List

import requests
from PIL import Image
from gradio_client import Client
from moviepy.editor import AudioFileClip, ImageClip, concatenate_videoclips
from pydantic import BaseModel, Field

import constants
from structured_output_extractor import StructuredOutputExtractor


def get_summarization(text: str):
    print("\n\nSummarizing text:", text, type(text))

    data = {"text_input": text}
    headers = {"Authorization": f"Bearer {constants.HF_TOKEN}"}

    try:
        response = requests.post(constants.SUMMARIZATION_ENDPOINT, json=data, headers=headers)

        if response.status_code == 200:
            response_data = response.json()
            print("Returning summarization")
            return response_data.get("output", "No output found.")
        else:
            print("An error occurred during the summarization request")
            print(f"Error: {response.status_code}, {response.text}")
            return {"error_occurred": response.text}
    except Exception as e:
        print(f"An exception occurred: {e}")
        return {"error_occurred": str(e)}
|
|
|
|
|
def segments_to_chunks(segments):
    # Keep only the text of each transcription segment.
    return [segment.get("text") for segment in segments]
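
# Illustrative input/output, assuming Whisper-style segments with
# "start"/"end"/"text" keys (the shape generate_video below also relies on):
#
#     segments_to_chunks([
#         {"start": 0.0, "end": 2.5, "text": "A guy went to the jungle."},
#         {"start": 2.5, "end": 4.1, "text": "He saw a lion."},
#     ])
#     # -> ["A guy went to the jungle.", "He saw a lion."]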
|
|
|
|
|
def get_image_prompts(text_input: List[str], summary):
    print(f"summary: {summary}")

    class ImagePromptResponseSchema(BaseModel):
        image_prompts: List[str] = Field(
            description="List of detailed image prompts, one image prompt per chunk"
        )

    extractor = StructuredOutputExtractor(response_schema=ImagePromptResponseSchema)
    chunks_count = len(text_input)
    chunks = "chunk: " + "\nchunk: ".join(text_input)
    prompt = f"""
ROLE: You are a highly experienced image prompt synthesizer.

SYSTEM PROMPT: Given the overall summary and all chunks of the text:
1. Use the summary and the combined context of all chunks; read in sequence, the chunks form the full script.
2. **Identify the theme** and setting of the complete text.
3. For each chunk, read the chunk and the summary, then create a simple, focused, context-aware image prompt based on key visual elements from both.
4. Keep the image style as 3D (MUST BE FOLLOWED).
5. Negatives: hyper-realistic, real-life humans.
6. Ensure a consistent theme across the whole script.

### Example
summary: this text is a story of a guy who went to the jungle and met a lion
**Chunks**:
1. A guy went to the jungle.
2. He saw a lion.

**Combined Context**:
"A man ventured into a jungle and encountered a lion."

**Prompts**:
- **Chunk 1**: "[style: 3D| theme: dark jungle] A man walking into a dense, green jungle, with tall trees and sunlight filtering through the leaves."
- **Chunk 2**: "[style: 3D| theme: dark jungle] In a jungle clearing, a lion stands majestically, its golden mane glowing in the soft sunlight as it watches the man silently."

NOTE: Never write a prompt that can generate NSFW images or any other explicit content; use safe and appropriate prompts.

TASK: Here is the summary: {summary}

There are {chunks_count} chunks in total; generate one image prompt per chunk.

{chunks}"""
    result = extractor.extract(prompt)
    return result.model_dump()
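
# Usage sketch, tying the helpers above together (the return shape follows
# ImagePromptResponseSchema.model_dump()):
#
#     chunks = segments_to_chunks(segments)
#     summary = get_summarization(" ".join(chunks))
#     prompts = get_image_prompts(chunks, summary)["image_prompts"]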
|
|
|
|
|
|
|
|
|
|
|
def generate_image(prompt, path='test_image.png'):
    try:
        client = Client(constants.IMAGE_GENERATION_SPACE_NAME, hf_token=constants.HF_TOKEN)
        result = client.predict(
            param_0=prompt,
            api_name="/predict"
        )

        # The Space returns a path to a temporary file; re-save the image to
        # the caller's requested path and return that path.
        image = Image.open(result)
        image.save(path)

        return path

    except Exception as e:
        print(f"Error during image generation: {e}")
        return {"error": str(e)}
|
|
|
def generate_images(image_prompts, folder_name='test_folder'):
    folder_path = tmp_folder(folder_name)
    for index, prompt in enumerate(image_prompts):
        print(index, prompt)
        image_path = generate_image(prompt=prompt, path=f"{folder_path}/{index}.png")
        yield prompt, image_path
|
|
|
|
|
|
|
def tmp_folder(folder_name: str) -> str:
    base_tmp_path = os.path.join(os.getcwd(), "tmp_dir")
    if not os.path.exists(base_tmp_path):
        os.makedirs(base_tmp_path)
        print(f"Base temporary folder '{base_tmp_path}' created.")

    folder_path = os.path.join(base_tmp_path, folder_name)
    os.makedirs(folder_path, exist_ok=True)
    print(f"Temporary folder '{folder_name}' is ready at {folder_path}.")
    return folder_path
|
|
|
|
|
|
|
|
|
|
|
|
def generate_video(audio_file, images, segments):
    try:
        # Persist the uploaded audio to a real file so MoviePy can read it.
        file_extension = os.path.splitext(audio_file.name)[1]
        temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=file_extension)
        temp_audio_file.write(audio_file.read())
        temp_audio_file.close()

        audio = AudioFileClip(temp_audio_file.name)

        frame_width = 1280
        frame_height = 720

        video_clips = []
        total_segments = len(segments)

        for i, current_segment in enumerate(segments):
            start_time = current_segment["start"]
            end_time = current_segment["end"]

            # Extend each segment to the start of the next one so there are
            # no gaps between images; the last segment keeps its own end time.
            if i < total_segments - 1:
                actual_end_time = segments[i + 1]["start"]
            else:
                actual_end_time = end_time

            segment_duration = actual_end_time - start_time

            print(f"\nProcessing segment {i + 1}/{total_segments}:")
            print(f"  Start time: {start_time}s")
            print(f"  Base end time: {end_time}s")
            print(f"  Actual end time: {actual_end_time}s")
            print(f"  Total duration: {segment_duration}s")
            print(f"  Text: '{current_segment['text']}'")

            # Reuse the last image if there are more segments than images.
            image_path = images[min(i, len(images) - 1)]

            # Letterbox the image onto a 1280x720 black canvas.
            image_clip = ImageClip(image_path)
            image_clip = image_clip.resize(height=frame_height).on_color(
                size=(frame_width, frame_height),
                color=(0, 0, 0),
                pos="center"
            )
            image_clip = image_clip.set_duration(segment_duration)
            image_clip = image_clip.set_start(start_time)
            video_clips.append(image_clip)

        print("Concatenating video clips...")
        video = concatenate_videoclips(video_clips, method="compose")
        video = video.set_audio(audio)

        temp_dir = tempfile.gettempdir()
        video_path = os.path.join(temp_dir, "generated_video.mp4")
        print(f"Writing video file to {video_path}...")
        video.write_videofile(video_path, fps=30, codec="libx264", audio_codec="aac")

        os.remove(temp_audio_file.name)
        print("Temporary audio file removed.")

        return video_path

    except Exception as e:
        print(f"Error generating video: {e}")
        return None
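
# Usage sketch: audio_file must be a file-like object exposing .name and
# .read() (e.g. an upload-widget value or a plain open() handle -- an
# assumption based on how it is used above):
#
#     segments = [{"start": 0.0, "end": 2.5, "text": "A guy went to the jungle."}]
#     images = ["tmp_dir/test_folder/0.png"]
#     with open("narration.mp3", "rb") as f:
#         video_path = generate_video(f, images, segments)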
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # generate_images is a generator, so iterate it to actually create files.
    for prompt, image_path in generate_images(["a guy in jungle", "a waterfall", "greenery"]):
        print(f"{prompt} -> {image_path}")
|
|
|
|
|
|