|
|
|
import requests |
|
import constants |
|
import os |
|
from PIL import Image |
|
from gradio_client import Client |
|
import moviepy.editor as mp |
|
from moviepy.video.VideoClip import ImageClip |
|
from moviepy.editor import AudioFileClip |
|
from structured_output_extractor import StructuredOutputExtractor |
|
from pydantic import BaseModel, Field |
|
from typing import List |
|
import tempfile |
|
import os |
|
|
|
|
|
def clean_response(result):
    """Extract the transcription text from a stringified whisper output.

    Temporary fix: the upstream predict() stringifies an
    AutomaticSpeechRecognitionOutput, e.g.
    'AutomaticSpeechRecognitionOutput(text=" sometimes life...", chunks=None)',
    so the wrapper class name leaks into the response. This strips the
    wrapper and returns only the text between text=" and ", chunks=None.

    If the wrapper markers are absent, the input is assumed to already be
    plain text and is returned unchanged (the original code mangled it).
    """
    print("\n\nStarted Cleaning Response")

    start_marker = 'text="'
    start_idx = result.find(start_marker)
    if start_idx == -1:
        # No wrapper present — already clean.
        print("Returning Cleaned Result: ", result)
        return result

    start_pos = start_idx + len(start_marker)
    end_pos = result.find('", chunks=None', start_pos)
    if end_pos == -1:
        # End marker missing: take everything after the start marker
        # (the original slice [start:-1] silently dropped the last char).
        end_pos = len(result)

    cleaned_result = result[start_pos:end_pos]
    print("Returning Cleaned Result: ", cleaned_result)
    return cleaned_result
|
|
|
|
|
def get_translation(text: str):
    """POST *text* to the translation endpoint and return the translation.

    Returns the endpoint's "output" field on success. On an HTTP error or
    an exception, returns {"error_occured": <message string>} so callers
    can detect failure by key.
    """
    print('\n\nTranslating text: ', text, type(text))

    data = {"text_input": text}
    headers = {"Authorization": f"Bearer {constants.HF_TOKEN}"}

    try:
        response = requests.post(constants.TRANSLATION_ENDPOINT, json=data, headers=headers)

        if response.status_code == 200:
            response_data = response.json()
            print("Returning Translation")
            return response_data.get("output", "No output found.")
        else:
            print("Some Error Occured During Translation Request")
            print(response)
            print(f"Error: {response.status_code}, {response.text}")
            return {"error_occured" : response.text}
    except Exception as e:
        print(f"An exception occurred: {e}")
        # Return the message as a string, not the exception object, so the
        # error dict is JSON-serializable and matches the HTTP-error branch.
        return {"error_occured" : str(e)}
|
|
|
|
|
|
|
def old_get_image_prompts(text_input):
    """Legacy helper: fetch image prompts for *text_input* over HTTP.

    Posts to the prompt-generation endpoint and returns the decoded JSON
    body, or {"error": <message>} when the request fails.
    """
    endpoint = f"{constants.PROMPT_GENERATION_ENDPOINT}"
    request_headers = {
        "Authorization": f"Bearer {constants.HF_TOKEN}",
        "Content-Type": "application/json"
    }

    try:
        print("making post request for image prompts", endpoint)
        resp = requests.post(
            endpoint,
            json={"text_input": text_input},
            headers=request_headers,
        )
        # Raise on 4xx/5xx so all failures funnel into the except branch.
        resp.raise_for_status()
        return resp.json()
    except requests.exceptions.RequestException as e:
        print(f"Error during request: {e}")
        return {"error": str(e)}
|
|
|
def segments_to_chunks(segments):
    """Flatten transcript segments into a list of their "text" values.

    Segments missing a "text" key contribute None (dict.get semantics).
    """
    return [segment.get("text") for segment in segments]
|
|
|
|
|
def get_image_prompts(text_input : List):
    """Generate one detailed image prompt per transcript chunk via the LLM.

    text_input: list of transcript chunk strings.
    Returns the schema's model_dump() dict: {"image_prompts": [...]},
    one prompt per input chunk.
    """

    # Local schema: constrains the extractor's structured output shape.
    class ImagePromptResponseSchema(BaseModel):
        image_prompts: List[str] = Field(
            description="List of detailed image prompts, Each Image Prompt Per Chunk"
        )

    extractor = StructuredOutputExtractor(response_schema=ImagePromptResponseSchema)
    chunks_count = len(text_input)
    chunks = "chunk: " + "\nchunk: ".join(text_input)
    # Fixed typo in the instruction sent to the model ("Sythesizer").
    prompt = f"""ROLE: You are a Highly Experienced Image Prompt Synthesizer
    TASK: Generate {chunks_count} image prompts, Each per chunk\n\n {chunks}"""
    result = extractor.extract(prompt)
    return result.model_dump()
|
|
|
|
|
|
|
|
|
|
|
def generate_image(prompt, path='test_image.png'):
    """Generate a single image for *prompt* via the HF Space client.

    Saves a copy of the generated image at *path*. Returns the file path
    produced by the Space, or {"error": <message>} on failure.
    """
    try:
        space = Client(constants.IMAGE_GENERATION_SPACE_NAME, hf_token=constants.HF_TOKEN)

        result = space.predict(
            param_0=prompt,
            api_name="/predict"
        )

        # Persist a copy of the generated file at the caller's location.
        Image.open(result).save(path)

        return result

    except Exception as e:
        print(f"Error during image generation: {e}")
        return {"error": str(e)}
|
|
|
def generate_images(image_prompts, folder_name='test_folder'):
    """Lazily generate an image per prompt, saved under tmp_dir/<folder_name>.

    Generator: yields (prompt, image_path) pairs so callers can stream
    progress as each image finishes.
    """
    target_dir = tmp_folder(folder_name)
    for idx, single_prompt in enumerate(image_prompts):
        print(idx, single_prompt)
        saved_path = generate_image(prompt=single_prompt, path=f"{target_dir}/{idx}.png")
        yield single_prompt, saved_path
|
|
|
|
|
|
|
def tmp_folder(folder_name: str) -> str:
    """Ensure ./tmp_dir/<folder_name> exists and return its path.

    The working directory gains a "tmp_dir" base folder holding one
    subfolder per job.
    """
    base_tmp_path = os.path.join(os.getcwd(), "tmp_dir")
    folder_path = os.path.join(base_tmp_path, folder_name)

    # makedirs creates intermediate directories, so the original's separate
    # exists-check + makedirs for the base (a TOCTOU race) collapses into
    # this single idempotent call.
    os.makedirs(folder_path, exist_ok=True)

    print(f"Temporary folder '{folder_name}' is ready at {folder_path}.")

    return folder_path
|
|
|
|
|
|
|
def old_generate_video(audio_file, images, segments):
    """Legacy renderer: build an mp4 slideshow (one image per segment) over the audio.

    audio_file: file-like object exposing .name and .read() (e.g. an upload handle).
    images: list of image file paths; the last image is reused when there are
        more segments than images.
    segments: list of dicts with numeric "start" and "end" keys (seconds).

    Returns the path to the written mp4, or None if anything fails.
    Superseded by generate_video(), which also letterboxes frames to 1920x1080.
    """
    print(f"images: {images}")
    print(f"segments: {segments}")
    print(f"audio file: {audio_file.name}")
    try:
        # Copy the uploaded audio into a named temp file so moviepy can open it by path.
        file_extension = os.path.splitext(audio_file.name)[1]
        temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=f"{file_extension}")
        temp_audio_path.write(audio_file.read())
        temp_audio_path.close()

        audio = mp.AudioFileClip(temp_audio_path.name)
        audio_duration = audio.duration  # NOTE(review): computed but never used below

        video_clips = []
        for i, segment in enumerate(segments):
            start_time = segment["start"]
            end_time = segment["end"]

            # Clamp the index so extra segments reuse the final image.
            image_path = images[min(i, len(images) - 1)]

            # One still frame spanning exactly this segment's time range.
            image_clip = ImageClip(image_path, duration=end_time - start_time)
            image_clip = image_clip.set_start(start_time).set_end(end_time)
            video_clips.append(image_clip)

        video = mp.concatenate_videoclips(video_clips, method="compose")

        video = video.set_audio(audio)

        # Write the final render into the system temp directory.
        temp_dir = tempfile.gettempdir()
        video_path = os.path.join(temp_dir, "generated_video.mp4")
        video.write_videofile(video_path, fps=24, codec="libx264", audio_codec="aac")

        # NOTE(review): the temp audio file leaks if an exception fires above this line.
        os.remove(temp_audio_path.name)

        return video_path

    except Exception as e:
        print(f"Error generating video: {e}")
        return
|
|
|
|
|
from moviepy.editor import * |
|
|
|
def generate_video(audio_file, images, segments):
    """Render a 1920x1080 slideshow video: one image per segment, with audio.

    audio_file: file-like object exposing .name and .read() (e.g. an upload handle).
    images: list of image file paths; the last image is reused when there are
        more segments than images.
    segments: list of dicts with numeric "start" and "end" keys (seconds).

    Each image is scaled to the frame height and letterboxed onto a black
    1920x1080 canvas. Returns the path of the written mp4, or None on failure.
    """
    print(f"images: {images}")
    print(f"segments: {segments}")
    print(f"audio file: {audio_file.name}")
    temp_audio_path = None
    try:
        # Copy the uploaded audio into a named temp file so moviepy can open it by path.
        file_extension = os.path.splitext(audio_file.name)[1]
        temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=f"{file_extension}")
        temp_audio_path.write(audio_file.read())
        temp_audio_path.close()

        audio = AudioFileClip(temp_audio_path.name)

        frame_width = 1920
        frame_height = 1080

        video_clips = []
        for i, segment in enumerate(segments):
            start_time = segment["start"]
            end_time = segment["end"]

            # Clamp the index so extra segments reuse the final image.
            image_path = images[min(i, len(images) - 1)]

            image_clip = ImageClip(image_path, duration=end_time - start_time)

            # Scale to frame height, then letterbox onto a black canvas.
            image_clip = image_clip.resize(height=frame_height).on_color(
                size=(frame_width, frame_height),
                color=(0, 0, 0),
                pos="center"
            )

            image_clip = image_clip.set_start(start_time).set_end(end_time)
            video_clips.append(image_clip)

        video = concatenate_videoclips(video_clips, method="compose")
        video = video.set_audio(audio)

        # Write the final render into the system temp directory.
        temp_dir = tempfile.gettempdir()
        video_path = os.path.join(temp_dir, "generated_video.mp4")
        video.write_videofile(video_path, fps=24, codec="libx264", audio_codec="aac")

        return video_path

    except Exception as e:
        print(f"Error generating video: {e}")
        return
    finally:
        # Always remove the temp audio copy — the original leaked it whenever
        # an exception fired before the cleanup line.
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path.name)
            except OSError:
                pass
|
|
|
|
|
|
|
if __name__ == "__main__":
    # generate_images is a generator: it must be iterated, otherwise nothing
    # runs (the original call discarded the generator and produced no images).
    for prompt, image_path in generate_images(["a guy in jungle", "a waterfall", "greenery"]):
        print(prompt, image_path)
|
|
|
|
|
|