wower99's picture
video generation feature v1 is functional
a46fd4b
raw
history blame
9.91 kB
import requests
import constants
import os
from PIL import Image
from gradio_client import Client
import moviepy.editor as mp
from moviepy.video.VideoClip import ImageClip
from moviepy.editor import AudioFileClip
from structured_output_extractor import StructuredOutputExtractor
from pydantic import BaseModel, Field
from typing import List
import tempfile
import os
def clean_response(result):
    """Strip the AutomaticSpeechRecognitionOutput wrapper from a whisper response.

    Temporary fix: the predict endpoint returns the repr of an
    AutomaticSpeechRecognitionOutput, e.g.
        AutomaticSpeechRecognitionOutput(text=" sometimes life ...", chunks=None)
    ideally the response should have started from "sometimes..." directly.
    This extracts only the transcript between 'text="' and '", chunks=None'.

    Args:
        result: Raw response string from the ASR endpoint.

    Returns:
        The extracted transcript, or the input unchanged when the wrapper
        markers are not present (input is already clean).
    """
    print("\n\nStarted Cleaning Response")
    start_marker = 'text="'
    end_marker = '", chunks=None'
    start_pos = result.find(start_marker)
    end_pos = result.find(end_marker)
    # Guard: find() returns -1 when a marker is absent; the original
    # unconditional slicing then produced garbage. Return input as-is.
    if start_pos == -1 or end_pos == -1:
        print("Wrapper markers not found; returning result unchanged")
        return result
    cleaned_result = result[start_pos + len(start_marker):end_pos]
    print("Returning Cleaned Result: ", cleaned_result)
    return cleaned_result
def get_translation(text: str):
    """Translate *text* via the remote translation endpoint.

    Args:
        text: Source text to translate.

    Returns:
        The translated string on success, or a dict of the form
        {"error_occured": <message>} on HTTP error or network failure.
    """
    print('\n\nTranslating text: ', text, type(text))
    # Input payload
    data = {"text_input": text}
    # Headers for authentication
    headers = {"Authorization": f"Bearer {constants.HF_TOKEN}"}
    try:
        # Make a POST request to the translation service
        response = requests.post(constants.TRANSLATION_ENDPOINT, json=data, headers=headers)
        # Process response
        if response.status_code == 200:
            response_data = response.json()
            print("Returning Translation")
            return response_data.get("output", "No output found.")
        else:
            print("Some Error Occured During Translation Request")
            print(response)
            print(f"Error: {response.status_code}, {response.text}")
            return {"error_occured": response.text}
    except Exception as e:
        print(f"An exception occurred: {e}")
        # Return the message string, not the exception object, so the
        # dict stays JSON-serializable (consistent with the HTTP branch).
        return {"error_occured": str(e)}
def old_get_image_prompts(text_input):
    """Request image prompts for *text_input* from the remote prompt-generation endpoint.

    Args:
        text_input: Text to derive image prompts from.

    Returns:
        Parsed JSON response on success, or {"error": <message>} on failure.
    """
    endpoint = f"{constants.PROMPT_GENERATION_ENDPOINT}"
    request_headers = {
        "Authorization": f"Bearer {constants.HF_TOKEN}",  # Replace with your token
        "Content-Type": "application/json",  # Optional, ensures JSON payload
    }
    body = {"text_input": text_input}
    try:
        print("making post request for image prompts", endpoint)
        response = requests.post(endpoint, json=body, headers=request_headers)
        # Raise an exception for HTTP errors, then hand back the parsed JSON
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error during request: {e}")
        return {"error": str(e)}
def segments_to_chunks(segments):
    """Flatten transcript segments to a list of their "text" fields.

    Args:
        segments: Iterable of segment dicts (each may carry a "text" key).

    Returns:
        List with one entry per segment (None where "text" is missing).
    """
    return [segment.get("text") for segment in segments]
def get_image_prompts(text_input : List):
    """Generate one detailed image prompt per transcript chunk.

    Uses StructuredOutputExtractor with a pydantic schema so the LLM
    output is guaranteed to be a list of prompt strings.

    Args:
        text_input: List of transcript chunk strings.

    Returns:
        Dict form of the response model, i.e. {"image_prompts": [...]}.
    """
    # Response schema the extractor must conform to
    class ImagePromptResponseSchema(BaseModel):
        image_prompts: List[str] = Field(
            description="List of detailed image prompts, Each Image Prompt Per Chunk"
        )

    schema_extractor = StructuredOutputExtractor(response_schema=ImagePromptResponseSchema)
    total_chunks = len(text_input)
    joined_chunks = "chunk: " + "\nchunk: ".join(text_input)
    prompt = f"""ROLE: You are a Highly Experienced Image Prompt Sythesizer
    TASK: Generate {total_chunks} image prompts, Each per chunk\n\n {joined_chunks}"""
    extraction = schema_extractor.extract(prompt)
    # returns dictionary version of the pydantic model
    return extraction.model_dump()
def generate_image(prompt, path='test_image.png'):
    """Generate one image for *prompt* via the Gradio space and save it at *path*.

    Args:
        prompt: Text prompt for image generation.
        path: Local file path the generated image is saved to.

    Returns:
        The raw predict() result (file path / URL) on success, or
        {"error": <message>} on failure.
    """
    try:
        # Authenticated Gradio client for the image-generation space
        gradio_client = Client(constants.IMAGE_GENERATION_SPACE_NAME, hf_token=constants.HF_TOKEN)
        # Make the API request
        result = gradio_client.predict(
            param_0=prompt,  # Text prompt for image generation
            api_name="/predict"
        )
        # Persist a copy of the generated image at the requested path
        Image.open(result).save(path)
        return result
    except Exception as e:
        print(f"Error during image generation: {e}")
        return {"error": str(e)}
def generate_images(image_prompts, folder_name='test_folder'):
    """Lazily generate one image per prompt, yielding (prompt, image_path) pairs.

    Images are written into a temp folder named *folder_name* as
    0.png, 1.png, ... in prompt order.

    Args:
        image_prompts: Iterable of text prompts.
        folder_name: Name of the tmp_dir sub-folder to write into.
    """
    target_folder = tmp_folder(folder_name)
    for idx, image_prompt in enumerate(image_prompts):
        print(idx, image_prompt)
        saved_path = generate_image(prompt=image_prompt, path=f"{target_folder}/{idx}.png")
        yield image_prompt, saved_path
def tmp_folder(folder_name: str) -> str:
    """Create (if needed) and return a named working folder under ./tmp_dir.

    Args:
        folder_name: Name of the sub-folder to create.

    Returns:
        Path to the ready-to-use folder.
    """
    # Use the current working directory (not tempfile) so generated
    # assets stay in a predictable, accessible location.
    base_tmp_path = os.path.join(os.getcwd(), "tmp_dir")
    folder_path = os.path.join(base_tmp_path, folder_name)
    # exist_ok=True creates base and leaf in one call and avoids the
    # check-then-create race of os.path.exists() + os.makedirs().
    os.makedirs(folder_path, exist_ok=True)
    print(f"Temporary folder '{folder_name}' is ready at {folder_path}.")
    return folder_path
def old_generate_video(audio_file, images, segments):
    """Deprecated: build a video from still images timed to transcript segments.

    Superseded by generate_video(), which additionally letterboxes
    frames to a 16:9 canvas.

    Args:
        audio_file: Uploaded file-like object with .name and .read().
        images: List of image file paths, one per segment (last reused).
        segments: List of dicts with "start"/"end" times in seconds.

    Returns:
        Path of the written mp4 on success, None on failure.
    """
    print(f"images: {images}")
    print(f"segments: {segments}")
    print(f"audio file: {audio_file.name}")
    temp_audio_path = None
    try:
        # Persist the uploaded audio so moviepy can open it by filename
        file_extension = os.path.splitext(audio_file.name)[1]
        temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=f"{file_extension}")
        temp_audio_path.write(audio_file.read())
        temp_audio_path.close()

        # Load the audio file using MoviePy
        audio = mp.AudioFileClip(temp_audio_path.name)

        # Create one still-image clip per segment, timed to the transcript
        video_clips = []
        for i, segment in enumerate(segments):
            start_time = segment["start"]
            end_time = segment["end"]
            # Clamp the index so extra segments reuse the last image
            image_path = images[min(i, len(images) - 1)]
            image_clip = ImageClip(image_path, duration=end_time - start_time)
            image_clip = image_clip.set_start(start_time).set_end(end_time)
            video_clips.append(image_clip)

        # Concatenate the image clips and attach the audio track
        video = mp.concatenate_videoclips(video_clips, method="compose")
        video = video.set_audio(audio)

        video_path = os.path.join(tempfile.gettempdir(), "generated_video.mp4")
        video.write_videofile(video_path, fps=24, codec="libx264", audio_codec="aac")
        return video_path
    except Exception as e:
        print(f"Error generating video: {e}")
        return None
    finally:
        # Clean up the temp audio file even when rendering fails
        # (the original only removed it on the success path — a leak).
        if temp_audio_path is not None and os.path.exists(temp_audio_path.name):
            os.remove(temp_audio_path.name)
from moviepy.editor import *
def generate_video(audio_file, images, segments):
    """Build a 1920x1080 video from still images timed to transcript segments.

    Each segment shows its matching image (the last image is reused if
    segments outnumber images), scaled and letterboxed on black to a
    16:9 frame, with the uploaded audio as the soundtrack.

    Args:
        audio_file: Uploaded file-like object with .name and .read().
        images: List of image file paths, one per segment (last reused).
        segments: List of dicts with "start"/"end" times in seconds.

    Returns:
        Path of the written mp4 on success, None on failure.
    """
    print(f"images: {images}")
    print(f"segments: {segments}")
    print(f"audio file: {audio_file.name}")
    temp_audio_path = None
    try:
        # Persist the uploaded audio so moviepy can open it by filename
        file_extension = os.path.splitext(audio_file.name)[1]
        temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=f"{file_extension}")
        temp_audio_path.write(audio_file.read())
        temp_audio_path.close()

        # Load the audio file using MoviePy
        audio = AudioFileClip(temp_audio_path.name)

        # YouTube-like dimensions (16:9 aspect ratio)
        frame_width = 1920
        frame_height = 1080

        # Create one still-image clip per segment, timed to the transcript
        video_clips = []
        for i, segment in enumerate(segments):
            start_time = segment["start"]
            end_time = segment["end"]
            # Clamp the index so extra segments reuse the last image
            image_path = images[min(i, len(images) - 1)]
            image_clip = ImageClip(image_path, duration=end_time - start_time)
            # Scale to frame height, then letterbox onto a black 16:9 canvas
            image_clip = image_clip.resize(height=frame_height).on_color(
                size=(frame_width, frame_height),
                color=(0, 0, 0),  # Black background
                pos="center"      # Center the image
            )
            image_clip = image_clip.set_start(start_time).set_end(end_time)
            video_clips.append(image_clip)

        # Concatenate the image clips and attach the audio track
        video = concatenate_videoclips(video_clips, method="compose")
        video = video.set_audio(audio)

        video_path = os.path.join(tempfile.gettempdir(), "generated_video.mp4")
        video.write_videofile(video_path, fps=24, codec="libx264", audio_codec="aac")
        return video_path
    except Exception as e:
        print(f"Error generating video: {e}")
        return None
    finally:
        # Clean up the temp audio file even when rendering fails
        # (the original only removed it on the success path — a leak).
        if temp_audio_path is not None and os.path.exists(temp_audio_path.name):
            os.remove(temp_audio_path.name)
# Example usage:
if __name__ == "__main__":
    # generate_images is a generator: it must be iterated for any work
    # to happen (the original call created the generator and discarded
    # it, so no images were ever produced).
    for prompt, image_path in generate_images(["a guy in jungle", "a waterfall", "greenery"]):
        print(f"Generated {image_path} for prompt: {prompt}")