# NOTE(review): the three lines below were Hugging Face Spaces UI status text
# captured along with the source; commented out so the module parses.
# Spaces:
# Running
# Running
import asyncio
import os
import subprocess
from concurrent.futures import ThreadPoolExecutor

import cv2
import numpy as np
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
from gtts import gTTS
from PIL import Image, ImageDraw, ImageFont
# FastAPI application instance; route handlers in this module register on it.
app = FastAPI()
# Function to split the script into smaller chunks
def split_script(script: str, max_words: int = 30):
    """Break *script* into consecutive chunks of at most *max_words* words.

    Splitting is on whitespace; an empty or whitespace-only script yields [].
    """
    words = script.split()
    chunks = []
    for start in range(0, len(words), max_words):
        chunks.append(" ".join(words[start:start + max_words]))
    return chunks
# Function to create a video segment from a script chunk
def _hex_to_rgb(color: str) -> tuple:
    """Parse a '#RRGGBB' (or 'RRGGBB') hex color into an (r, g, b) tuple."""
    stripped = color.lstrip("#")
    return tuple(int(stripped[i:i + 2], 16) for i in (0, 2, 4))


def create_video_segment(script_chunk: str, background_color: str, text_color: str, font_size: int) -> str:
    """Render one script chunk as a silent title-card video, then mux in gTTS audio.

    Pipeline: gTTS speech -> Pillow still image with centered text -> OpenCV
    video of repeated frames -> ffmpeg to add the audio track.

    Args:
        script_chunk: Text to display and speak (assumed to fit on one line).
        background_color: Background color as a hex string, e.g. "#000000".
        text_color: Text color as a hex string, e.g. "#FFFFFF".
        font_size: Point size for the rendered text.

    Returns:
        Path of the generated .mp4 segment (with audio if ffmpeg succeeded,
        otherwise the silent video).

    Raises:
        HTTPException: 500 wrapping any underlying error.
    """
    try:
        # Step 1: Convert script chunk to audio using gTTS (requires network access).
        tts = gTTS(script_chunk)
        audio_file = f"output_audio_{os.urandom(4).hex()}.mp3"
        tts.save(audio_file)

        # Step 2: Create a blank image with text.
        width, height = 1280, 720  # HD resolution
        background_color_rgb = _hex_to_rgb(background_color)
        text_color_rgb = _hex_to_rgb(text_color)

        image = Image.new("RGB", (width, height), background_color_rgb)
        draw = ImageDraw.Draw(image)

        # Load a font (fall back to Pillow's built-in bitmap font if arial.ttf
        # is not installed on the host).
        try:
            font = ImageFont.truetype("arial.ttf", font_size)
        except IOError:
            font = ImageFont.load_default()

        # Measure the text so it can be centered. ImageDraw.textsize was
        # removed in Pillow 10; textbbox is the supported API, with a
        # fallback for very old Pillow versions.
        try:
            left, top, right, bottom = draw.textbbox((0, 0), script_chunk, font=font)
            text_width, text_height = right - left, bottom - top
        except AttributeError:
            text_width, text_height = draw.textsize(script_chunk, font=font)
        text_x = (width - text_width) // 2
        text_y = (height - text_height) // 2
        draw.text((text_x, text_y), script_chunk, font=font, fill=text_color_rgb)

        # Convert the image to a numpy array for OpenCV (RGB -> BGR).
        frame = np.array(image)
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        # Step 3: Create a video segment with the image repeated for the
        # estimated duration of the spoken audio.
        video_segment_file = f"video_segment_{os.urandom(4).hex()}.mp4"
        fps = 24
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        video_writer = cv2.VideoWriter(video_segment_file, fourcc, fps, (width, height))
        # Approximate duration: 0.5 s per word (adjust as needed).
        audio_duration = len(script_chunk.split()) * 0.5
        num_frames = int(audio_duration * fps)
        try:
            for _ in range(num_frames):
                video_writer.write(frame)
        finally:
            video_writer.release()

        # Step 4: Add audio to the video segment using ffmpeg (if available).
        # Argument-list form avoids shell injection/word-splitting; -y makes
        # ffmpeg non-interactive so it can never hang on an overwrite prompt.
        if os.path.exists(audio_file):
            final_video_segment_file = f"final_{video_segment_file}"
            subprocess.run(
                ["ffmpeg", "-y", "-i", video_segment_file, "-i", audio_file,
                 "-c:v", "copy", "-c:a", "aac", final_video_segment_file],
                check=False,
            )
            os.remove(video_segment_file)
            os.remove(audio_file)
            return final_video_segment_file
        else:
            return video_segment_file
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# Function to combine video segments into a single video
def combine_video_segments(video_segment_files: list, output_file: str = "final_output_video.mp4") -> str:
    """Concatenate MP4 segments into one file via ffmpeg's concat demuxer.

    Writes a temporary "video_list.txt" manifest, runs ffmpeg, and deletes
    the source segments on success. The manifest is removed even if ffmpeg
    fails, so repeated calls don't pick up stale entries.

    Args:
        video_segment_files: Ordered list of segment file paths.
        output_file: Path of the combined output video.

    Returns:
        The output_file path.

    Raises:
        HTTPException: 500 wrapping any underlying error.
    """
    list_file = "video_list.txt"
    try:
        with open(list_file, "w") as f:
            for segment in video_segment_files:
                f.write(f"file '{segment}'\n")
        # Argument-list form avoids shell injection; -y keeps ffmpeg
        # non-interactive if the output file already exists.
        subprocess.run(
            ["ffmpeg", "-y", "-f", "concat", "-safe", "0",
             "-i", list_file, "-c", "copy", output_file],
            check=False,
        )
        # Clean up the now-merged segments.
        for segment in video_segment_files:
            os.remove(segment)
        return output_file
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Always drop the manifest, even when ffmpeg or cleanup failed.
        if os.path.exists(list_file):
            os.remove(list_file)
# API Endpoint to generate video
@app.post("/generate-video")
async def generate_video(script: str, background_color: str = "#000000", text_color: str = "#FFFFFF", font_size: int = 50):
    """Generate a narrated text video from *script* and return it as MP4.

    The script is split into chunks, each chunk is rendered to a video
    segment in parallel worker threads, and the segments are concatenated.

    Args:
        script: Full narration text; must be non-empty.
        background_color: Hex background color for the title cards.
        text_color: Hex text color.
        font_size: Point size for the rendered text.

    Returns:
        FileResponse streaming the final MP4.

    Raises:
        HTTPException: 400 for an empty script, 500 for generation failures.
    """
    if not script:
        raise HTTPException(status_code=400, detail="Script cannot be empty")
    try:
        # Step 1: Split the script into smaller chunks.
        script_chunks = split_script(script)
        # Step 2: Generate video segments in parallel (gTTS/ffmpeg are
        # I/O-heavy, so threads overlap the waiting).
        with ThreadPoolExecutor() as executor:
            video_segment_files = list(executor.map(
                lambda chunk: create_video_segment(chunk, background_color, text_color, font_size),
                script_chunks,
            ))
        # Step 3: Combine video segments into a single video.
        final_video_file = combine_video_segments(video_segment_files)
        # Step 4: Return the final video file.
        if not os.path.exists(final_video_file):
            raise HTTPException(status_code=500, detail="Failed to generate video")
        return FileResponse(final_video_file, media_type="video/mp4", filename="generated_video.mp4")
    except HTTPException:
        # Preserve deliberate HTTP errors (status/detail) instead of
        # rewrapping them as generic 500s.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# Run the development server when this module is executed directly.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)