"""FastAPI service that renders a text script into a narrated slideshow video.

Pipeline: split the script into chunks -> synthesize speech per chunk
(pyttsx3) -> render a text card per chunk (Pillow) -> wrap each card +
narration into a short clip (moviepy) -> concatenate into one MP4.
"""

from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
import pyttsx3  # Offline TTS library
import moviepy.editor as mp  # Video composition
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import os
import threading
from concurrent.futures import ThreadPoolExecutor
import asyncio
from io import BytesIO

app = FastAPI()

# Initialize a single pyttsx3 TTS engine.
# NOTE: pyttsx3 engines are NOT thread-safe, and segments are rendered from
# ThreadPoolExecutor workers, so every save_to_file/runAndWait pair must be
# serialized through this lock.
engine = pyttsx3.init()
_engine_lock = threading.Lock()


def split_script(script: str, max_words: int = 30) -> list:
    """Split *script* into chunks of at most *max_words* words.

    Returns an empty list for an empty/whitespace-only script.
    """
    words = script.split()
    return [
        " ".join(words[i:i + max_words])
        for i in range(0, len(words), max_words)
    ]


def _hex_to_rgb(color: str) -> tuple:
    """Convert a '#RRGGBB' (or 'RRGGBB') string to an (r, g, b) tuple."""
    stripped = color.lstrip("#")
    return tuple(int(stripped[i:i + 2], 16) for i in (0, 2, 4))


def create_video_segment(script_chunk: str, background_color: str,
                         text_color: str, font_size: int) -> str:
    """Render one script chunk into a narrated video segment.

    Synthesizes audio for the chunk, draws the chunk text centered on a
    solid-color 1280x720 frame, and writes an MP4 whose duration matches the
    narration. Returns the segment's filename.

    Raises HTTPException(500) on any failure.
    """
    audio_file = f"output_audio_{os.urandom(4).hex()}.wav"
    try:
        # Step 1: Convert the chunk to audio. pyttsx3 reliably writes
        # WAV/AIFF (not MP3), and the shared engine must be locked because
        # this function runs in multiple worker threads.
        with _engine_lock:
            engine.save_to_file(script_chunk, audio_file)
            engine.runAndWait()

        # Step 2: Draw the chunk text centered on a solid background.
        width, height = 1280, 720  # HD resolution
        image = Image.new("RGB", (width, height), _hex_to_rgb(background_color))
        draw = ImageDraw.Draw(image)

        # Prefer a real TrueType font; fall back to Pillow's built-in one.
        try:
            font = ImageFont.truetype("arial.ttf", font_size)
        except IOError:
            font = ImageFont.load_default()

        # ImageDraw.textsize was removed in Pillow 10; textbbox is the
        # supported way to measure text.
        left, top, right, bottom = draw.textbbox((0, 0), script_chunk, font=font)
        text_width, text_height = right - left, bottom - top
        draw.text(((width - text_width) // 2, (height - text_height) // 2),
                  script_chunk, font=font, fill=_hex_to_rgb(text_color))

        frame = np.array(image)

        # Step 3: Pair the still frame with its narration. Use the real
        # audio duration (not a word-count guess) so audio and video stay
        # in sync.
        video_segment_file = f"video_segment_{os.urandom(4).hex()}.mp4"
        audio_clip = mp.AudioFileClip(audio_file)
        clip = mp.ImageClip(frame).set_duration(audio_clip.duration)
        final_clip = clip.set_audio(audio_clip)
        try:
            final_clip.write_videofile(video_segment_file, fps=24)
        finally:
            # Release ffmpeg readers/writers held by the clips.
            final_clip.close()
            audio_clip.close()

        return video_segment_file
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Always remove the temporary audio file, even on failure.
        if os.path.exists(audio_file):
            os.remove(audio_file)


def combine_video_segments(video_segment_files: list,
                           output_file: str = "final_output_video.mp4") -> str:
    """Concatenate segment files into *output_file* and delete the segments.

    Returns the output filename. Raises HTTPException(500) on failure.
    """
    clips = []
    try:
        clips = [mp.VideoFileClip(segment) for segment in video_segment_files]
        final_clip = mp.concatenate_videoclips(clips)
        try:
            final_clip.write_videofile(output_file)
        finally:
            final_clip.close()
        return output_file
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Close readers and clean up the intermediate segment files.
        for clip in clips:
            clip.close()
        for segment in video_segment_files:
            if os.path.exists(segment):
                os.remove(segment)


@app.post("/generate-video")
async def generate_video(script: str, background_color: str = "#000000",
                         text_color: str = "#FFFFFF", font_size: int = 50):
    """Generate a narrated video from *script* and return it as a file.

    Raises 400 for an empty script and 500 for generation failures.
    """
    if not script:
        raise HTTPException(status_code=400, detail="Script cannot be empty")
    try:
        # Step 1: Split the script into smaller chunks.
        script_chunks = split_script(script)

        # Step 2: Generate the per-chunk segments in parallel. (The TTS
        # step inside create_video_segment is serialized by a lock; the
        # frame rendering and ffmpeg encoding still overlap.)
        with ThreadPoolExecutor() as executor:
            video_segment_files = list(executor.map(
                lambda chunk: create_video_segment(
                    chunk, background_color, text_color, font_size),
                script_chunks,
            ))

        # Step 3: Stitch the segments into the final video.
        final_video_file = combine_video_segments(video_segment_files)

        # Step 4: Return the final video file.
        if not os.path.exists(final_video_file):
            raise HTTPException(status_code=500,
                                detail="Failed to generate video")
        return FileResponse(final_video_file, media_type="video/mp4",
                            filename="generated_video.mp4")
    except HTTPException:
        # Preserve deliberate HTTP errors (e.g. the 400 above) instead of
        # re-wrapping them as 500s.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


# Run the application.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)