File size: 5,232 Bytes
2a2eddb
 
 
 
 
 
 
f404377
 
2a2eddb
 
 
 
f404377
2a2eddb
 
 
 
 
 
 
 
 
f404377
2a2eddb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f404377
2a2eddb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f404377
 
 
 
 
 
2a2eddb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import asyncio
import os
import subprocess
from concurrent.futures import ThreadPoolExecutor

import cv2
import numpy as np
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
from gtts import gTTS
from PIL import Image, ImageDraw, ImageFont

app = FastAPI()

# Split a script into word-bounded chunks for per-segment rendering.
def split_script(script: str, max_words: int = 30):
    """Break *script* into chunks of at most ``max_words`` words each.

    Whitespace is normalised by ``str.split``; an empty or
    whitespace-only script yields an empty list.
    """
    words = script.split()
    chunks = []
    start = 0
    while start < len(words):
        chunks.append(" ".join(words[start:start + max_words]))
        start += max_words
    return chunks

# Function to create a video segment from a script chunk
def create_video_segment(script_chunk: str, background_color: str, text_color: str, font_size: int):
    """Render one video segment: spoken audio over a static text frame.

    Generates TTS audio with gTTS, draws the chunk centred on a solid
    background, writes the still frame repeatedly to an .mp4 sized to the
    approximate speech duration, then muxes the audio in with ffmpeg.

    Args:
        script_chunk: Text to speak and display.
        background_color: Hex colour like "#000000".
        text_color: Hex colour like "#FFFFFF".
        font_size: Point size for the rendered text.

    Returns:
        Path to the resulting segment file (with audio when ffmpeg ran,
        otherwise the silent video).

    Raises:
        HTTPException: 500 wrapping the underlying error on any failure.
    """
    try:
        # Step 1: Convert script chunk to audio using gTTS.
        tts = gTTS(script_chunk)
        audio_file = f"output_audio_{os.urandom(4).hex()}.mp3"
        tts.save(audio_file)

        # Step 2: Create a blank image with the requested colours.
        width, height = 1280, 720  # HD resolution
        background_color_rgb = tuple(int(background_color.lstrip("#")[i:i + 2], 16) for i in (0, 2, 4))
        text_color_rgb = tuple(int(text_color.lstrip("#")[i:i + 2], 16) for i in (0, 2, 4))

        image = Image.new("RGB", (width, height), background_color_rgb)
        draw = ImageDraw.Draw(image)

        # Load a font; fall back to Pillow's built-in bitmap font.
        try:
            font = ImageFont.truetype("arial.ttf", font_size)
        except IOError:
            font = ImageFont.load_default()

        # Measure with textbbox(): draw.textsize() was removed in Pillow 10,
        # so the original call crashes on current Pillow versions.
        bbox = draw.textbbox((0, 0), script_chunk, font=font)
        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]
        text_x = (width - text_width) // 2
        text_y = (height - text_height) // 2
        draw.text((text_x, text_y), script_chunk, font=font, fill=text_color_rgb)

        # Convert the PIL image (RGB) to a BGR numpy array for OpenCV.
        frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        # Step 3: Write the still frame repeatedly; duration is estimated at
        # ~0.5 s per word (rough heuristic, matches the original behaviour).
        video_segment_file = f"video_segment_{os.urandom(4).hex()}.mp4"
        fps = 24
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        video_writer = cv2.VideoWriter(video_segment_file, fourcc, fps, (width, height))
        try:
            audio_duration = len(script_chunk.split()) * 0.5  # approximate
            # max(1, ...) guards against a zero-frame (unplayable) file for
            # a one-word chunk.
            num_frames = max(1, int(audio_duration * fps))
            for _ in range(num_frames):
                video_writer.write(frame)
        finally:
            # Always release the writer so the file is flushed even on error.
            video_writer.release()

        # Step 4: Mux the audio in with ffmpeg. subprocess.run with an
        # argument list (shell=False) replaces os.system, which interpolated
        # file names into a shell string (breaks on spaces, injectable).
        # -y prevents ffmpeg from blocking on an interactive overwrite prompt.
        if os.path.exists(audio_file):
            final_video_segment_file = f"final_{video_segment_file}"
            subprocess.run(
                ["ffmpeg", "-y", "-i", video_segment_file, "-i", audio_file,
                 "-c:v", "copy", "-c:a", "aac", final_video_segment_file],
                check=True, capture_output=True,
            )
            os.remove(video_segment_file)
            os.remove(audio_file)
            return final_video_segment_file
        return video_segment_file
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

# Function to combine video segments into a single video
def combine_video_segments(video_segment_files: list, output_file: str = "final_output_video.mp4"):
    """Concatenate video segments into one file via ffmpeg's concat demuxer.

    Writes a "video_list.txt" manifest, runs ffmpeg, and removes the
    segment files and manifest afterwards (even when ffmpeg fails).

    Args:
        video_segment_files: Ordered list of segment file paths.
        output_file: Path for the combined video.

    Returns:
        ``output_file``.

    Raises:
        HTTPException: 500 wrapping the underlying error on any failure
            (including a non-zero ffmpeg exit, via CalledProcessError).
    """
    list_file = "video_list.txt"
    try:
        with open(list_file, "w") as f:
            for segment in video_segment_files:
                f.write(f"file '{segment}'\n")

        # subprocess.run with an argument list replaces os.system: no shell
        # interpolation of output_file, -y avoids the interactive overwrite
        # prompt, and check=True surfaces ffmpeg failures instead of
        # silently returning a missing output file.
        subprocess.run(
            ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", list_file,
             "-c", "copy", output_file],
            check=True, capture_output=True,
        )

        return output_file
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Clean up intermediates unconditionally so failed runs don't leak
        # segment files onto disk.
        for segment in video_segment_files:
            if os.path.exists(segment):
                os.remove(segment)
        if os.path.exists(list_file):
            os.remove(list_file)

# API Endpoint to generate video
@app.post("/generate-video")
async def generate_video(script: str, background_color: str = "#000000", text_color: str = "#FFFFFF", font_size: int = 50):
    """Generate a narrated text-card video from *script* and return it.

    The script is split into ~30-word chunks, each chunk is rendered to a
    segment in parallel (thread pool — the work is mostly subprocess/IO
    bound), and the segments are concatenated into one mp4.

    Args:
        script: Text to narrate; must contain at least one word.
        background_color / text_color: Hex colours for the text cards.
        font_size: Point size for the rendered text.

    Returns:
        FileResponse streaming the combined mp4.

    Raises:
        HTTPException: 400 for an empty script, 500 for generation failures.
    """
    # Reject whitespace-only scripts too: they would split into zero chunks
    # and fail later with an opaque ffmpeg error instead of a clear 400.
    if not script or not script.strip():
        raise HTTPException(status_code=400, detail="Script cannot be empty")

    try:
        # Step 1: Split the script into smaller chunks.
        script_chunks = split_script(script)

        # Step 2: Generate video segments in parallel.
        with ThreadPoolExecutor() as executor:
            video_segment_files = list(executor.map(
                lambda chunk: create_video_segment(chunk, background_color, text_color, font_size),
                script_chunks
            ))

        # Step 3: Combine video segments into a single video.
        final_video_file = combine_video_segments(video_segment_files)

        # Step 4: Return the final video file.
        if not os.path.exists(final_video_file):
            raise HTTPException(status_code=500, detail="Failed to generate video")

        return FileResponse(final_video_file, media_type="video/mp4", filename="generated_video.mp4")
    except HTTPException:
        # Re-raise as-is: the original blanket handler re-wrapped these into
        # a generic 500, masking the intended status codes and messages.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

# Run the application with uvicorn when executed directly (not when imported).
if __name__ == "__main__":
    import uvicorn
    # Bind to all interfaces on port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)