Spaces:

mgbam
/

CingenAI

Running

File size: 18,846 Bytes

287c9ca
9d84ba9
8583908
9840152
9d84ba9
990e23e
287c9ca
8583908
 
 
9840152
5e4272a
f02ab98
f13d4b2
 
f02ab98
f13d4b2
f02ab98
 
 
f13d4b2
 
 
 
 
 
 
 
 
f02ab98
f13d4b2
f02ab98
 
 
f13d4b2
 
287c9ca
9d84ba9
f13d4b2
 
 
 
 
 
 
 
f02ab98
f13d4b2
 
 
f02ab98
f13d4b2
f02ab98
f13d4b2
 
 
f02ab98
 
 
 
f13d4b2
f02ab98
f13d4b2
f02ab98
f13d4b2
 
f02ab98
 
f13d4b2
 
 
 
f02ab98
 
09d5c67
5e4272a
f02ab98
 
f13d4b2
 
 
 
 
 
f02ab98
 
 
 
 
 
 
 
f13d4b2
f02ab98
f13d4b2
 
 
f02ab98
 
 
 
 
f13d4b2
29c2122
 
f02ab98
f13d4b2
 
 
50c620f
f13d4b2
 
 
 
 
 
f02ab98
f13d4b2
f02ab98
f13d4b2
5e4272a
f02ab98
 
f13d4b2
 
 
 
 
 
29c2122
f13d4b2
 
 
 
 
 
 
 
f02ab98
f13d4b2
 
 
 
 
 
f02ab98
f13d4b2
 
 
f02ab98
f13d4b2
 
9840152
 
f02ab98
 
9840152
 
f02ab98
 
5e4272a
f13d4b2
29c2122
f13d4b2
f02ab98
f13d4b2
9840152
 
f02ab98
 
 
9840152
b97795f
29c2122
41b47a8
09d5c67
9d84ba9
 
 
f02ab98
 
5e4272a
f13d4b2
 
5e4272a
 
 
f02ab98
5e4272a
 
 
9d84ba9
f13d4b2
5e4272a
f02ab98
5e4272a
f02ab98
5e4272a
f02ab98
f13d4b2
f02ab98
 
 
990e23e
f02ab98
f13d4b2
 
29c2122
f13d4b2
9840152
f02ab98
5e4272a
 
f02ab98
5e4272a
f13d4b2
f02ab98
5e4272a
f02ab98
5e4272a
29c2122
5e4272a
f13d4b2
f02ab98
f13d4b2
 
9840152
 
f02ab98
f13d4b2
f02ab98
f13d4b2
 
 
 
 
 
 
 
 
f02ab98
f13d4b2
 
 
 
 
 
f02ab98
f13d4b2
 
f02ab98
f13d4b2
f02ab98
9840152
 
5e4272a
f02ab98
f13d4b2
8583908
 
9d84ba9
f02ab98
8583908
29c2122
 
 
 
f13d4b2
29c2122
 
9d84ba9
8583908
f13d4b2
29c2122
 
9d84ba9
29c2122
9d84ba9
29c2122
 
5e4272a
9d84ba9
9840152
9d84ba9
f02ab98
9d84ba9
f02ab98
5e4272a
29c2122
 
 
9840152
 
 
 
f02ab98
 
 
 
 
9840152
f13d4b2
 
f02ab98
 
8583908
 
b97795f
f02ab98
f13d4b2
5e4272a
9d84ba9
f02ab98
 
 
29c2122
f13d4b2
 
5e4272a

# core/visual_engine.py
from PIL import Image, ImageDraw, ImageFont
from moviepy.editor import (ImageClip, concatenate_videoclips, TextClip, 
                            CompositeVideoClip, AudioFileClip)
import moviepy.video.fx.all as vfx 
import numpy as np
import os
import openai
import requests
import io
import time
import random 
import subprocess # For dummy video fallback

# --- ElevenLabs Import ---
# The ElevenLabs SDK is optional. The four names below start out in their
# "disabled" state and are only rebound when both SDK imports succeed.
ELEVENLABS_CLIENT_IMPORTED = False
ElevenLabsAPIClient = None
Voice = None
VoiceSettings = None

try:
    from elevenlabs.client import ElevenLabs as ImportedElevenLabsClient
    from elevenlabs import Voice as ImportedVoice, VoiceSettings as ImportedVoiceSettings
except ImportError as e_eleven:
    # SDK (or one of the requested names) is not importable: keep the defaults.
    print(f"WARNING: Could not import ElevenLabs client components: {e_eleven}. ElevenLabs audio will be disabled.")
except Exception as e_gen_eleven:
    # Any other failure raised while importing must not break this module.
    print(f"WARNING: General error importing ElevenLabs: {e_gen_eleven}. ElevenLabs audio will be disabled.")
else:
    ElevenLabsAPIClient, Voice, VoiceSettings = (
        ImportedElevenLabsClient,
        ImportedVoice,
        ImportedVoiceSettings,
    )
    ELEVENLABS_CLIENT_IMPORTED = True
    print("INFO: Successfully imported ElevenLabs client components (SDK v1.x.x pattern).")


class VisualEngine:
    def __init__(self, output_dir="temp_cinegen_media"):
        """Create the output directory, load the placeholder font and reset API state.

        All external services (DALL-E, ElevenLabs, Pexels) start disabled; they
        are switched on later through the ``set_*_api_key`` methods.

        Args:
            output_dir: Directory that receives generated media. It is created
                if it does not exist yet.
        """
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

        # Font used when drawing text onto placeholder images with PIL.
        self.font_filename = "arial.ttf"
        self.font_path_in_container = f"/usr/local/share/fonts/truetype/mycustomfonts/{self.font_filename}"
        self.font_size_pil = 20

        # Styling of the caption overlay rendered through MoviePy's TextClip.
        self.video_overlay_font_size = 30
        self.video_overlay_font_color = 'white'
        self.video_overlay_font = 'Arial-Bold'  # Relies on ImageMagick font discovery

        try:
            self.font = ImageFont.truetype(self.font_path_in_container, self.font_size_pil)
        except IOError:
            # Fall back to PIL's built-in font and record the smaller nominal size.
            print(f"WARNING: Placeholder font '{self.font_path_in_container}' not found. Using default.")
            self.font = ImageFont.load_default()
            self.font_size_pil = 10
        else:
            print(f"INFO: Placeholder font loaded: {self.font_path_in_container}.")

        # API Client States
        # -- OpenAI / DALL-E image generation --
        self.openai_api_key = None
        self.USE_AI_IMAGE_GENERATION = False
        self.dalle_model = "dall-e-3"
        self.image_size_dalle3 = "1792x1024"  # Landscape orientation
        self.video_frame_size = (1280, 720)  # 16:9 standard HD

        # -- ElevenLabs narration --
        self.elevenlabs_api_key = None
        self.USE_ELEVENLABS = False
        self.elevenlabs_client = None
        self.elevenlabs_voice_id = "Rachel"  # Default
        # VoiceSettings is None when the SDK import at module level failed.
        self.elevenlabs_voice_settings = (
            VoiceSettings(
                stability=0.60, similarity_boost=0.80,  # Adjusted for potentially more character
                style=0.15, use_speaker_boost=True
            )
            if VoiceSettings
            else None
        )

        # -- Pexels stock-photo fallback --
        self.pexels_api_key = None
        self.USE_PEXELS = False
        print("INFO: VisualEngine initialized.")

    def set_openai_api_key(self,k): 
        self.openai_api_key=k; self.USE_AI_IMAGE_GENERATION=bool(k)
        print(f"INFO: DALL-E ({self.dalle_model}) {'Ready.' if k else 'Disabled (no API key).'}")

    def set_elevenlabs_api_key(self, api_key):
        """Store the ElevenLabs key and try to build the SDK client.

        ``USE_ELEVENLABS`` ends up True only when a key was given, the SDK was
        imported at module load, and the client constructor returned a truthy
        object without raising.

        Args:
            api_key: ElevenLabs API key; a falsy value disables narration.
        """
        self.elevenlabs_api_key = api_key
        sdk_available = bool(ELEVENLABS_CLIENT_IMPORTED and ElevenLabsAPIClient)

        if not (api_key and sdk_available):
            self.USE_ELEVENLABS = False
            self.elevenlabs_client = None
            # When the SDK is missing the import-time warning already covered it,
            # so only the "no key" situation is reported here.
            if sdk_available:
                print("INFO: ElevenLabs API Key not provided or client class not imported. ElevenLabs Disabled.")
            return

        try:
            self.elevenlabs_client = ElevenLabsAPIClient(api_key=api_key)
            # Minimal check only: no API call is made at this point.
            if not self.elevenlabs_client:
                # Should not happen if ElevenLabsAPIClient() doesn't raise error
                print("WARNING: ElevenLabs client is None after init. ElevenLabs Disabled.")
                self.USE_ELEVENLABS = False
            else:
                self.USE_ELEVENLABS = True
                print("INFO: ElevenLabs Client Ready.")
        except Exception as e:
            print(f"ERROR: Initializing ElevenLabs client: {e}. ElevenLabs Disabled.")
            self.USE_ELEVENLABS = False
            self.elevenlabs_client = None
            
    def set_pexels_api_key(self,k):
        self.pexels_api_key=k; self.USE_PEXELS=bool(k)
        print(f"INFO: Pexels Search {'Ready.' if k else 'Disabled (no API key).'}")
    
    def _get_text_dimensions(self,text_content,font_obj):
        if not text_content: return 0,self.font_size_pil 
        try:
            if hasattr(font_obj,'getbbox'): 
                bbox=font_obj.getbbox(text_content);w=bbox[2]-bbox[0];h=bbox[3]-bbox[1]
                return w, h if h > 0 else self.font_size_pil
            elif hasattr(font_obj,'getsize'): 
                w,h=font_obj.getsize(text_content)
                return w, h if h > 0 else self.font_size_pil
            else: 
                return int(len(text_content)*self.font_size_pil*0.6),int(self.font_size_pil*1.2 if self.font_size_pil*1.2>0 else self.font_size_pil)
        except Exception: 
            return int(len(text_content)*self.font_size_pil*0.6),int(self.font_size_pil*1.2)
    
    def _create_placeholder_image_content(self, text_description, filename, size=None):
        """Draw word-wrapped, centred text on a dark image and save it.

        At most seven lines of text are shown; when more lines would fit, the
        text is cut after the seventh line and followed by a "..." row. The
        block of rows that is actually drawn (including the "..." row) is
        centred vertically, so truncated text no longer sticks to the top.

        Args:
            text_description: Text to render. Falsy values are replaced by a
                generic "no prompt" notice.
            filename: File name (inside ``self.output_dir``) to save to.
            size: ``(width, height)`` of the image; defaults to
                ``self.video_frame_size``.

        Returns:
            The path of the saved image, or ``None`` if saving failed.
        """
        if size is None:
            size = self.video_frame_size  # Default to video frame size
        padding = 25
        line_gap = 2
        max_visible_lines = 7
        text_colour = (200, 200, 180)

        img = Image.new('RGB', size, color=(20, 20, 40))
        d = ImageDraw.Draw(img)
        max_w = size[0] - (2 * padding)
        if not text_description:
            text_description = "(Placeholder: No prompt text)"

        # Greedy word wrap: keep adding words while the measured width fits.
        lines = []
        current_line = ""
        for word in text_description.split():
            test_line = current_line + word + " "
            if self._get_text_dimensions(test_line, self.font)[0] <= max_w:
                current_line = test_line
            else:
                if current_line:
                    lines.append(current_line.strip())
                # A single word wider than max_w still gets its own line.
                current_line = word + " "
        if current_line:
            lines.append(current_line.strip())
        if not lines:
            lines.append("(Text error or too long for placeholder)")

        _, single_line_h = self._get_text_dimensions("Ay", self.font)
        if single_line_h <= 0:
            single_line_h = self.font_size_pil + 2
        row_h = single_line_h + line_gap

        # Rows that fit vertically, then the seven-line cap with an ellipsis row.
        rows_that_fit = max(0, int((size[1] - (2 * padding)) // row_h))
        rows = lines[:rows_that_fit]
        if len(rows) > max_visible_lines:
            rows = rows[:max_visible_lines] + ["..."]

        # Centre exactly the rows that will be drawn.
        y_text = padding + (size[1] - (2 * padding) - len(rows) * row_h) / 2.0
        for row in rows:
            row_w, _ = self._get_text_dimensions(row, self.font)
            x_text = (size[0] - row_w) / 2.0
            d.text((x_text, y_text), row, font=self.font, fill=text_colour)
            y_text += row_h

        filepath = os.path.join(self.output_dir, filename)
        try:
            img.save(filepath)
        except Exception as e:
            print(f"ERROR: Saving placeholder image {filepath}: {e}")
            return None
        return filepath

    def _search_pexels_image(self, query, output_filename_base):
        if not self.USE_PEXELS or not self.pexels_api_key: return None
        headers = {"Authorization": self.pexels_api_key}
        params = {"query": query, "per_page": 1, "orientation": "landscape", "size": "large"}
        pexels_filename = output_filename_base.replace(".png", f"_pexels_{random.randint(1000,9999)}.jpg")
        filepath = os.path.join(self.output_dir, pexels_filename)
        try:
            print(f"INFO: Searching Pexels for: '{query}'")
            effective_query = " ".join(query.split()[:5]) 
            params["query"] = effective_query
            response = requests.get("https://api.pexels.com/v1/search", headers=headers, params=params, timeout=20)
            response.raise_for_status(); data = response.json()
            if data.get("photos") and len(data["photos"]) > 0:
                photo_url = data["photos"][0]["src"]["large2x"] 
                image_response = requests.get(photo_url, timeout=60); image_response.raise_for_status()
                img_data = Image.open(io.BytesIO(image_response.content))
                if img_data.mode != 'RGB': img_data = img_data.convert('RGB')
                img_data.save(filepath); print(f"INFO: Pexels image saved: {filepath}"); return filepath
            else: print(f"INFO: No photos found on Pexels for query: '{effective_query}'")
        except Exception as e: print(f"ERROR: Pexels search/download for query '{query}': {e}")
        return None

    def generate_image_visual(self, image_prompt_text, scene_data, scene_identifier_filename):
        """Produce an image for a scene, falling back from DALL-E to Pexels to a placeholder.

        When AI generation is disabled a placeholder showing the prompt is
        created directly. Otherwise DALL-E is tried (a rate-limit error is
        retried once after a short wait, any other error aborts immediately),
        then a Pexels search, then a placeholder noting the failure.

        Args:
            image_prompt_text: Prompt sent to DALL-E / shown on the placeholder.
            scene_data: Mapping describing the scene. The Pexels query is read
                from ``'pexels_search_query_감독'``; if that is missing or empty,
                ``'emotional_beat'`` and ``'setting_description'`` are combined.
                ``None`` is treated as an empty mapping.
            scene_identifier_filename: Output file name inside ``self.output_dir``.

        Returns:
            Path of the image that was written, or ``None`` if even the
            placeholder could not be saved.
        """
        if not (self.USE_AI_IMAGE_GENERATION and self.openai_api_key):
            return self._create_placeholder_image_content(
                image_prompt_text, scene_identifier_filename
            )

        filepath = os.path.join(self.output_dir, scene_identifier_filename)
        # Safe preview for log lines and the failure placeholder, even for a None prompt.
        prompt_preview = (image_prompt_text or "")[:100]
        max_retries = 2
        for attempt in range(max_retries):
            try:
                print(f"INFO: Attempt {attempt+1}: DALL-E ({self.dalle_model}) for: {prompt_preview}...")
                client = openai.OpenAI(api_key=self.openai_api_key, timeout=90.0)
                response = client.images.generate(
                    model=self.dalle_model, prompt=image_prompt_text, n=1,
                    size=self.image_size_dalle3, quality="hd", response_format="url", style="vivid"
                )
                image_url = response.data[0].url
                revised_prompt = getattr(response.data[0], 'revised_prompt', None)
                if revised_prompt:
                    print(f"INFO: DALL-E 3 revised_prompt: {revised_prompt[:100]}...")

                image_response = requests.get(image_url, timeout=120)
                image_response.raise_for_status()
                with Image.open(io.BytesIO(image_response.content)) as img_data:
                    rgb_img = img_data if img_data.mode == 'RGB' else img_data.convert('RGB')
                    rgb_img.save(filepath)
                print(f"INFO: AI Image (DALL-E) saved: {filepath}")
                return filepath
            except openai.RateLimitError as e:
                # Only wait when another attempt will actually follow.
                if attempt == max_retries - 1:
                    print(f"WARNING: OpenAI Rate Limit: {e}.")
                    print("ERROR: Max retries for RateLimitError.")
                    break
                delay_s = 5 * (attempt + 1)
                print(f"WARNING: OpenAI Rate Limit: {e}. Retrying after {delay_s}s...")
                time.sleep(delay_s)
            except openai.APIError as e:
                print(f"ERROR: OpenAI API Error: {e}")
                break
            except requests.exceptions.RequestException as e:
                print(f"ERROR: Requests Error (DALL-E download): {e}")
                break
            except Exception as e:
                print(f"ERROR: Generic error (DALL-E gen): {e}")
                break

        print("WARNING: DALL-E generation failed. Trying Pexels fallback...")
        scene_info = scene_data or {}
        derived_query = f"{scene_info.get('emotional_beat','')} {scene_info.get('setting_description','')}"
        # An empty director-supplied query is as useless as a missing one.
        pexels_query_text = scene_info.get('pexels_search_query_감독') or derived_query
        pexels_path = self._search_pexels_image(pexels_query_text, scene_identifier_filename)
        if pexels_path:
            return pexels_path

        print("WARNING: Pexels also failed/disabled. Using placeholder.")
        return self._create_placeholder_image_content(
            f"[AI/Pexels Failed] Original Prompt: {prompt_preview}...",
            scene_identifier_filename
        )

    def generate_narration_audio(self, text_to_narrate, output_filename="narration_overall.mp3"):
        if not self.USE_ELEVENLABS or not self.elevenlabs_client or not text_to_narrate:
            # print("INFO: ElevenLabs not enabled, client not initialized, or no text. Skipping audio.")
            return None
        
        audio_filepath = os.path.join(self.output_dir, output_filename)
        try:
            print(f"INFO: Generating ElevenLabs audio (Voice: {self.elevenlabs_voice_id}) for: {text_to_narrate[:70]}...")
            
            voice_param = self.elevenlabs_voice_id 
            if Voice and self.elevenlabs_voice_settings: # Check if Voice & VoiceSettings were imported
                voice_param = Voice(
                    voice_id=self.elevenlabs_voice_id,
                    settings=self.elevenlabs_voice_settings
                )
            
            audio_data_iterator = self.elevenlabs_client.generate(
                text=text_to_narrate,
                voice=voice_param, 
                model="eleven_multilingual_v2" 
            )
            
            with open(audio_filepath, "wb") as f:
                for chunk in audio_data_iterator: 
                    if chunk: f.write(chunk)
            
            print(f"INFO: ElevenLabs audio saved: {audio_filepath}")
            return audio_filepath
        except AttributeError as ae:
             print(f"ERROR: AttributeError with ElevenLabs client (method 'generate' might be different or client not fully init): {ae}")
        except Exception as e:
            print(f"ERROR: Generating ElevenLabs audio: {e}")
        return None

    def create_video_from_images(self, image_data_list, overall_narration_path=None, output_filename="final_video.mp4", fps=24, duration_per_image=4.5):
        if not image_data_list: print("WARNING: No image data for video."); return None
        processed_clips = []; narration_audio_clip = None; final_video_clip_obj = None

        for i, data in enumerate(image_data_list):
            img_path, scene_num, key_action = data.get('path'), data.get('scene_num', i+1), data.get('key_action', '')
            if not (img_path and os.path.exists(img_path)): print(f"WARNING: Img not found: {img_path}"); continue
            try:
                pil_img = Image.open(img_path); 
                if pil_img.mode != 'RGB': pil_img = pil_img.convert('RGB')
                img_copy = pil_img.copy()
                img_copy.thumbnail(self.video_frame_size, Image.Resampling.LANCZOS)
                canvas = Image.new('RGB', self.video_frame_size, (random.randint(0,10), random.randint(0,10), random.randint(0,10))) 
                xo, yo = (self.video_frame_size[0]-img_copy.width)//2, (self.video_frame_size[1]-img_copy.height)//2
                canvas.paste(img_copy, (xo,yo))
                frame_np = np.array(canvas)
                img_clip = ImageClip(frame_np).set_duration(duration_per_image)
                end_scale = random.uniform(1.05, 1.12) 
                img_clip = img_clip.fx(vfx.resize, lambda t: 1 + (end_scale - 1) * (t / duration_per_image))
                img_clip = img_clip.set_position('center')
                if key_action:
                    txt_clip = TextClip(f"Scene {scene_num}\n{key_action}", fontsize=self.video_overlay_font_size, 
                                        color=self.video_overlay_font_color, font=self.video_overlay_font,
                                        bg_color='rgba(10,10,20,0.75)', method='caption', align='West',
                                        size=(self.video_frame_size[0]*0.9, None), kerning=-1, stroke_color='black', stroke_width=1
                                       ).set_duration(duration_per_image - 1.0).set_start(0.5).set_position(('center', 0.9), relative=True)
                    final_scene_clip = CompositeVideoClip([img_clip, txt_clip], size=self.video_frame_size)
                else: final_scene_clip = img_clip
                processed_clips.append(final_scene_clip)
            except Exception as e: print(f"ERROR: Creating video clip for {img_path}: {e}.")
        
        if not processed_clips: print("WARNING: No clips processed for video."); return None
        transition = 0.8 
        final_video_clip_obj = concatenate_videoclips(processed_clips, padding=-transition, method="compose")
        if final_video_clip_obj.duration > transition*2: 
            final_video_clip_obj = final_video_clip_obj.fx(vfx.fadein, transition).fx(vfx.fadeout, transition)
        
        if overall_narration_path and os.path.exists(overall_narration_path):
            try:
                narration_audio_clip = AudioFileClip(overall_narration_path)
                # Ensure video duration matches audio if audio is shorter, or cap audio at video length.
                # MoviePy's set_audio will truncate the longer of the two to match the shorter one IF the video has no audio track yet.
                # If video already has audio, it replaces. If video is shorter than new audio, video extends with last frame.
                # We want audio to dictate length if it's shorter than visual sequence.
                if narration_audio_clip.duration < final_video_clip_obj.duration:
                    final_video_clip_obj = final_video_clip_obj.subclip(0, narration_audio_clip.duration)
                
                final_video_clip_obj = final_video_clip_obj.set_audio(narration_audio_clip)
                print("INFO: Overall narration added to video.")
            except Exception as e: print(f"ERROR: Adding overall narration: {e}.")
        
        output_path = os.path.join(self.output_dir, output_filename)
        try:
            print(f"INFO: Writing final video to: {output_path}")
            final_video_clip_obj.write_videofile(output_path, fps=fps, codec='libx264', preset='medium', 
                                        audio_codec='aac',
                                        temp_audiofile=os.path.join(self.output_dir, f'temp-audio-{os.urandom(4).hex()}.m4a'), 
                                        remove_temp=True, threads=os.cpu_count() or 2, logger='bar', bitrate="5000k")
            print(f"INFO: Video successfully created: {output_path}"); return output_path
        except Exception as e: print(f"ERROR: Writing video file: {e}"); return None
        finally:
            for c_item in processed_clips: 
                if hasattr(c_item, 'close'): c_item.close()
            if narration_audio_clip and hasattr(narration_audio_clip, 'close'): narration_audio_clip.close()
            if final_video_clip_obj and hasattr(final_video_clip_obj, 'close'): final_video_clip_obj.close()