Spaces:

mgbam
/

ChronoWeave

Sleeping

App Files Community

mgbam committed on Apr 15

Commit

247b2e3

verified ·

1 Parent(s): 59e152e

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -49

app.py CHANGED Viewed

@@ -1,8 +1,9 @@
-# Copyright 2025 Google LLC. Based on work by Yousif Ahmed.
 # Concept: ChronoWeave – Branching Narrative Generation
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
 import streamlit as st
 import google.generativeai as genai
@@ -35,11 +36,14 @@ import typing_extensions as typing
 import nest_asyncio
 nest_asyncio.apply()
-# Import Vertex AI SDK for image generation (Preview API)
 import vertexai
 from vertexai.preview.vision_models import ImageGenerationModel
 from google.oauth2 import service_account
 # --- Logging Setup ---
 logging.basicConfig(
     level=logging.INFO,
@@ -59,7 +63,7 @@ Generate multiple, branching story timelines from a single theme using AI, compl
 TEXT_MODEL_ID = "models/gemini-1.5-flash"
 AUDIO_MODEL_ID = "models/gemini-1.5-flash"
 AUDIO_SAMPLING_RATE = 24000
-# Pretrained Imagen model identifier for Vertex AI preview
 IMAGE_MODEL_ID = "imagen-3.0-generate-002"
 DEFAULT_ASPECT_RATIO = "1:1"
 VIDEO_FPS = 24
@@ -80,14 +84,14 @@ except KeyError:
         st.error("🚨 **Google API Key Not Found!** Please configure it.", icon="🚨")
         st.stop()
-# Vertex AI configuration: load PROJECT_ID and LOCATION from secrets or environment.
 PROJECT_ID = st.secrets.get("PROJECT_ID") or os.environ.get("PROJECT_ID")
 LOCATION = st.secrets.get("LOCATION") or os.environ.get("LOCATION", "us-central1")
 if not PROJECT_ID:
     st.error("🚨 **PROJECT_ID not set!** Please add PROJECT_ID to your secrets.", icon="🚨")
     st.stop()
-# Load service account JSON from environment (secret name: SERVICE_ACCOUNT_JSON)
 try:
     service_account_info = json.loads(os.environ["SERVICE_ACCOUNT_JSON"])
     credentials = service_account.Credentials.from_service_account_info(service_account_info)
@@ -170,46 +174,26 @@ def wave_file_writer(filename: str, channels: int = 1, rate: int = AUDIO_SAMPLIN
             except Exception as e_close:
                 logger.error(f"Error closing wave file {filename}: {e_close}")
 async def generate_audio_live_async(api_text: str, output_filename: str, voice: Optional[str] = None) -> Optional[str]:
-    """Generates audio using Gemini Live API (async version) via the GenerativeModel."""
-    collected_audio = bytearray()
     task_id = os.path.basename(output_filename).split('.')[0]
-    logger.info(f"🎙️ [{task_id}] Requesting audio: '{api_text[:60]}...'")
     try:
-        config = {
-            "response_modalities": ["AUDIO"],
-            "audio_encoding": "LINEAR16",
-            "sample_rate_hertz": AUDIO_SAMPLING_RATE,
-        }
-        directive_prompt = f"Narrate directly: \"{api_text}\""
-        async with live_model.connect(config=config) as session:
-            await session.send_request([directive_prompt])
-            async for response in session.stream_content():
-                if response.audio_chunk and response.audio_chunk.data:
-                    collected_audio.extend(response.audio_chunk.data)
-                if hasattr(response, 'error') and response.error:
-                    logger.error(f"❌ [{task_id}] Audio stream error: {response.error}")
-                    st.error(f"Audio stream error {task_id}: {response.error}", icon="🔊")
-                    return None
-        if not collected_audio:
-            logger.warning(f"⚠️ [{task_id}] No audio data received.")
-            st.warning(f"No audio data for {task_id}.", icon="🔊")
-            return None
-        with wave_file_writer(output_filename, rate=AUDIO_SAMPLING_RATE) as wf:
-            wf.writeframes(bytes(collected_audio))
-        logger.info(f"✅ [{task_id}] Audio saved: {os.path.basename(output_filename)} ({len(collected_audio)} bytes)")
-        return output_filename
-    except genai.types.generation_types.BlockedPromptException as bpe:
-        logger.error(f"❌ [{task_id}] Audio blocked: {bpe}")
-        st.error(f"Audio blocked {task_id}.", icon="🔇")
-        return None
-    except TypeError as te:
-        logger.exception(f"❌ [{task_id}] Audio config TypeError: {te}")
-        st.error(f"Audio config error {task_id} (TypeError): {te}. Check library/config.", icon="⚙️")
-        return None
     except Exception as e:
-        logger.exception(f"❌ [{task_id}] Audio failed: {e}")
-        st.error(f"Audio failed {task_id}: {e}", icon="🔊")
         return None
 def generate_story_sequence_chrono(theme: str, num_scenes: int, num_timelines: int, divergence_prompt: str = "") -> Optional[ChronoWeaveResponse]:
@@ -274,7 +258,7 @@ def generate_image_imagen(prompt: str, aspect_ratio: str = "1:1", task_id: str =
     """
     Generates an image using Vertex AI's Imagen model via the Vertex AI preview API.
-    This function loads the pretrained Imagen model "imagen-3.0-generate-002" and attempts to generate an image.
     If authentication fails, it provides guidance on how to resolve the issue.
     """
     logger.info(f"🖼️ [{task_id}] Requesting image: '{prompt[:70]}...' (Aspect: {aspect_ratio})")
@@ -297,11 +281,9 @@ def generate_image_imagen(prompt: str, aspect_ratio: str = "1:1", task_id: str =
         if "Unable to authenticate" in error_str:
             error_msg = (
                 "Authentication error: Unable to authenticate your request. "
-                "If running locally, please run `!gcloud auth login`. "
-                "If running in Colab, try:\n"
-                "    from google.colab import auth\n"
-                "    auth.authenticate_user()\n"
-                "If using a service account or other environment, please refer to https://cloud.google.com/docs/authentication for guidance."
             )
         else:
             error_msg = f"Image generation for {task_id} failed: {e}"
@@ -400,6 +382,7 @@ if generate_button:
                         generated_audio_path: Optional[str] = None
                         if not scene_has_error:
                             with st.spinner(f"[{task_id}] Generating audio... 🔊"):
                                 audio_path_temp = os.path.join(temp_dir, f"{task_id}_audio.wav")
                                 try:
                                     generated_audio_path = asyncio.run(generate_audio_live_async(segment.audio_text, audio_path_temp, audio_voice))
@@ -417,7 +400,7 @@ if generate_button:
                                 temp_audio_files[scene_id] = generated_audio_path
                                 try:
                                     with open(generated_audio_path, 'rb') as ap:
-                                        st.audio(ap.read(), format='audio/wav')
                                 except Exception as e:
                                     logger.warning(f"⚠️ [{task_id}] Audio preview error: {e}")
                             else:

+# Copyright 2025 Google LLC.
+# Based on work by Yousif Ahmed.
 # Concept: ChronoWeave – Branching Narrative Generation
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: https://www.apache.org/licenses/LICENSE-2.0
 import streamlit as st
 import google.generativeai as genai
 import nest_asyncio
 nest_asyncio.apply()
+# Import Vertex AI SDK and service account credentials support
 import vertexai
 from vertexai.preview.vision_models import ImageGenerationModel
 from google.oauth2 import service_account
+# Import gTTS for audio generation
+from gtts import gTTS
 # --- Logging Setup ---
 logging.basicConfig(
     level=logging.INFO,
 TEXT_MODEL_ID = "models/gemini-1.5-flash"
 AUDIO_MODEL_ID = "models/gemini-1.5-flash"
 AUDIO_SAMPLING_RATE = 24000
+# Pretrained Imagen model identifier
 IMAGE_MODEL_ID = "imagen-3.0-generate-002"
 DEFAULT_ASPECT_RATIO = "1:1"
 VIDEO_FPS = 24
         st.error("🚨 **Google API Key Not Found!** Please configure it.", icon="🚨")
         st.stop()
+# Vertex AI configuration: PROJECT_ID and LOCATION
 PROJECT_ID = st.secrets.get("PROJECT_ID") or os.environ.get("PROJECT_ID")
 LOCATION = st.secrets.get("LOCATION") or os.environ.get("LOCATION", "us-central1")
 if not PROJECT_ID:
     st.error("🚨 **PROJECT_ID not set!** Please add PROJECT_ID to your secrets.", icon="🚨")
     st.stop()
+# Load service account JSON from the secret
 try:
     service_account_info = json.loads(os.environ["SERVICE_ACCOUNT_JSON"])
     credentials = service_account.Credentials.from_service_account_info(service_account_info)
             except Exception as e_close:
                 logger.error(f"Error closing wave file {filename}: {e_close}")
+# --- Audio Generation using gTTS ---
+# We replace the previous failing method with gTTS.
 async def generate_audio_live_async(api_text: str, output_filename: str, voice: Optional[str] = None) -> Optional[str]:
+    """
+    Generates audio using gTTS (Google Text-to-Speech).
+    Saves an MP3 file; MoviePy supports MP3 playback.
+    """
     task_id = os.path.basename(output_filename).split('.')[0]
+    logger.info(f"🎙️ [{task_id}] Generating audio via gTTS for text: '{api_text[:60]}...'")
     try:
+        # Generate audio using gTTS
+        tts = gTTS(text=api_text, lang="en")
+        # Replace .wav with .mp3
+        mp3_filename = output_filename.replace(".wav", ".mp3")
+        tts.save(mp3_filename)
+        logger.info(f"✅ [{task_id}] Audio saved: {os.path.basename(mp3_filename)}")
+        return mp3_filename
     except Exception as e:
+        logger.exception(f"❌ [{task_id}] Audio generation error: {e}")
+        st.error(f"Audio generation failed for {task_id}: {e}", icon="🔊")
         return None
 def generate_story_sequence_chrono(theme: str, num_scenes: int, num_timelines: int, divergence_prompt: str = "") -> Optional[ChronoWeaveResponse]:
     """
     Generates an image using Vertex AI's Imagen model via the Vertex AI preview API.
+    This function loads the pretrained Imagen model "imagen-3.0-generate-002" and generates an image.
     If authentication fails, it provides guidance on how to resolve the issue.
     """
     logger.info(f"🖼️ [{task_id}] Requesting image: '{prompt[:70]}...' (Aspect: {aspect_ratio})")
         if "Unable to authenticate" in error_str:
             error_msg = (
                 "Authentication error: Unable to authenticate your request. "
+                "Ensure your service account JSON is loaded correctly. "
+                "For example, on Hugging Face Spaces, set SERVICE_ACCOUNT_JSON in your repository secrets. "
+                "If running locally, run `!gcloud auth login`."
             )
         else:
             error_msg = f"Image generation for {task_id} failed: {e}"
                         generated_audio_path: Optional[str] = None
                         if not scene_has_error:
                             with st.spinner(f"[{task_id}] Generating audio... 🔊"):
+                                # Change output extension to .wav for consistency, but gTTS returns MP3
                                 audio_path_temp = os.path.join(temp_dir, f"{task_id}_audio.wav")
                                 try:
                                     generated_audio_path = asyncio.run(generate_audio_live_async(segment.audio_text, audio_path_temp, audio_voice))
                                 temp_audio_files[scene_id] = generated_audio_path
                                 try:
                                     with open(generated_audio_path, 'rb') as ap:
+                                        st.audio(ap.read(), format='audio/mp3')
                                 except Exception as e:
                                     logger.warning(f"⚠️ [{task_id}] Audio preview error: {e}")
                             else: