Spaces:

MALIBA-AI
/

BambaraText2Speech

Running on Zero

App Files Files Community

sudoping01 committed 11 days ago

Commit

b254d98

verified ·

1 Parent(s): e849c49

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -78

app.py CHANGED Viewed

@@ -25,68 +25,72 @@ if hf_token:
     login(token=hf_token)
-class ModelSingleton:
-    _instance = None
-    _lock = threading.Lock()
-    def __new__(cls):
-        if cls._instance is None:
-            with cls._lock:
-                if cls._instance is None:
-                    cls._instance = super(ModelSingleton, cls).__new__(cls)
-                    cls._instance.initialized = False
-                    cls._instance.tts_model = None
-                    cls._instance.speakers_dict = None
-                    cls._instance.init_lock = threading.RLock()
-        return cls._instance
-    @spaces.GPU()
-    def initialize(self):
-        """Thread-safe initialization with singleton pattern"""
-        if self.initialized:
-            logger.info("Model already initialized, skipping...")
-            return self.tts_model, self.speakers_dict
-        with self.init_lock:
-            # Double-check pattern
-            if self.initialized:
-                logger.info("Model already initialized (double-check), skipping...")
-                return self.tts_model, self.speakers_dict
-            logger.info("Initializing Bambara TTS model...")
-            start_time = time.time()
-            try:
-                from maliba_ai.tts.inference import BambaraTTSInference
-                from maliba_ai.config.speakers import Adame, Moussa, Bourama, Modibo, Seydou
-                self.tts_model = BambaraTTSInference()
-                self.speakers_dict = {
-                    "Adama": Adame,
-                    "Moussa": Moussa,
-                    "Bourama": Bourama,
-                    "Modibo": Modibo,
-                    "Seydou": Seydou
-                }
-                self.initialized = True
-                elapsed = time.time() - start_time
-                logger.info(f"Model initialized successfully in {elapsed:.2f} seconds!")
-            except Exception as e:
-                logger.error(f"Failed to initialize model: {e}")
-                raise e
-        return self.tts_model, self.speakers_dict
-    def get_model(self):
-        """Get the model, initializing if needed"""
-        if not self.initialized:
-            return self.initialize()
-        return self.tts_model, self.speakers_dict
-# Global singleton instance
-model_singleton = ModelSingleton()
 def validate_inputs(text, temperature, top_k, top_p, max_tokens):
     if not text or not text.strip():
@@ -109,10 +113,17 @@ def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p,
         return None, "Please enter some Bambara text."
     try:
-        # Get model through singleton
-        tts, speakers = model_singleton.get_model()
         speaker = speakers[speaker_name]
         if use_advanced:
             is_valid, error_msg = validate_inputs(text, temperature, top_k, top_p, max_tokens)
@@ -133,30 +144,20 @@ def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p,
                 speaker_id=speaker
             )
-        if waveform.size == 0:
             return None, "Failed to generate audio. Please try again."
         sample_rate = 16000
-        return (sample_rate, waveform), f"✅ Audio generated successfully"
     except Exception as e:
         logger.error(f"Speech generation failed: {e}")
         return None, f"❌ Error: {str(e)}"
-# Preload model on startup (optional - comment out if you prefer lazy loading)
-def preload_model():
-    """Preload the model when the app starts"""
-    try:
-        logger.info("Preloading model...")
-        model_singleton.initialize()
-        logger.info("Model preloaded successfully!")
-    except Exception as e:
-        logger.error(f"Failed to preload model: {e}")
-SPEAKER_NAMES = ["Adame", "Moussa", "Bourama", "Modibo", "Seydou"]
 examples = [
-    ["Aw ni ce", "Adame"],
     ["Mali bɛna diya kɔsɛbɛ,  ka a da a kan baara bɛ ka kɛ.", "Moussa"],
     ["Ne bɛ se ka sɛbɛnni yɛlɛma ka kɛ kuma ye", "Bourama"],
     ["I ka kɛnɛ wa?", "Modibo"],
@@ -277,7 +278,7 @@ def build_interface():
             - **Speakers**: 5 different voice options
             - **Sample Rate**: 16kHz
-            **Status**: Model loads once and reuses for all requests
             """)
         def toggle_advanced(use_adv):
@@ -309,8 +310,7 @@ def main():
     """Main function to launch the Gradio interface"""
     logger.info("Starting Bambara TTS Gradio interface.")
-    preload_model()
     interface = build_interface()
     interface.launch(
         server_name="0.0.0.0",

     login(token=hf_token)
+# Module-level cache for the lazily loaded model; written by initialize_model_once().
+_tts_model = None  # model instance, set after a successful load
+_speakers_dict = None  # speaker-name -> speaker object mapping, set after a successful load
+_model_initialized = False  # flipped to True once model and speakers are both stored
+_initialization_in_progress = False  # True only while a load attempt is running
+def get_speakers_dict():
+    """Get speakers dictionary - moved to function to avoid import issues"""
+    try:
+        from maliba_ai.config.speakers import Adame, Moussa, Bourama, Modibo, Seydou
+        return {
+            "Adama": Adame,
+            "Moussa": Moussa,
+            "Bourama": Bourama,
+            "Modibo": Modibo,
+            "Seydou": Seydou
+        }
+    except Exception as e:
+        logger.error(f"Failed to import speakers: {e}")
+        return {}
+@spaces.GPU()
+def initialize_model_once():
+    global _tts_model, _speakers_dict, _model_initialized, _initialization_in_progress
+    if _model_initialized:
+        logger.info("Model already initialized, returning existing instance")
+        return _tts_model, _speakers_dict
+    if _initialization_in_progress:
+        logger.info("Initialization already in progress, waiting...")
+        for _ in range(50):
+            time.sleep(0.1)
+            if _model_initialized:
+                return _tts_model, _speakers_dict
+    _initialization_in_progress = True
+    try:
+        logger.info("Initializing Bambara TTS model...")
+        start_time = time.time()
+        from maliba_ai.tts.inference import BambaraTTSInference
+        model = BambaraTTSInference()
+        speakers = get_speakers_dict()
+        if not speakers:
+            raise ValueError("Failed to load speakers dictionary")
+        _tts_model = model
+        _speakers_dict = speakers
+        _model_initialized = True
+        elapsed = time.time() - start_time
+        logger.info(f"Model initialized successfully in {elapsed:.2f} seconds!")
+        return _tts_model, _speakers_dict
+    except Exception as e:
+        logger.error(f"Failed to initialize model: {e}")
+        _initialization_in_progress = False
+        raise e
+    finally:
+        _initialization_in_progress = False
 def validate_inputs(text, temperature, top_k, top_p, max_tokens):
     if not text or not text.strip():
         return None, "Please enter some Bambara text."
     try:
+        tts, speakers = initialize_model_once()
+        if not tts or not speakers:
+            return None, "❌ Model not properly initialized"
+        if speaker_name not in speakers:
+            available_speakers = list(speakers.keys())
+            return None, f"❌ Speaker '{speaker_name}' not found. Available: {available_speakers}"
         speaker = speakers[speaker_name]
+        logger.info(f"Using speaker: {speaker_name}")
         if use_advanced:
             is_valid, error_msg = validate_inputs(text, temperature, top_k, top_p, max_tokens)
                 speaker_id=speaker
             )
+        if waveform is None or waveform.size == 0:
             return None, "Failed to generate audio. Please try again."
         sample_rate = 16000
+        return (sample_rate, waveform), f"✅ Audio generated successfully for speaker {speaker_name}"
     except Exception as e:
         logger.error(f"Speech generation failed: {e}")
         return None, f"❌ Error: {str(e)}"
+SPEAKER_NAMES = ["Adama", "Moussa", "Bourama", "Modibo", "Seydou"]
 examples = [
+    ["Aw ni ce", "Adama"],
     ["Mali bɛna diya kɔsɛbɛ,  ka a da a kan baara bɛ ka kɛ.", "Moussa"],
     ["Ne bɛ se ka sɛbɛnni yɛlɛma ka kɛ kuma ye", "Bourama"],
     ["I ka kɛnɛ wa?", "Modibo"],
             - **Speakers**: 5 different voice options
             - **Sample Rate**: 16kHz
+            **Model loads once on first request and stays in memory**
             """)
         def toggle_advanced(use_adv):
     """Main function to launch the Gradio interface"""
     logger.info("Starting Bambara TTS Gradio interface.")
+    # DO NOT preload - let it initialize on first request only
     interface = build_interface()
     interface.launch(
         server_name="0.0.0.0",