""" SpeechT5 Armenian TTS - Production Deployment ============================================ Production-ready version for HuggingFace Spaces with robust error handling. """ import gradio as gr import numpy as np import logging import time import os import sys from typing import Tuple, Optional, Union # Setup logging first logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s' ) logger = logging.getLogger(__name__) # Global pipeline variable pipeline = None def safe_import(): """Safely import the TTS pipeline with fallbacks.""" global pipeline try: # Add src to path current_dir = os.path.dirname(os.path.abspath(__file__)) src_path = os.path.join(current_dir, 'src') if src_path not in sys.path: sys.path.insert(0, src_path) # Import pipeline from src.pipeline import TTSPipeline logger.info("Initializing TTS Pipeline...") pipeline = TTSPipeline( model_checkpoint="Edmon02/TTS_NB_2", max_chunk_length=200, crossfade_duration=0.1, use_mixed_precision=True ) # Optimize for production pipeline.optimize_for_production() logger.info("TTS Pipeline ready") return True except Exception as e: logger.error(f"Failed to initialize pipeline: {e}") logger.info("Creating fallback pipeline for testing") # Create a simple fallback class FallbackPipeline: def synthesize(self, text, **kwargs): # Generate simple tone as placeholder duration = min(len(text) * 0.08, 3.0) sample_rate = 16000 samples = int(duration * sample_rate) t = np.linspace(0, duration, samples) # Create a simple beep audio = np.sin(2 * np.pi * 440 * t) * 0.3 return sample_rate, (audio * 32767).astype(np.int16) pipeline = FallbackPipeline() return False def generate_audio(text: str) -> Tuple[int, np.ndarray]: """ Generate audio from Armenian text. Args: text: Armenian text to synthesize Returns: Tuple of (sample_rate, audio_data) """ if not text or not text.strip(): logger.warning("Empty text provided") # Return silence return 16000, np.zeros(1000, dtype=np.int16) if pipeline is None: logger.error("Pipeline not available") return 16000, np.zeros(1000, dtype=np.int16) try: logger.info(f"Processing: {text[:50]}...") start_time = time.time() # Synthesize with basic parameters sample_rate, audio = pipeline.synthesize( text=text, speaker="BDL", enable_chunking=True, apply_audio_processing=True ) duration = time.time() - start_time logger.info(f"Generated {len(audio)} samples in {duration:.2f}s") return sample_rate, audio except Exception as e: logger.error(f"Synthesis error: {e}") # Return silence on error return 16000, np.zeros(1000, dtype=np.int16) # Initialize the pipeline logger.info("Starting TTS application...") initialization_success = safe_import() if initialization_success: status_message = "✅ TTS System Ready" else: status_message = "⚠️ Running in Test Mode (Limited Functionality)" # Create the Gradio interface using the simpler gr.Interface demo = gr.Interface( fn=generate_audio, inputs=gr.Textbox( label="Armenian Text", placeholder="Գրեք ձեր տեքստը այստեղ...", lines=3, max_lines=8 ), outputs=gr.Audio( label="Generated Speech", type="numpy" ), title="🎤 Armenian Text-to-Speech", description=f""" {status_message} Convert Armenian text to speech using SpeechT5. **How to use:** 1. Enter Armenian text in the box below 2. Click Submit to generate speech 3. Play the generated audio **Tips:** - Use standard Armenian script - Shorter sentences work better - Include punctuation for natural pauses """, examples=[ "Բարև ձեզ:", "Ինչպե՞ս եք:", "Շնորհակալություն:", "Կեցցե՛ Հայաստանը:", "Այսօր լավ օր է:" ], theme=gr.themes.Default(), allow_flagging="never" ) # Launch the app if __name__ == "__main__": demo.launch( server_name="0.0.0.0", server_port=7860, share=False )