"""
SpeechT5 Armenian TTS - HuggingFace Spaces Deployment Version
============================================================

Simplified and optimized for HuggingFace Spaces deployment.
"""

import logging
import os
import sys
import time
from typing import Optional, Tuple

import gradio as gr
import numpy as np

# Configure logging FIRST. Any module-level ``logging.error(...)`` call issued
# before ``basicConfig`` installs a default root handler, which would turn the
# configuration below into a silent no-op (losing INFO logs and the format).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# ``from src.pipeline import ...`` needs the directory that CONTAINS ``src`` on
# sys.path; ``src`` itself is added as well, as the original file did.
current_dir = os.path.dirname(os.path.abspath(__file__))
src_path = os.path.join(current_dir, 'src')
for _path in (src_path, current_dir):
    if _path not in sys.path:
        sys.path.insert(0, _path)

# Sample rate and length of the silent clip returned when synthesis is not possible.
_FALLBACK_SAMPLE_RATE = 16000
_FALLBACK_NUM_SAMPLES = 1000


class _PlaceholderPipeline:
    """Stand-in pipeline that emits a short sine tone instead of speech.

    Used when the real pipeline cannot be imported or fails to initialize, so
    the UI stays usable for testing.
    """

    def __init__(self, *args, **kwargs):
        # Accept and ignore any arguments so it can be constructed like the
        # real pipeline.
        pass

    def synthesize(self, text, **kwargs):
        """Return ``(sample_rate, int16_audio)`` containing a 440 Hz tone.

        The tone lasts 0.1 s per input character, capped at 5 s. Extra keyword
        arguments are accepted and ignored.
        """
        duration = min(len(text) * 0.1, 5.0)  # Approximate duration
        sample_rate = 16000
        samples = int(duration * sample_rate)

        # endpoint=False keeps the sample spacing at exactly 1 / sample_rate.
        t = np.linspace(0, duration, samples, endpoint=False)
        frequency = 440  # A4 note
        audio = (np.sin(2 * np.pi * frequency * t) * 0.3).astype(np.float32)
        return sample_rate, (audio * 32767).astype(np.int16)

    def optimize_for_production(self):
        """No-op; present so callers can treat this like the real pipeline."""
        pass


try:
    from src.pipeline import TTSPipeline
    HAS_PIPELINE = True
except ImportError as e:
    logger.error(f"Failed to import pipeline: {e}")
    HAS_PIPELINE = False
    # Keep the public module-level name ``TTSPipeline`` defined either way.
    TTSPipeline = _PlaceholderPipeline  # type: ignore[misc,assignment]

# Global pipeline instance
tts_pipeline: Optional[TTSPipeline] = None

# User-facing Markdown shown under the title. Kept at column 0 so Markdown
# never interprets indented lines as a code block.
_APP_DESCRIPTION = """
Convert Armenian text to natural speech using SpeechT5.

**Instructions:**
1. Enter Armenian text in the input box
2. Click Submit to generate speech
3. Listen to the generated audio

**Tips:**
- Works best with standard Armenian orthography
- Shorter sentences produce better quality
- Include proper punctuation for natural pauses
"""


def _silence() -> Tuple[int, np.ndarray]:
    """Return the short silent clip used whenever synthesis cannot proceed."""
    return _FALLBACK_SAMPLE_RATE, np.zeros(_FALLBACK_NUM_SAMPLES, dtype=np.int16)


def initialize_pipeline() -> bool:
    """Initialize the global TTS pipeline with error handling.

    Returns:
        True if the real pipeline was initialized, or if the real pipeline is
        not importable and the placeholder was installed as planned.
        False if the real pipeline was importable but failed to initialize;
        the placeholder is installed in that case so the app keeps running.
    """
    global tts_pipeline

    if not HAS_PIPELINE:
        logger.warning("Pipeline not available - using dummy implementation")
        tts_pipeline = _PlaceholderPipeline()
        return True

    try:
        logger.info("Initializing TTS Pipeline...")
        pipeline = TTSPipeline(
            model_checkpoint="Edmon02/TTS_NB_2",
            max_chunk_length=200,
            crossfade_duration=0.1,
            use_mixed_precision=True
        )
        # Apply production optimizations
        pipeline.optimize_for_production()
    except Exception as e:
        # Top-level boundary: log with traceback and degrade gracefully.
        logger.exception("Failed to initialize TTS pipeline: %s", e)
        # Fall back to the placeholder explicitly. Re-instantiating the real
        # class here could fail again and crash startup.
        tts_pipeline = _PlaceholderPipeline()
        return False

    tts_pipeline = pipeline
    logger.info("TTS Pipeline initialized successfully")
    return True


def generate_speech(text: str) -> Tuple[int, np.ndarray]:
    """
    Main synthesis function optimized for HF Spaces.

    Never raises: on empty input, a missing pipeline, or a synthesis error it
    logs the problem and returns a short silent clip instead.

    Args:
        text: Input text to synthesize

    Returns:
        Tuple of (sample_rate, audio_array)
    """
    start_time = time.time()

    # Validate inputs
    if not text or not text.strip():
        logger.warning("Empty text provided")
        return _silence()

    if tts_pipeline is None:
        logger.error("TTS pipeline not initialized")
        return _silence()

    # Log request
    logger.info(f"Processing request: {len(text)} characters")

    try:
        # Synthesize speech with default settings
        sample_rate, audio = tts_pipeline.synthesize(
            text=text,
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True
        )
    except Exception as e:
        # UI callback boundary: keep the app responsive, but keep the traceback.
        logger.exception("Synthesis failed: %s", e)
        return _silence()

    # Log performance
    total_time = time.time() - start_time
    logger.info(f"Request completed in {total_time:.3f}s")

    return sample_rate, audio


def create_app():
    """Create the main Gradio application.

    Returns:
        A ``gr.Interface`` wired to :func:`generate_speech`, with one text
        input, one audio output and a set of Armenian example sentences.
    """
    interface = gr.Interface(
        fn=generate_speech,
        inputs=[
            gr.Textbox(
                label="Armenian Text",
                placeholder="Մուտքագրեք ձեր տեքստը այստեղ...",
                lines=3,
                max_lines=10
            )
        ],
        outputs=[
            gr.Audio(
                label="Generated Speech",
                type="numpy"
            )
        ],
        title="🎤 SpeechT5 Armenian Text-to-Speech",
        description=_APP_DESCRIPTION,
        examples=[
            ["Բարև ձեզ, ինչպե՞ս եք:"],
            ["Այսօր գեղեցիկ օր է:"],
            ["Հայաստանն ունի հարուստ պատմություն:"],
            ["Երևանը Հայաստանի մայրաքաղաքն է:"],
            ["Արարատ լեռը Հայաստանի խորհրդանիշն է:"]
        ],
        theme=gr.themes.Soft(),
        allow_flagging="never",  # Disable flagging to avoid schema issues
        cache_examples=False  # Disable example caching
    )

    return interface


def main():
    """Main application entry point: initialize the pipeline and serve the UI."""
    logger.info("Starting SpeechT5 Armenian TTS Application")

    # Initialize pipeline
    if not initialize_pipeline():
        logger.error("Failed to initialize TTS pipeline - continuing with limited functionality")

    # Create and launch interface
    app = create_app()

    # Launch with HF Spaces settings
    app.launch(
        share=False,  # Don't create share link on HF Spaces
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )


if __name__ == "__main__":
    main()