""" Armenian TTS - HuggingFace Spaces Compatible =========================================== Final version optimized for HF Spaces with Gradio 3.x compatibility. """ import gradio as gr import numpy as np import logging import os import sys # Minimal logging setup logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) # Global variables tts_pipeline = None pipeline_ready = False def initialize_tts(): """Initialize TTS pipeline with comprehensive error handling.""" global tts_pipeline, pipeline_ready try: # Setup path for imports current_dir = os.path.dirname(os.path.abspath(__file__)) src_path = os.path.join(current_dir, 'src') if src_path not in sys.path: sys.path.insert(0, src_path) # Import and initialize pipeline from src.pipeline import TTSPipeline logger.info("Initializing TTS pipeline...") tts_pipeline = TTSPipeline( model_checkpoint="Edmon02/TTS_NB_2", max_chunk_length=200, crossfade_duration=0.1, use_mixed_precision=True ) # Apply optimizations tts_pipeline.optimize_for_production() pipeline_ready = True logger.info("TTS pipeline ready!") return True except Exception as e: logger.error(f"Failed to initialize TTS pipeline: {e}") pipeline_ready = False return False def synthesize_speech(text): """ Main synthesis function with fallback handling. Args: text (str): Armenian text to synthesize Returns: tuple: (sample_rate, audio_array) """ # Validate input if not text or not isinstance(text, str) or not text.strip(): return create_silence(1.0) # Check pipeline status if not pipeline_ready or tts_pipeline is None: logger.warning("Pipeline not ready, generating fallback audio") return create_fallback_audio(text) try: logger.info(f"Synthesizing: {text[:50]}...") # Generate speech using pipeline sample_rate, audio = tts_pipeline.synthesize( text=text.strip(), speaker="BDL", enable_chunking=True, apply_audio_processing=True ) logger.info(f"Generated {len(audio)} samples at {sample_rate}Hz") return sample_rate, audio except Exception as e: logger.error(f"Synthesis error: {e}") return create_fallback_audio(text) def create_silence(duration_seconds=1.0): """Create silence audio.""" sample_rate = 16000 samples = int(duration_seconds * sample_rate) return sample_rate, np.zeros(samples, dtype=np.int16) def create_fallback_audio(text): """Create simple fallback audio based on text.""" # Calculate duration based on text length duration = min(max(len(text) * 0.1, 0.5), 5.0) sample_rate = 16000 samples = int(duration * sample_rate) # Generate simple tone t = np.linspace(0, duration, samples) # Create a pleasant tone sequence base_freq = 440 # A4 audio = np.sin(2 * np.pi * base_freq * t) * 0.3 # Add some variation for longer texts if len(text) > 20: audio += np.sin(2 * np.pi * (base_freq * 1.5) * t) * 0.2 # Apply fade in/out fade_samples = min(samples // 10, 1000) if fade_samples > 0: fade_in = np.linspace(0, 1, fade_samples) fade_out = np.linspace(1, 0, fade_samples) audio[:fade_samples] *= fade_in audio[-fade_samples:] *= fade_out # Convert to int16 audio_int16 = (audio * 32767).astype(np.int16) return sample_rate, audio_int16 # Initialize the pipeline logger.info("Starting Armenian TTS application...") init_success = initialize_tts() if init_success: app_status = "🟢 TTS System Ready" app_description = """ 🎤 **Armenian Text-to-Speech System** Convert Armenian text to natural speech using SpeechT5. **How to use:** 1. Enter Armenian text in the box below 2. Click Submit to generate speech 3. Play the generated audio **Tips for best results:** - Use standard Armenian script (Unicode) - Include punctuation for natural pauses - Shorter sentences work better for quality """ else: app_status = "🟡 Test Mode (Limited Functionality)" app_description = """ 🎤 **Armenian Text-to-Speech System - Test Mode** The TTS system is running in test mode with limited functionality. Text input will generate simple audio tones as placeholders. """ # Create the Gradio interface using Gradio 3.x syntax demo = gr.Interface( fn=synthesize_speech, inputs=gr.inputs.Textbox( lines=3, placeholder="Մուտքագրեք ձեր հայերեն տեքստը այստեղ...", label="Armenian Text Input" ), outputs=gr.outputs.Audio( label="Generated Speech" ), title=f"🇦🇲 Armenian Text-to-Speech {app_status}", description=app_description, examples=[ "Բարև ձեզ, ինչպե՞ս եք:", "Այսօր գեղեցիկ օր է:", "Շնորհակալություն:", "Հայաստան իմ սիրելի երկիրն է:", "Երևանը Հայաստանի մայրաքաղաքն է:" ], theme="default", allow_screenshot=False, allow_flagging="never" ) # Launch the application if __name__ == "__main__": demo.launch( server_name="0.0.0.0", server_port=7860, share=False, debug=False, quiet=False )