"""
Armenian TTS - Minimal HF Spaces Version
=======================================

Absolutely minimal version to avoid all possible compatibility issues.
"""

import logging
import os
import sys

import gradio as gr
import numpy as np

# Simple logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Sample rate used for every locally generated buffer (silence / fallback tone).
_SAMPLE_RATE = 16000
# Fallback tone length: this many seconds per input character, capped below.
_SECONDS_PER_CHAR = 0.08
_MAX_FALLBACK_SECONDS = 4.0
# Fundamental of the fallback tone (A4) and its (harmonic multiple, amplitude)
# partials. The amplitudes sum to 0.35, so the int16 conversion cannot clip.
_FALLBACK_FREQUENCY = 440
_FALLBACK_PARTIALS = ((1, 0.2), (2, 0.1), (3, 0.05))


def setup_pipeline():
    """Try to build the TTS pipeline, never raising.

    Adds the ``src`` directory next to this file to ``sys.path``, imports
    ``TTSPipeline`` from ``src.pipeline``, constructs it and calls its
    ``optimize_for_production()`` method.

    Returns:
        A ``(pipeline, available)`` tuple: ``(pipeline, True)`` on success,
        ``(None, False)`` if anything in the import or initialization raised.
    """
    try:
        # Add source path
        current_dir = os.path.dirname(os.path.abspath(__file__))
        src_path = os.path.join(current_dir, 'src')
        if src_path not in sys.path:
            sys.path.insert(0, src_path)

        # Imported lazily so a missing/broken package only disables real TTS
        # instead of preventing the app from starting.
        from src.pipeline import TTSPipeline

        pipeline = TTSPipeline(
            model_checkpoint="Edmon02/TTS_NB_2",
            max_chunk_length=200,
            use_mixed_precision=True
        )
        pipeline.optimize_for_production()
        logger.info("TTS pipeline initialized successfully")
        return pipeline, True
    except Exception as e:
        # Deliberate best-effort boundary: the app falls back to test mode.
        # logger.exception keeps the traceback that logger.error dropped.
        logger.exception("Pipeline initialization failed: %s", e)
        return None, False


def _silence(seconds):
    """Return ``seconds`` of int16 silence at the local sample rate."""
    return np.zeros(int(seconds * _SAMPLE_RATE), dtype=np.int16)


def _fallback_tone(text):
    """Return ``(sample_rate, int16 audio)`` for a decaying placeholder tone.

    The tone length grows with ``len(text)`` and is capped at
    ``_MAX_FALLBACK_SECONDS``. Half a second of silence is returned if the
    computed length is zero samples.
    """
    duration = min(len(text) * _SECONDS_PER_CHAR, _MAX_FALLBACK_SECONDS)
    samples = int(duration * _SAMPLE_RATE)
    if samples <= 0:
        return _SAMPLE_RATE, _silence(0.5)

    # Time axis with exactly 1/_SAMPLE_RATE spacing. np.linspace(0, duration,
    # samples) includes the endpoint, which stretches the spacing to
    # duration/(samples-1) and slightly detunes the tone on playback.
    t = np.arange(samples) / _SAMPLE_RATE

    # Fundamental plus two harmonics for a richer sound.
    audio = np.zeros(samples)
    for multiple, amplitude in _FALLBACK_PARTIALS:
        audio += np.sin(2 * np.pi * _FALLBACK_FREQUENCY * multiple * t) * amplitude

    # Exponential decay envelope.
    audio *= np.exp(-t * 2)

    return _SAMPLE_RATE, (audio * 32767).astype(np.int16)


def tts_process(text):
    """Convert ``text`` to audio for the Gradio interface, never raising.

    Reads the module-level ``tts_pipeline`` / ``pipeline_available`` set at
    import time.

    Args:
        text: Text to synthesize. Anything that is not a non-blank string
            yields one second of silence.

    Returns:
        A ``(sample_rate, audio)`` tuple. When the pipeline is unavailable a
        generated placeholder tone is returned; when synthesis raises, half a
        second of silence at 16 kHz is returned. Otherwise the values come
        straight from ``tts_pipeline.synthesize``.
    """
    # Basic input validation
    if not isinstance(text, str) or not text.strip():
        return _SAMPLE_RATE, _silence(1.0)

    text = text.strip()

    # If no pipeline is available, create a simple audio response
    if not pipeline_available or tts_pipeline is None:
        logger.info("Using fallback for text: %s...", text[:30])
        return _fallback_tone(text)

    # Try real TTS
    try:
        logger.info("Synthesizing: %s...", text[:50])
        sample_rate, audio = tts_pipeline.synthesize(
            text=text,
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True
        )
        # Kept inside the try so an unusable result (e.g. no len()) also
        # degrades to silence instead of surfacing as a UI error.
        logger.info("Successfully generated %d samples", len(audio))
        return sample_rate, audio
    except Exception as e:
        logger.exception("TTS synthesis failed: %s", e)
        # Fallback to silence
        return _SAMPLE_RATE, _silence(0.5)


# Initialize the pipeline once
logger.info("Initializing Armenian TTS application...")
tts_pipeline, pipeline_available = setup_pipeline()

if pipeline_available:
    title = "🇦🇲 Armenian Text-to-Speech (Ready)"
    description = "Convert Armenian text to speech using SpeechT5."
else:
    title = "🇦🇲 Armenian TTS (Test Mode)"
    description = "TTS system in test mode - will generate simple audio tones."

# Create the simplest possible Gradio interface
app = gr.Interface(
    fn=tts_process,
    inputs="text",
    outputs="audio",
    title=title,
    description=description,
    examples=[
        "Բարև ձեզ",
        "Շնորհակալություն",
        "Ինչպե՞ս եք"
    ]
)

# Launch the app
if __name__ == "__main__":
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )