File size: 5,700 Bytes
3f1840e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
"""
Armenian TTS - HuggingFace Spaces Compatible
===========================================

Final version optimized for HF Spaces with Gradio 3.x compatibility.
"""

import gradio as gr
import numpy as np
import logging
import os
import sys

# Logging: install the root handler at INFO level, then take this module's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Shared module state, written by initialize_tts():
#   tts_pipeline   - the constructed pipeline object, or None before/without one
#   pipeline_ready - flipped to True once initialization has succeeded
tts_pipeline, pipeline_ready = None, False

def initialize_tts():
    """Build the global TTS pipeline and report whether it is usable.

    The sibling ``src`` directory is put on ``sys.path``, ``TTSPipeline`` is
    imported lazily, constructed, and ``optimize_for_production()`` is called
    on it. The module globals ``tts_pipeline`` and ``pipeline_ready`` are
    updated to reflect the outcome.

    Any exception raised along the way is logged (with its traceback) and
    swallowed so the application can still start in fallback mode.

    Returns:
        bool: True when the pipeline was fully initialized, False otherwise.
    """
    global tts_pipeline, pipeline_ready

    try:
        # Make the bundled ``src`` directory importable
        current_dir = os.path.dirname(os.path.abspath(__file__))
        src_path = os.path.join(current_dir, 'src')
        if src_path not in sys.path:
            sys.path.insert(0, src_path)

        # Imported here so a missing/broken package is caught by this handler
        from src.pipeline import TTSPipeline

        logger.info("Initializing TTS pipeline...")
        pipeline = TTSPipeline(
            model_checkpoint="Edmon02/TTS_NB_2",
            max_chunk_length=200,
            crossfade_duration=0.1,
            use_mixed_precision=True
        )

        # Apply optimizations before the pipeline is published
        pipeline.optimize_for_production()

    except Exception as e:
        # logger.exception keeps the traceback; str(e) alone is often useless
        logger.exception("Failed to initialize TTS pipeline: %s", e)
        # Never leave a half-initialized pipeline behind in the global
        tts_pipeline = None
        pipeline_ready = False
        return False

    # Publish only after every step succeeded, so the two globals always agree
    tts_pipeline = pipeline
    pipeline_ready = True
    logger.info("TTS pipeline ready!")
    return True

def synthesize_speech(text):
    """
    Synthesize speech for ``text``, falling back to placeholder audio.

    Blank or non-string input yields one second of silence. If the pipeline
    is not ready, or synthesis raises, a fallback tone from
    ``create_fallback_audio`` is returned instead of propagating the error.

    Args:
        text (str): Armenian text to synthesize

    Returns:
        tuple: (sample_rate, audio_array)
    """
    # Validate input: anything that is not a non-blank string becomes silence
    if not isinstance(text, str) or not text.strip():
        return create_silence(1.0)

    # Check pipeline status
    if not pipeline_ready or tts_pipeline is None:
        logger.warning("Pipeline not ready, generating fallback audio")
        return create_fallback_audio(text)

    try:
        # Only a 50-character prefix is logged to keep log lines short
        logger.info("Synthesizing: %s...", text[:50])

        # Generate speech using pipeline
        sample_rate, audio = tts_pipeline.synthesize(
            text=text.strip(),
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True
        )

        logger.info("Generated %s samples at %sHz", len(audio), sample_rate)
        return sample_rate, audio

    except Exception as e:
        # Keep the traceback in the log; the caller still gets audible output
        logger.exception("Synthesis error: %s", e)
        return create_fallback_audio(text)

def create_silence(duration_seconds=1.0):
    """Return ``(sample_rate, samples)`` describing a stretch of silence.

    Args:
        duration_seconds (float): Length of the silence in seconds.

    Returns:
        tuple: ``(16000, zeros)`` where ``zeros`` is a 1-D int16 array of
        ``int(duration_seconds * 16000)`` samples.
    """
    rate_hz = 16000
    n_samples = int(duration_seconds * rate_hz)
    silence = np.zeros(n_samples, dtype=np.int16)
    return rate_hz, silence

def create_fallback_audio(text):
    """Create a simple placeholder tone whose length tracks the text length.

    The duration is 0.1 s per character, clamped to the range 0.5-5.0 s. The
    signal is a 440 Hz sine at 0.3 amplitude; texts longer than 20 characters
    get an additional 660 Hz sine at 0.2 amplitude. A linear fade-in/out of up
    to 1000 samples is applied at the edges.

    Args:
        text (str): Text the placeholder stands in for; only its length is used.

    Returns:
        tuple: ``(16000, audio)`` where ``audio`` is a 1-D int16 array.
    """
    sample_rate = 16000

    # Calculate duration based on text length
    duration = min(max(len(text) * 0.1, 0.5), 5.0)
    samples = int(duration * sample_rate)

    # Sample k sits at exactly k / sample_rate seconds. np.linspace with its
    # default endpoint=True would space samples duration/(samples-1) apart,
    # which slightly detunes the tone.
    t = np.arange(samples) / sample_rate

    base_freq = 440  # A4
    audio = np.sin(2 * np.pi * base_freq * t) * 0.3

    # Add some variation for longer texts (peak stays at 0.5, so no clipping)
    if len(text) > 20:
        audio += np.sin(2 * np.pi * (base_freq * 1.5) * t) * 0.2

    # Apply fade in/out to avoid clicks at the edges
    fade_samples = min(samples // 10, 1000)
    if fade_samples > 0:
        audio[:fade_samples] *= np.linspace(0, 1, fade_samples)
        audio[-fade_samples:] *= np.linspace(1, 0, fade_samples)

    # Convert to int16
    return sample_rate, (audio * 32767).astype(np.int16)

# Bring the pipeline up at import time so the UI text can reflect the outcome
logger.info("Starting Armenian TTS application...")
init_success = initialize_tts()

# Pick the status badge and description shown in the interface
if not init_success:
    # Pipeline unavailable: the app only produces placeholder tones
    app_status = "🟡 Test Mode (Limited Functionality)"
    app_description = """
    🎤 **Armenian Text-to-Speech System - Test Mode**
    
    The TTS system is running in test mode with limited functionality.
    Text input will generate simple audio tones as placeholders.
    """
else:
    # Pipeline initialized: show the full usage instructions
    app_status = "🟢 TTS System Ready"
    app_description = """
    🎤 **Armenian Text-to-Speech System**
    
    Convert Armenian text to natural speech using SpeechT5.
    
    **How to use:**
    1. Enter Armenian text in the box below
    2. Click Submit to generate speech
    3. Play the generated audio
    
    **Tips for best results:**
    - Use standard Armenian script (Unicode)
    - Include punctuation for natural pauses
    - Shorter sentences work better for quality
    """

# Create the Gradio interface using Gradio 3.x syntax
# NOTE(review): gr.inputs / gr.outputs are the legacy component namespaces;
# confirm the Gradio version pinned for this Space still provides them.
demo = gr.Interface(
    # Called with the textbox contents; returns a (sample_rate, audio) tuple
    fn=synthesize_speech,
    inputs=gr.inputs.Textbox(
        lines=3,
        placeholder="Մուտքագրեք ձեր հայերեն տեքստը այստեղ...",
        label="Armenian Text Input"
    ),
    outputs=gr.outputs.Audio(
        label="Generated Speech"
    ),
    # Title embeds the status string chosen after initialize_tts() ran
    title=f"🇦🇲 Armenian Text-to-Speech {app_status}",
    description=app_description,
    # Sample inputs offered in the UI
    examples=[
        "Բարև ձեզ, ինչպե՞ս եք:",
        "Այսօր գեղեցիկ օր է:",
        "Շնորհակալություն:",
        "Հայաստան իմ սիրելի երկիրն է:",
        "Երևանը Հայաստանի մայրաքաղաքն է:"
    ],
    theme="default",
    allow_screenshot=False,
    allow_flagging="never"
)

# Launch the application only when this file is executed as a script;
# importing the module builds `demo` but does not start a server.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # listen on all network interfaces
        server_port=7860,
        share=False,
        debug=False,
        quiet=False
    )