File size: 6,427 Bytes
d2f6021
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
"""
SpeechT5 Armenian TTS - HuggingFace Spaces Deployment Version
============================================================

Simplified and optimized for HuggingFace Spaces deployment.
"""

import gradio as gr
import numpy as np
import logging
import time
from typing import Tuple, Optional
import os
import sys

# Put the sibling ``src`` directory on sys.path (once) before importing from it
current_dir = os.path.dirname(os.path.abspath(__file__))
src_path = os.path.join(current_dir, 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)

try:
    from src.pipeline import TTSPipeline
    HAS_PIPELINE = True
except ImportError as e:
    # Log through a named logger rather than the module-level logging.error():
    # the module-level helper implicitly calls basicConfig() when the root
    # logger has no handlers, which would turn the explicit
    # logging.basicConfig(...) further down this file into a no-op.
    logging.getLogger(__name__).error(f"Failed to import pipeline: {e}")

    # No second import attempt: re-adding the same ``src`` directory to
    # sys.path and retrying the identical import cannot change the outcome.
    HAS_PIPELINE = False

    class TTSPipeline:
        """Stand-in pipeline used when ``src.pipeline`` cannot be imported.

        Mirrors the three members this file calls on the real pipeline
        (constructor, ``synthesize``, ``optimize_for_production``) so the UI
        can still start; it produces a test tone instead of speech.
        """

        def __init__(self, *args, **kwargs):
            """Accept and ignore any constructor arguments."""
            pass

        def synthesize(self, text, **kwargs):
            """Return a 440 Hz placeholder tone sized from ``len(text)``.

            Args:
                text: Input text; only its length is used.
                **kwargs: Ignored.

            Returns:
                Tuple ``(16000, audio)`` where ``audio`` is an int16 array
                lasting 0.1 s per character, capped at 5 s.
            """
            duration = min(len(text) * 0.1, 5.0)  # Approximate duration
            sample_rate = 16000
            samples = int(duration * sample_rate)
            # Generate a simple sine wave as placeholder
            t = np.linspace(0, duration, samples)
            frequency = 440  # A4 note
            audio = (np.sin(2 * np.pi * frequency * t) * 0.3).astype(np.float32)
            # Scale the [-0.3, 0.3] float signal into the int16 range
            return sample_rate, (audio * 32767).astype(np.int16)

        def optimize_for_production(self):
            """No-op; nothing to optimize in the placeholder."""
            pass

# Logging setup: INFO and above, each record stamped with time, logger name and level
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)

# Shared pipeline instance; stays None until initialize_pipeline() assigns it
tts_pipeline: Optional[TTSPipeline] = None


def initialize_pipeline():
    """Create the global ``tts_pipeline``, degrading gracefully on failure.

    When ``HAS_PIPELINE`` is false (the pipeline module could not be
    imported) the placeholder ``TTSPipeline`` is instantiated and the call
    counts as a success. Otherwise the pipeline is built from the
    ``Edmon02/TTS_NB_2`` checkpoint and ``optimize_for_production()`` is
    applied to it.

    If that fails, a default-constructed ``TTSPipeline`` is tried as a
    fallback. Should the fallback raise as well, ``tts_pipeline`` is set to
    ``None`` instead of letting the exception escape, so the caller can keep
    running with limited functionality.

    Returns:
        bool: True if the pipeline (or the placeholder) was set up without
        error, False if initialization failed.
    """
    global tts_pipeline

    if not HAS_PIPELINE:
        logger.warning("Pipeline not available - using dummy implementation")
        tts_pipeline = TTSPipeline()
        return True

    try:
        logger.info("Initializing TTS Pipeline...")
        tts_pipeline = TTSPipeline(
            model_checkpoint="Edmon02/TTS_NB_2",
            max_chunk_length=200,
            crossfade_duration=0.1,
            use_mixed_precision=True
        )

        # Apply production optimizations
        tts_pipeline.optimize_for_production()

        logger.info("TTS Pipeline initialized successfully")
        return True

    except Exception as e:
        logger.error(f"Failed to initialize TTS pipeline: {e}", exc_info=True)

        # On this path TTSPipeline is the imported class, not the placeholder,
        # so a no-argument construction may fail too. Guard it: an exception
        # raised inside this handler would otherwise propagate to the caller.
        try:
            tts_pipeline = TTSPipeline()
        except Exception as fallback_error:
            logger.error(f"Fallback TTS pipeline could not be created: {fallback_error}")
            tts_pipeline = None
        return False


def generate_speech(text: str) -> Tuple[int, np.ndarray]:
    """
    Synthesize speech for ``text`` with the global TTS pipeline.

    This is the Gradio callback, so it does not raise on failure: empty or
    whitespace-only text, an uninitialized pipeline, and any exception
    raised during synthesis all yield a short silent clip instead.

    Args:
        text: Input text to synthesize

    Returns:
        Tuple of (sample_rate, audio_array). On the failure paths above this
        is ``(16000, zeros)`` with 1000 int16 samples.
    """
    # Placeholder returned on every failure path: 1000 zero samples at 16 kHz
    silence = (16000, np.zeros(1000, dtype=np.int16))

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments the way time.time() differences can
    start_time = time.perf_counter()

    try:
        # Validate inputs
        if not text or not text.strip():
            logger.warning("Empty text provided")
            return silence

        if tts_pipeline is None:
            logger.error("TTS pipeline not initialized")
            return silence

        # Log request
        logger.info(f"Processing request: {len(text)} characters")

        # Synthesize speech with default settings
        sample_rate, audio = tts_pipeline.synthesize(
            text=text,
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True
        )

        # Log performance
        total_time = time.perf_counter() - start_time
        logger.info(f"Request completed in {total_time:.3f}s")

        return sample_rate, audio

    except Exception as e:
        # logger.exception records the traceback as well as the message, so a
        # failure that the UI only hears as silence is still diagnosable
        logger.exception(f"Synthesis failed: {e}")
        return silence


# Create the Gradio interface
def create_app():
    """Build the Gradio interface that wraps ``generate_speech``.

    One textbox feeds ``generate_speech`` and its result is shown in a
    numpy-typed audio component. Flagging and example caching are both
    switched off.

    Returns:
        The configured ``gr.Interface``; launching it is left to the caller.
    """
    text_input = gr.Textbox(
        label="Armenian Text",
        placeholder="Մուտքագրեք ձեր տեքստը այստեղ...",
        lines=3,
        max_lines=10,
    )
    audio_output = gr.Audio(label="Generated Speech", type="numpy")

    # Markdown shown under the title, assembled line by line
    usage_notes = (
        "\n"
        "        Convert Armenian text to natural speech using SpeechT5.\n"
        "        \n"
        "        **Instructions:**\n"
        "        1. Enter Armenian text in the input box\n"
        "        2. Click Submit to generate speech\n"
        "        3. Listen to the generated audio\n"
        "        \n"
        "        **Tips:**\n"
        "        - Works best with standard Armenian orthography\n"
        "        - Shorter sentences produce better quality\n"
        "        - Include proper punctuation for natural pauses\n"
        "        "
    )

    sample_sentences = (
        "Բարև ձեզ, ինչպե՞ս եք:",
        "Այսօր գեղեցիկ օր է:",
        "Հայաստանն ունի հարուստ պատմություն:",
        "Երևանը Հայաստանի մայրաքաղաքն է:",
        "Արարատ լեռը Հայաստանի խորհրդանիշն է:",
    )

    return gr.Interface(
        fn=generate_speech,
        inputs=[text_input],
        outputs=[audio_output],
        title="🎤 SpeechT5 Armenian Text-to-Speech",
        description=usage_notes,
        # One single-element row per example, matching the single input
        examples=[[sentence] for sentence in sample_sentences],
        theme=gr.themes.Soft(),
        allow_flagging="never",  # Flagging off to avoid schema issues
        cache_examples=False,    # Examples are not cached
    )


def main():
    """Start the application: set up the pipeline, build the UI, launch it.

    A failed pipeline initialization is logged but does not stop the launch.
    """
    logger.info("Starting SpeechT5 Armenian TTS Application")

    # Bring up the TTS pipeline; carry on even if it reports failure
    pipeline_ready = initialize_pipeline()
    if not pipeline_ready:
        logger.error("Failed to initialize TTS pipeline - continuing with limited functionality")

    # HF Spaces launch settings
    launch_options = {
        "share": False,  # No share link on HF Spaces
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }

    # Build the interface and serve it
    create_app().launch(**launch_options)


# Run the application only when this file is executed as a script, not on import
if __name__ == "__main__":
    main()