# Source: Hugging Face Spaces - Edmon02 / SpeechT5_hy
# Space status when captured: Runtime error
# File size: 6,427 Bytes
# Revision: d2f6021

"""
SpeechT5 Armenian TTS - HuggingFace Spaces Deployment Version
============================================================

Simplified and optimized for HuggingFace Spaces deployment.
"""

import gradio as gr
import numpy as np
import logging
import time
from typing import Tuple, Optional
import os
import sys

# Make the bundled ``src`` directory importable so the pipeline can be loaded
# either as ``src.pipeline`` or, failing that, as a top-level ``pipeline``.
current_dir = os.path.dirname(os.path.abspath(__file__))
src_path = os.path.join(current_dir, 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)

try:
    from src.pipeline import TTSPipeline
    HAS_PIPELINE = True
except ImportError as e:
    # Report through a named logger instead of ``logging.error``: the
    # root-level helper implicitly calls ``basicConfig()`` when no handler is
    # installed, which would turn the explicit ``logging.basicConfig(...)``
    # call further down this file into a no-op.
    logging.getLogger(__name__).error(f"Failed to import pipeline: {e}")
    try:
        # Fallback import attempt: ``src_path`` is already on ``sys.path``, so
        # the module may still be reachable under its top-level name.
        # Retrying the identical ``src.pipeline`` import could never succeed.
        from pipeline import TTSPipeline
        HAS_PIPELINE = True
    except ImportError:
        HAS_PIPELINE = False

        class TTSPipeline:
            """Stand-in used for testing when the real pipeline cannot be imported.

            It accepts and ignores any constructor arguments and produces a
            placeholder tone instead of speech.
            """

            def __init__(self, *args, **kwargs):
                """Accept and ignore all arguments."""
                pass

            def synthesize(self, text, **kwargs):
                """Return a placeholder 440 Hz tone sized from ``len(text)``.

                Args:
                    text: Input text; only its length is used.
                    **kwargs: Ignored.

                Returns:
                    Tuple ``(sample_rate, audio)`` with ``sample_rate`` 16000
                    and ``audio`` a 1-D ``int16`` array lasting 0.1 s per
                    character, capped at 5 s.
                """
                duration = min(len(text) * 0.1, 5.0)  # Approximate duration
                sample_rate = 16000
                samples = int(duration * sample_rate)
                # Generate a simple sine wave as placeholder
                t = np.linspace(0, duration, samples)
                frequency = 440  # A4 note
                audio = (np.sin(2 * np.pi * frequency * t) * 0.3).astype(np.float32)
                # Scale the [-0.3, 0.3] float signal into the int16 range.
                return sample_rate, (audio * 32767).astype(np.int16)

            def optimize_for_production(self):
                """Do nothing; present so callers can treat both classes alike."""
                pass

# Set up root logging at INFO level with a timestamp / logger name / severity
# prefix, then create the module-scoped logger used throughout this file.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Global pipeline instance shared by the functions in this module.
# It stays None until initialize_pipeline() assigns it; generate_speech()
# reads it and returns silence while it is still None.
tts_pipeline: Optional[TTSPipeline] = None


def initialize_pipeline():
    """Create the module-level ``tts_pipeline`` instance.

    When ``HAS_PIPELINE`` is false, the dummy ``TTSPipeline`` is instantiated
    without arguments. Otherwise the pipeline is built with the production
    settings and ``optimize_for_production()`` is applied.

    If the configured initialization raises, a default-constructed
    ``TTSPipeline()`` is attempted as a fallback. Should that raise as well,
    ``tts_pipeline`` is set to ``None`` instead of letting the exception escape
    and abort startup; ``generate_speech`` returns silence in that state.

    Returns:
        bool: ``True`` if the dummy or the fully configured pipeline was
        created, ``False`` if the configured initialization failed.
    """
    global tts_pipeline

    if not HAS_PIPELINE:
        logger.warning("Pipeline not available - using dummy implementation")
        tts_pipeline = TTSPipeline()
        return True

    try:
        logger.info("Initializing TTS Pipeline...")
        tts_pipeline = TTSPipeline(
            model_checkpoint="Edmon02/TTS_NB_2",
            max_chunk_length=200,
            crossfade_duration=0.1,
            use_mixed_precision=True
        )

        # Apply production optimizations
        tts_pipeline.optimize_for_production()
    except Exception as e:
        # logger.exception keeps the traceback, which is needed to diagnose
        # startup failures from the hosting logs.
        logger.exception("Failed to initialize TTS pipeline: %s", e)
        try:
            # Here TTSPipeline is the real class, so the no-argument
            # constructor may fail for the same reason; guard it.
            tts_pipeline = TTSPipeline()
        except Exception:
            logger.exception("Fallback TTS pipeline construction failed")
            tts_pipeline = None
        return False

    logger.info("TTS Pipeline initialized successfully")
    return True


def generate_speech(text: str) -> Tuple[int, np.ndarray]:
    """
    Main synthesis function optimized for HF Spaces.

    Never raises: on empty input, an uninitialized pipeline, or any synthesis
    error it logs the problem and returns a short silent clip, so the UI
    callback always receives a valid audio tuple.

    Args:
        text: Input text to synthesize

    Returns:
        Tuple of (sample_rate, audio_array). The fallback clip is 1000 zero
        samples of dtype int16 at 16000 Hz.
    """
    def _silence() -> Tuple[int, np.ndarray]:
        # Single definition of the fallback clip shared by every failure path.
        return 16000, np.zeros(1000, dtype=np.int16)

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments.
    started = time.perf_counter()

    try:
        # Validate inputs
        if not text or not text.strip():
            logger.warning("Empty text provided")
            return _silence()

        if tts_pipeline is None:
            logger.error("TTS pipeline not initialized")
            return _silence()

        # Log request
        logger.info("Processing request: %d characters", len(text))

        # Synthesize speech with default settings
        sample_rate, audio = tts_pipeline.synthesize(
            text=text,
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True
        )

        # Log performance
        logger.info("Request completed in %.3fs", time.perf_counter() - started)

        return sample_rate, audio

    except Exception as e:
        # Keep the traceback in the log; the message alone is rarely enough
        # to diagnose a failed synthesis.
        logger.exception("Synthesis failed: %s", e)
        return _silence()


# Create the Gradio interface
def create_app():
    """Build and return the Gradio interface that wraps ``generate_speech``.

    The interface has one multi-line text input and one numpy audio output,
    ships a handful of Armenian example sentences, and has flagging and
    example caching switched off.
    """
    text_input = gr.Textbox(
        label="Armenian Text",
        placeholder="Մուտքագրեք ձեր տեքստը այստեղ...",
        lines=3,
        max_lines=10
    )
    audio_output = gr.Audio(
        label="Generated Speech",
        type="numpy"
    )

    # Markdown shown under the title.
    description_md = """
        Convert Armenian text to natural speech using SpeechT5.
        
        **Instructions:**
        1. Enter Armenian text in the input box
        2. Click Submit to generate speech
        3. Listen to the generated audio
        
        **Tips:**
        - Works best with standard Armenian orthography
        - Shorter sentences produce better quality
        - Include proper punctuation for natural pauses
        """

    # One row per example; each row holds the single text input value.
    example_rows = [
        ["Բարև ձեզ, ինչպե՞ս եք:"],
        ["Այսօր գեղեցիկ օր է:"],
        ["Հայաստանն ունի հարուստ պատմություն:"],
        ["Երևանը Հայաստանի մայրաքաղաքն է:"],
        ["Արարատ լեռը Հայաստանի խորհրդանիշն է:"]
    ]

    return gr.Interface(
        fn=generate_speech,
        inputs=[text_input],
        outputs=[audio_output],
        title="🎤 SpeechT5 Armenian Text-to-Speech",
        description=description_md,
        examples=example_rows,
        theme=gr.themes.Soft(),
        # Flagging is disabled to avoid schema issues.
        allow_flagging="never",
        # Examples are not pre-computed and cached.
        cache_examples=False
    )


def main():
    """Start the application: initialize the pipeline, then serve the UI.

    A failed pipeline initialization is logged but does not stop the launch.
    """
    logger.info("Starting SpeechT5 Armenian TTS Application")

    # Bring up the TTS pipeline; keep going even if it could not be configured.
    pipeline_ready = initialize_pipeline()
    if not pipeline_ready:
        logger.error("Failed to initialize TTS pipeline - continuing with limited functionality")

    # Launch settings for HF Spaces: no share link, listen on all interfaces
    # on port 7860, and surface errors in the UI.
    launch_options = {
        "share": False,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }

    # Create and launch the interface.
    create_app().launch(**launch_options)


# Start the application only when this file is executed as a script;
# importing the module does not call main().
if __name__ == "__main__":
    main()