Spaces:
Runtime error
Runtime error
File size: 6,427 Bytes
d2f6021 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
"""
SpeechT5 Armenian TTS - HuggingFace Spaces Deployment Version
============================================================
Simplified and optimized for HuggingFace Spaces deployment.
"""
import gradio as gr
import numpy as np
import logging
import time
from typing import Tuple, Optional
import os
import sys
# Make the bundled ``src`` directory importable regardless of the current
# working directory.
current_dir = os.path.dirname(os.path.abspath(__file__))
src_path = os.path.join(current_dir, 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)

try:
    from src.pipeline import TTSPipeline
    HAS_PIPELINE = True
except ImportError as e:
    # Report through a named logger instead of the module-level
    # ``logging.error``: on an unconfigured root logger the latter implicitly
    # calls ``basicConfig()``, which would turn any later explicit
    # ``logging.basicConfig(...)`` call into a no-op.
    logging.getLogger(__name__).error(f"Failed to import pipeline: {e}")
    try:
        # ``src`` itself is on sys.path (see above), so the module may still
        # be reachable as a top-level ``pipeline`` module even when the
        # package-qualified import failed.
        from pipeline import TTSPipeline
        HAS_PIPELINE = True
    except ImportError:
        HAS_PIPELINE = False

        class TTSPipeline:
            """Stand-in pipeline used when the real implementation is missing.

            It accepts (and ignores) any constructor arguments and produces a
            placeholder tone instead of speech, so the rest of the app can
            still be exercised.
            """

            def __init__(self, *args, **kwargs):
                pass

            def synthesize(self, text, **kwargs):
                """Return ``(sample_rate, int16_audio)`` holding a 440 Hz tone.

                The tone lasts 0.1 s per input character, capped at 5 s.
                Extra keyword arguments are accepted and ignored.
                """
                duration = min(len(text) * 0.1, 5.0)  # Approximate duration
                sample_rate = 16000
                samples = int(duration * sample_rate)
                # Generate a simple sine wave as placeholder
                t = np.linspace(0, duration, samples)
                frequency = 440  # A4 note
                audio = (np.sin(2 * np.pi * frequency * t) * 0.3).astype(np.float32)
                # Scale the float waveform to the 16-bit integer range.
                return sample_rate, (audio * 32767).astype(np.int16)

            def optimize_for_production(self):
                """No-op; present so callers can invoke it unconditionally."""
                pass
# Emit INFO-and-above records through the root logger, each prefixed with a
# timestamp, the logger name and the level.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Shared pipeline instance; stays None until initialize_pipeline() assigns it.
tts_pipeline: Optional[TTSPipeline] = None
def initialize_pipeline():
    """Initialize the global TTS pipeline, degrading gracefully on failure.

    When the real pipeline could not be imported (``HAS_PIPELINE`` is false)
    the dummy ``TTSPipeline`` is installed. Otherwise the real pipeline is
    built with the production settings and optimized.

    Returns:
        True when the pipeline was set up as intended (including the dummy
        case); False when building or optimizing the real pipeline raised.
        In the False case ``tts_pipeline`` holds a default-constructed
        pipeline if one could be created, otherwise ``None``.
    """
    global tts_pipeline
    if not HAS_PIPELINE:
        logger.warning("Pipeline not available - using dummy implementation")
        tts_pipeline = TTSPipeline()
        return True
    try:
        logger.info("Initializing TTS Pipeline...")
        tts_pipeline = TTSPipeline(
            model_checkpoint="Edmon02/TTS_NB_2",
            max_chunk_length=200,
            crossfade_duration=0.1,
            use_mixed_precision=True
        )
        # Apply production optimizations
        tts_pipeline.optimize_for_production()
        logger.info("TTS Pipeline initialized successfully")
        return True
    except Exception as e:
        logger.error(f"Failed to initialize TTS pipeline: {e}")
        # Best-effort fallback to a default-constructed pipeline. On this
        # path TTSPipeline is the real class (HAS_PIPELINE is true), so the
        # constructor can fail again; an exception raised here must not
        # escape and take the whole application down.
        try:
            tts_pipeline = TTSPipeline()
        except Exception as fallback_error:
            logger.error(f"Fallback pipeline construction failed: {fallback_error}")
            # generate_speech() answers with silence while this is None.
            tts_pipeline = None
        return False
def generate_speech(text: str) -> Tuple[int, np.ndarray]:
    """
    Synthesize speech for ``text`` using the module-level pipeline.

    Any failure (blank input, missing pipeline, exception during synthesis)
    is logged and answered with a short block of silence instead of raising.

    Args:
        text: Input text to synthesize

    Returns:
        Tuple of (sample_rate, audio_array)
    """

    def _silence() -> Tuple[int, np.ndarray]:
        # 1000 zero samples at 16 kHz, used for every failure path.
        return 16000, np.zeros(1000, dtype=np.int16)

    started = time.time()
    try:
        # Reject missing or whitespace-only input
        if not (text and text.strip()):
            logger.warning("Empty text provided")
            return _silence()

        if tts_pipeline is None:
            logger.error("TTS pipeline not initialized")
            return _silence()

        logger.info(f"Processing request: {len(text)} characters")

        # Run synthesis with the default settings
        rate, waveform = tts_pipeline.synthesize(
            text=text,
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True,
        )

        elapsed = time.time() - started
        logger.info(f"Request completed in {elapsed:.3f}s")
        return rate, waveform
    except Exception as e:
        logger.error(f"Synthesis failed: {e}")
        return _silence()
# Create the Gradio interface
def create_app():
    """Build and return the Gradio interface wrapping ``generate_speech``.

    The UI has a single multi-line text input and a single audio output;
    flagging and example caching are both switched off.
    """
    text_input = gr.Textbox(
        label="Armenian Text",
        placeholder="Մուտքագրեք ձեր տեքստը այստեղ...",
        lines=3,
        max_lines=10,
    )
    audio_output = gr.Audio(label="Generated Speech", type="numpy")

    # Text shown under the title; its lines stay at column 0 so the string
    # carries no leading indentation.
    description_md = """
Convert Armenian text to natural speech using SpeechT5.
**Instructions:**
1. Enter Armenian text in the input box
2. Click Submit to generate speech
3. Listen to the generated audio
**Tips:**
- Works best with standard Armenian orthography
- Shorter sentences produce better quality
- Include proper punctuation for natural pauses
"""

    # One single-element row per example, matching the single text input.
    sample_sentences = [
        ["Բարև ձեզ, ինչպե՞ս եք:"],
        ["Այսօր գեղեցիկ օր է:"],
        ["Հայաստանն ունի հարուստ պատմություն:"],
        ["Երևանը Հայաստանի մայրաքաղաքն է:"],
        ["Արարատ լեռը Հայաստանի խորհրդանիշն է:"],
    ]

    return gr.Interface(
        fn=generate_speech,
        inputs=[text_input],
        outputs=[audio_output],
        title="🎤 SpeechT5 Armenian Text-to-Speech",
        description=description_md,
        examples=sample_sentences,
        theme=gr.themes.Soft(),
        allow_flagging="never",  # Disable flagging to avoid schema issues
        cache_examples=False,  # Disable example caching
    )
def main():
    """Start the app: set up the pipeline, build the UI, then serve it."""
    logger.info("Starting SpeechT5 Armenian TTS Application")

    # A failed initialization is reported but does not stop the launch.
    pipeline_ready = initialize_pipeline()
    if not pipeline_ready:
        logger.error("Failed to initialize TTS pipeline - continuing with limited functionality")

    # HF Spaces settings: no share link, listen on all interfaces, port 7860.
    launch_options = dict(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )
    create_app().launch(**launch_options)


if __name__ == "__main__":
    main()
|