Spaces:
Runtime error
Runtime error
File size: 5,700 Bytes
3f1840e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
"""
Armenian TTS - HuggingFace Spaces Compatible
===========================================
Final version optimized for HF Spaces with Gradio 3.x compatibility.
"""
import gradio as gr
import numpy as np
import logging
import os
import sys
# Logging: configure the root logger at INFO, then grab this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Shared state, filled in by initialize_tts() and read by synthesize_speech():
#   tts_pipeline   -- the pipeline object, or None while unavailable
#   pipeline_ready -- True only after initialization finished successfully
tts_pipeline, pipeline_ready = None, False
def initialize_tts():
    """Build the global TTS pipeline, logging (not raising) on failure.

    On success the module-level ``tts_pipeline`` holds the new pipeline and
    ``pipeline_ready`` is True. On any failure both are reset
    (``tts_pipeline`` to None, ``pipeline_ready`` to False) so that a
    half-initialized pipeline is never left behind, and the full traceback
    is written to the log.

    Returns:
        bool: True if the pipeline was created and optimized, else False.
    """
    global tts_pipeline, pipeline_ready

    try:
        # Put the ``src`` directory next to this file on sys.path.
        # NOTE(review): presumably so modules inside ``src`` can import each
        # other by bare name -- confirm against the package layout.
        current_dir = os.path.dirname(os.path.abspath(__file__))
        src_path = os.path.join(current_dir, 'src')
        if src_path not in sys.path:
            sys.path.insert(0, src_path)

        # Imported lazily so a missing/broken package only disables TTS
        # instead of preventing the app from starting.
        from src.pipeline import TTSPipeline

        logger.info("Initializing TTS pipeline...")
        # Build into a local first; the global is only updated once every
        # step (construction + optimization) has succeeded.
        pipeline = TTSPipeline(
            model_checkpoint="Edmon02/TTS_NB_2",
            max_chunk_length=200,
            crossfade_duration=0.1,
            use_mixed_precision=True,
        )
        pipeline.optimize_for_production()
    except Exception as exc:
        # logger.exception keeps the traceback, which the plain error
        # message alone does not provide.
        logger.exception("Failed to initialize TTS pipeline: %s", exc)
        tts_pipeline = None
        pipeline_ready = False
        return False

    tts_pipeline = pipeline
    pipeline_ready = True
    logger.info("TTS pipeline ready!")
    return True
def synthesize_speech(text):
    """Synthesize speech for *text*, degrading gracefully on any problem.

    Behavior by case:
      * empty, non-string or whitespace-only input -> one second of silence;
      * pipeline not initialized -> placeholder tone from
        ``create_fallback_audio``;
      * pipeline raises during synthesis -> the error is logged with its
        traceback and the placeholder tone is returned.

    Args:
        text (str): Armenian text to synthesize.

    Returns:
        tuple: (sample_rate, audio_array)
    """
    # Validate input. The isinstance check comes before .strip() so that
    # non-string truthy values never reach a string method.
    if not text or not isinstance(text, str):
        return create_silence(1.0)
    cleaned = text.strip()
    if not cleaned:
        return create_silence(1.0)

    # Check pipeline status
    if not pipeline_ready or tts_pipeline is None:
        logger.warning("Pipeline not ready, generating fallback audio")
        return create_fallback_audio(text)

    try:
        logger.info("Synthesizing: %s...", text[:50])
        sample_rate, audio = tts_pipeline.synthesize(
            text=cleaned,
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True,
        )
        logger.info("Generated %s samples at %sHz", len(audio), sample_rate)
        return sample_rate, audio
    except Exception as exc:
        # Keep the UI responsive: record the full traceback, then fall back
        # to the placeholder tone instead of surfacing the error.
        logger.exception("Synthesis error: %s", exc)
        return create_fallback_audio(text)
def create_silence(duration_seconds=1.0, sample_rate=16000):
    """Return a block of digital silence.

    Args:
        duration_seconds (float): Requested length in seconds. Zero or
            negative values yield an empty array instead of raising.
        sample_rate (int): Samples per second of the returned audio.

    Returns:
        tuple: (sample_rate, audio_array) where audio_array is a 1-D
        ``int16`` NumPy array of zeros.
    """
    # Clamp at zero: np.zeros rejects negative sizes with a ValueError.
    samples = max(int(duration_seconds * sample_rate), 0)
    return sample_rate, np.zeros(samples, dtype=np.int16)
def create_fallback_audio(text):
    """Generate a placeholder tone whose length scales with *text*.

    The tone lasts 0.1 s per character, clamped to the range 0.5-5.0 s.
    It is a 440 Hz sine at 0.3 amplitude; texts longer than 20 characters
    also get a 660 Hz component at 0.2 amplitude. A linear fade-in and
    fade-out (a tenth of the signal, at most 1000 samples each) is applied
    before conversion to 16-bit PCM.

    Args:
        text (str): Text the tone stands in for; only its length is used.

    Returns:
        tuple: (sample_rate, audio_array) with sample_rate 16000 and
        audio_array a 1-D ``int16`` NumPy array.
    """
    sample_rate = 16000

    # Calculate duration based on text length
    duration = min(max(len(text) * 0.1, 0.5), 5.0)
    samples = int(duration * sample_rate)

    # Sample instants spaced exactly 1/sample_rate apart. (An
    # endpoint-inclusive linspace would stretch the spacing to
    # duration/(samples - 1) and slightly detune the tone.)
    t = np.arange(samples) / sample_rate

    base_freq = 440  # A4
    audio = np.sin(2 * np.pi * base_freq * t) * 0.3

    # Add some variation for longer texts
    if len(text) > 20:
        audio += np.sin(2 * np.pi * (base_freq * 1.5) * t) * 0.2

    # Apply fade in/out
    fade_samples = min(samples // 10, 1000)
    if fade_samples > 0:
        audio[:fade_samples] *= np.linspace(0, 1, fade_samples)
        audio[-fade_samples:] *= np.linspace(1, 0, fade_samples)

    # Peak amplitude is at most 0.5, so scaling to int16 cannot overflow.
    return sample_rate, (audio * 32767).astype(np.int16)
# Bring the pipeline up once, at import time, and remember whether it worked.
logger.info("Starting Armenian TTS application...")
init_success = initialize_tts()

# Pick the status badge and the description shown in the UI. When
# initialization failed the app still starts, but announces test mode.
if not init_success:
    app_status = "🟡 Test Mode (Limited Functionality)"
    app_description = """
🎤 **Armenian Text-to-Speech System - Test Mode**
The TTS system is running in test mode with limited functionality.
Text input will generate simple audio tones as placeholders.
"""
else:
    app_status = "🟢 TTS System Ready"
    app_description = """
🎤 **Armenian Text-to-Speech System**
Convert Armenian text to natural speech using SpeechT5.
**How to use:**
1. Enter Armenian text in the box below
2. Click Submit to generate speech
3. Play the generated audio
**Tips for best results:**
- Use standard Armenian script (Unicode)
- Include punctuation for natural pauses
- Shorter sentences work better for quality
"""
# Create the Gradio interface.
# The top-level component classes (gr.Textbox / gr.Audio) are used instead of
# the legacy gr.inputs / gr.outputs namespaces, which Gradio 3.x only keeps as
# deprecated aliases. The deprecated no-op ``allow_screenshot`` option is
# dropped for the same reason.
demo = gr.Interface(
    fn=synthesize_speech,
    inputs=gr.Textbox(
        lines=3,
        placeholder="Մուտքագրեք ձեր հայերեն տեքստը այստեղ...",
        label="Armenian Text Input",
    ),
    # synthesize_speech returns a (sample_rate, numpy array) tuple.
    outputs=gr.Audio(
        label="Generated Speech",
        type="numpy",
    ),
    title=f"🇦🇲 Armenian Text-to-Speech {app_status}",
    description=app_description,
    examples=[
        "Բարև ձեզ, ինչպե՞ս եք:",
        "Այսօր գեղեցիկ օր է:",
        "Շնորհակալություն:",
        "Հայաստան իմ սիրելի երկիրն է:",
        "Երևանը Հայաստանի մայրաքաղաքն է:",
    ],
    theme="default",
    allow_flagging="never",
)
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "share": False,
        "debug": False,
        "quiet": False,
    }
    demo.launch(**launch_options)
|