SpeechT5_hy / archive /app_ultracompat.py
Edmon02's picture
feat: Implement project organization plan and optimize TTS deployment
3f1840e
"""
SpeechT5 Armenian TTS - Ultra-Compatible Version
===============================================
Maximum compatibility version for HuggingFace Spaces.
"""
import gradio as gr
import numpy as np
import logging
import os
import sys
import warnings
# Suppress warnings that might interfere.
# The "ignore" action is installed with no category/module restriction, so it
# applies to every warning raised in this process, not just this module's.
warnings.filterwarnings("ignore")
# Setup minimal logging: the root logger is configured at WARNING level.
logging.basicConfig(level=logging.WARNING) # Reduce log noise
# Module-level logger used by the functions below to report TTS failures.
logger = logging.getLogger(__name__)
def safe_tts(text):
    """
    Ultra-safe TTS function that handles all errors gracefully.

    The real pipeline is imported and built lazily the first time it is
    needed and cached as ``safe_tts.pipeline`` for subsequent calls.

    Args:
        text: Text to synthesize.

    Returns:
        A ``(sample_rate, audio)`` tuple. Non-string or blank input returns
        ``generate_silence()``. If importing, building, or running the
        pipeline raises, ``generate_fallback_audio(text)`` is returned
        instead of propagating the exception.
    """
    # Input validation
    if not isinstance(text, str) or not text.strip():
        return generate_silence()

    try:
        # Initialize pipeline if not done
        if not hasattr(safe_tts, 'pipeline'):
            # Path setup and import are only needed while building the
            # pipeline, so they live inside the one-time init branch.
            # NOTE(review): this inserts "<dir>/src" on sys.path, while the
            # import below is ``src.pipeline`` (which needs "<dir>" itself on
            # the path) — confirm this matches the deployment layout.
            current_dir = os.path.dirname(os.path.abspath(__file__))
            src_path = os.path.join(current_dir, 'src')
            if src_path not in sys.path:
                sys.path.insert(0, src_path)

            from src.pipeline import TTSPipeline

            pipeline = TTSPipeline(
                model_checkpoint="Edmon02/TTS_NB_2",
                max_chunk_length=200,
                use_mixed_precision=True
            )
            try:
                pipeline.optimize_for_production()
            except Exception as opt_error:
                # Previously the pipeline was cached *before* this call, so a
                # failure here sent the first request to the fallback tone
                # while later requests used the unoptimized pipeline anyway.
                # Treat the failure as non-fatal so the first request
                # behaves like every later one.
                logger.warning(
                    "Pipeline optimization failed, continuing without it: %s",
                    opt_error,
                )
            safe_tts.pipeline = pipeline

        # Generate speech
        sr, audio = safe_tts.pipeline.synthesize(
            text=text,
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True
        )
        return sr, audio
    except Exception as e:
        # Top-level boundary for the UI callback: never raise, always return
        # playable audio.
        logger.warning(f"TTS failed, using fallback: {e}")
        return generate_fallback_audio(text)
def generate_silence():
    """Return ``(sample_rate, samples)`` for a short all-zero int16 clip."""
    sample_rate = 16000
    # 8000 samples at 16 kHz, i.e. half of one second's worth.
    silent_samples = np.zeros(sample_rate // 2, dtype=np.int16)
    return sample_rate, silent_samples
def generate_fallback_audio(text):
    """Generate simple audio as fallback.

    Produces a 440 Hz tone whose length grows with the text (0.08 s per
    character, capped at 3 s). Texts longer than 10 characters also get a
    quieter 880 Hz component mixed in.

    Args:
        text: The text that could not be synthesized. ``None`` or an empty
            string is treated as zero-length.

    Returns:
        A ``(sample_rate, samples)`` tuple with ``sample_rate == 16000`` and
        ``samples`` a 1-D ``int16`` array. Zero-length input returns
        ``generate_silence()``.
    """
    # Create a simple beep based on text length
    text_length = len(text) if text else 0
    duration = min(text_length * 0.08, 3.0)
    sr = 16000
    samples = int(duration * sr)
    if samples == 0:
        return generate_silence()

    # Sample instants spaced exactly 1/sr apart. The earlier
    # np.linspace(0, duration, samples) included the endpoint, which made the
    # spacing duration/(samples - 1) and slightly detuned the tone relative
    # to the sample rate that is returned alongside it.
    t = np.arange(samples) / sr

    # Generate simple tone
    frequency = 440  # A4
    audio = np.sin(2 * np.pi * frequency * t) * 0.2

    # Add some variation for different text
    if text_length > 10:
        audio += np.sin(2 * np.pi * 880 * t) * 0.1

    # Peak amplitude is at most 0.3, so scaling to int16 cannot overflow.
    return sr, (audio * 32767).astype(np.int16)
# Build the Gradio UI from the plainest building blocks available
def create_interface():
    """Build and return the Gradio interface that wraps ``safe_tts``."""
    # Example phrases offered in the UI.
    sample_phrases = [
        "Բարև ձեզ",
        "Ինչպե՞ս եք",
        "Շնորհակալություն",
    ]
    # Inputs and outputs use the string shortcuts ("text" / "audio") rather
    # than component objects.
    return gr.Interface(
        fn=safe_tts,
        inputs="text",
        outputs="audio",
        title="Armenian Text-to-Speech",
        description="Enter Armenian text to generate speech.",
        examples=sample_phrases,
    )
# Script entry point
if __name__ == "__main__":
    # Both the normal and the emergency app bind to the same address and port.
    bind_options = {"server_name": "0.0.0.0", "server_port": 7860}
    try:
        # Normal path: build the full interface and serve it without a
        # share link and with quiet startup output.
        app = create_interface()
        app.launch(share=False, quiet=True, **bind_options)
    except Exception as launch_error:
        print(f"Failed to launch: {launch_error}")
        # Emergency fallback: a bare interface that only calls the fallback
        # audio generator, substituting "test" for empty input.
        emergency_app = gr.Interface(
            fn=lambda x: generate_fallback_audio(x or "test"),
            inputs="text",
            outputs="audio",
            title="Armenian TTS (Emergency Mode)",
        )
        emergency_app.launch(**bind_options)