Spaces:

ChandimaPrabath
/

tts

Paused

tts

File size: 3,078 Bytes

fad2e64
c6169e1
 
 
fad2e64
c6169e1
 
 
 
 
 
 
fad2e64
 
 
 
c6169e1
fad2e64
c6169e1
 
 
fad2e64
c6169e1
 
 
fad2e64
c6169e1
 
 
 
 
 
 
fad2e64
c6169e1
 
 
 
fad2e64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6169e1
 
 
 
fad2e64
 
 
 
c6169e1
 
 
 
fad2e64
 
 
 
 
 
 
 
 
 
c6169e1
 
 
 
fad2e64

import logging
import wave
import numpy as np
from io import BytesIO
from flask import Flask, request, send_file, jsonify
from flask_cors import CORS
from huggingface_hub import hf_hub_download
from piper import PiperVoice

# Flask application object; the route decorators in this file register handlers on it.
app = Flask(__name__)
# Apply flask_cors to the whole app (no options are passed, so the library defaults apply).
CORS(app)

# Configure root logging at DEBUG level and create this module's logger.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Hugging Face repository and file names of the Piper voice used for synthesis.
_VOICE_REPO_ID = "csukuangfj/vits-piper-en_US-lessac-medium"
_VOICE_MODEL_FILENAME = "en_US-lessac-medium.onnx"
_VOICE_CONFIG_FILENAME = "en_US-lessac-medium.onnx.json"

# Process-wide cache of the loaded voice, keyed by repository id, so the model
# is loaded once instead of on every request.
_voice_cache = {}


def _get_voice():
    """Return the PiperVoice, downloading and loading it on first use."""
    voice = _voice_cache.get(_VOICE_REPO_ID)
    if voice is None:
        logger.debug("Downloading model and config files...")
        model_path = hf_hub_download(repo_id=_VOICE_REPO_ID, filename=_VOICE_MODEL_FILENAME)
        config_path = hf_hub_download(repo_id=_VOICE_REPO_ID, filename=_VOICE_CONFIG_FILENAME)

        logger.debug("Loading PiperVoice model...")
        voice = PiperVoice.load(model_path, config_path)
        # Stored only after a successful load, so a failed attempt is retried
        # on the next call. Two concurrent first calls may both load; the
        # later assignment simply wins.
        _voice_cache[_VOICE_REPO_ID] = voice
    return voice


def synthesize_speech(text, sentence_silence, length_scale):
    """Synthesize *text* to WAV audio held in memory.

    Args:
        text: The text to speak.
        sentence_silence: Forwarded to ``PiperVoice.synthesize`` as
            ``sentence_silence`` (presumably seconds of silence inserted
            between sentences -- confirm against the piper version in use).
        length_scale: Forwarded to ``PiperVoice.synthesize`` as
            ``length_scale`` (presumably a speaking-rate multiplier --
            confirm against the piper version in use).

    Returns:
        A ``BytesIO`` positioned at offset 0 containing a mono, 16-bit WAV
        stream at the voice's configured sample rate.

    Raises:
        Whatever the download, model load, or synthesis raises; nothing is
        caught here.
    """
    voice = _get_voice()

    buffer = BytesIO()
    logger.debug("Synthesizing speech...")
    with wave.open(buffer, 'wb') as wav_file:
        wav_file.setframerate(voice.config.sample_rate)
        wav_file.setsampwidth(2)  # 2 bytes per sample = 16-bit PCM
        wav_file.setnchannels(1)  # mono
        voice.synthesize(text, wav_file, sentence_silence=sentence_silence, length_scale=length_scale)

    # Rewind so the caller reads the WAV data from the beginning.
    buffer.seek(0)
    logger.debug("Speech synthesis complete.")
    return buffer

@app.route('/')
def index():
    """Serve the static HTML landing page that points clients at ``/tts``."""
    landing_page = '''
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>TTS Server</title>
        <style>
            body { font-family: Arial, sans-serif; margin: 40px; }
            h1 { color: #333; }
            p { font-size: 1.2em; }
            code { background: #f4f4f4; padding: 2px 4px; border-radius: 4px; }
        </style>
    </head>
    <body>
        <h1>TTS Server is Running</h1>
        <p>Use the <code>/tts</code> endpoint to synthesize speech.</p>
        <p>Send a POST request with JSON data containing the <code>text</code>, <code>sentence_silence</code>, and <code>length_scale</code> parameters.</p>
    </body>
    </html>
    '''
    return landing_page

def _float_param(data, key, default):
    """Read ``data[key]`` as a finite float, using *default* when the key is absent.

    Raises:
        ValueError: if the value is not numeric, or is NaN or infinite.
    """
    raw = data.get(key, default)
    try:
        value = float(raw)
    except (TypeError, ValueError):
        raise ValueError(f"{key} must be a number") from None
    # NaN is the only float that is not equal to itself.
    if value != value or value in (float("inf"), float("-inf")):
        raise ValueError(f"{key} must be a finite number")
    return value


@app.route('/tts', methods=['POST'])
def tts():
    """Handle ``POST /tts``: synthesize the posted text and return a WAV file.

    Expects a JSON object with a required string ``text`` and optional numeric
    ``sentence_silence`` (default 0.1) and ``length_scale`` (default 1.0).

    Returns:
        The WAV audio as an ``output.wav`` attachment on success; a JSON
        ``{"error": ...}`` body with status 400 for invalid input or 500 when
        synthesis fails.
    """
    # silent=True returns None instead of raising on a wrong content type or a
    # malformed body, so every bad payload gets the same JSON error below.
    data = request.get_json(silent=True)
    if not data:
        logger.error("No data received in request.")
        return jsonify({"error": "No data provided"}), 400
    if not isinstance(data, dict):
        logger.error("Request body is not a JSON object.")
        return jsonify({"error": "JSON body must be an object"}), 400

    text = data.get('text', '')
    if not text:
        logger.error("No text provided in request.")
        return jsonify({"error": "Text parameter is required"}), 400
    if not isinstance(text, str):
        logger.error("Text parameter is not a string.")
        return jsonify({"error": "Text parameter must be a string"}), 400

    # Bad numeric input is a client error (400), not a server crash.
    try:
        sentence_silence = _float_param(data, 'sentence_silence', 0.1)
        length_scale = _float_param(data, 'length_scale', 1.0)
        if sentence_silence < 0:
            raise ValueError("sentence_silence must not be negative")
        if length_scale <= 0:
            raise ValueError("length_scale must be greater than zero")
    except ValueError as e:
        logger.error("Invalid parameter in request: %s", e)
        return jsonify({"error": str(e)}), 400

    logger.info(
        "Received request: text=%s, sentence_silence=%s, length_scale=%s",
        text, sentence_silence, length_scale,
    )
    try:
        audio_buffer = synthesize_speech(text, sentence_silence, length_scale)
    except Exception as e:
        # Request boundary: log the traceback and report the failure as JSON.
        logger.exception("Error during speech synthesis.")
        return jsonify({"error": str(e)}), 500

    return send_file(audio_buffer, mimetype="audio/wav", as_attachment=True, download_name="output.wav")

if __name__ == '__main__':
    import os

    # The server listens on all interfaces, so the Werkzeug interactive
    # debugger (which allows arbitrary code execution) must not be on by
    # default. Set FLASK_DEBUG=1 explicitly for local development only.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)