Spaces:
Paused
Paused
File size: 4,032 Bytes
fad2e64 c6169e1 fad2e64 c6169e1 fad2e64 d690791 fad2e64 d690791 c6169e1 fad2e64 c6169e1 fad2e64 c6169e1 fad2e64 c6169e1 fad2e64 d690791 fad2e64 c6169e1 d690791 c6169e1 fad2e64 d690791 c6169e1 d690791 fad2e64 d690791 fad2e64 d690791 fad2e64 c6169e1 fad2e64 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
import functools
import logging
import os
import wave
from io import BytesIO

import numpy as np
from flask import Flask, request, send_file, jsonify
from flask_cors import CORS
from huggingface_hub import hf_hub_download
from piper import PiperVoice
# Flask application, wrapped with flask-cors using its default settings.
app = Flask(__name__)
CORS(app)

# Route log records at DEBUG level and above through the root logger.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Voices a client may request. Each entry pairs a Hugging Face repository with
# the ONNX model file stored in it; clients select an entry by "filename".
available_models = [
    {
        "repo_id": "csukuangfj/vits-piper-en_US-lessac-medium",
        "filename": "en_US-lessac-medium.onnx",
    },
    {
        "repo_id": "csukuangfj/vits-piper-en_US-hfc_female-medium",
        "filename": "en_US-hfc_female-medium.onnx",
    },
    {
        "repo_id": "csukuangfj/vits-piper-en_GB-southern_english_female-medium",
        "filename": "en_GB-southern_english_female-medium.onnx",
    },
]
@functools.lru_cache(maxsize=8)
def _load_voice(repo_id, model_filename):
    """Download a voice model plus its config and load it as a PiperVoice.

    The result is memoised per ``(repo_id, model_filename)`` so repeated
    requests for the same voice skip the Hub lookup and the model load.
    A call that raises is not cached, so a failed download is retried on the
    next request.
    """
    logger.debug("Downloading model and config files...")
    model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)
    # The config is fetched from the same repo under "<model filename>.json".
    config_path = hf_hub_download(repo_id=repo_id, filename=f"{model_filename}.json")
    logger.debug("Loading PiperVoice model...")
    return PiperVoice.load(model_path, config_path)


def synthesize_speech(repo_id, model_filename, text, sentence_silence, length_scale):
    """Synthesize ``text`` with the given voice and return it as WAV data.

    Args:
        repo_id: Hugging Face repository that hosts the model.
        model_filename: Name of the ONNX model file inside ``repo_id``.
        text: Text to speak.
        sentence_silence: Forwarded to ``PiperVoice.synthesize``.
        length_scale: Forwarded to ``PiperVoice.synthesize``.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing a mono WAV stream with
        2-byte samples at the voice's configured sample rate.

    Raises:
        Whatever the download, model loading or synthesis raises; nothing is
        caught here.
    """
    # NOTE(review): the cached voice object is shared between requests; confirm
    # PiperVoice.synthesize is safe to call concurrently if the server is
    # run multi-threaded.
    voice = _load_voice(repo_id, model_filename)

    buffer = BytesIO()
    logger.debug("Synthesizing speech...")
    with wave.open(buffer, 'wb') as wav_file:
        wav_file.setframerate(voice.config.sample_rate)
        wav_file.setsampwidth(2)
        wav_file.setnchannels(1)
        voice.synthesize(text, wav_file, sentence_silence=sentence_silence, length_scale=length_scale)

    # wave does not close a file object it did not open itself, so the buffer
    # is still usable here; rewind it so the caller reads from the start.
    buffer.seek(0)
    logger.debug("Speech synthesis complete.")
    return buffer
@app.route('/')
def index():
    """Serve a static HTML landing page that points clients at ``/tts``."""
    landing_page = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>TTS Server</title>
<style>
body { font-family: Arial, sans-serif; margin: 40px; }
h1 { color: #333; }
p { font-size: 1.2em; }
code { background: #f4f4f4; padding: 2px 4px; border-radius: 4px; }
</style>
</head>
<body>
<h1>TTS Server is Running</h1>
<p>Use the <code>/tts</code> endpoint to synthesize speech.</p>
<p>Send a POST request with JSON data containing the <code>model</code>, <code>text</code>, <code>sentence_silence</code>, and <code>length_scale</code> parameters.</p>
</body>
</html>
'''
    return landing_page
@app.route('/models')
def models():
    """Return the ``available_models`` list serialized as a JSON response."""
    catalogue = jsonify(available_models)
    return catalogue
@app.route('/tts', methods=['POST'])
def tts():
    """Synthesize speech for a JSON request and return it as a WAV download.

    The request body must be a JSON object with these fields:
        model: ``filename`` of an entry in ``available_models`` (required).
        text: non-empty string to speak (required).
        sentence_silence: number, default 0.1; forwarded to synthesis.
        length_scale: number, default 1.0; forwarded to synthesis.

    Returns:
        The WAV audio as an attachment named ``output.wav`` on success, or a
        JSON ``{"error": ...}`` body with status 400 (missing or invalid
        input), 404 (unknown model) or 500 (synthesis failed).
    """
    # silent=True yields None for a missing, non-JSON or malformed body so the
    # client gets this endpoint's JSON error instead of a framework error page.
    data = request.get_json(silent=True)
    if not data:
        logger.error("No data received in request.")
        return jsonify({"error": "No data provided"}), 400
    if not isinstance(data, dict):
        # A JSON array or scalar has no .get(); reject it as a client error.
        logger.error("Request body is not a JSON object.")
        return jsonify({"error": "Request body must be a JSON object"}), 400

    model = data.get('model', '')
    text = data.get('text', '')

    # Bad numeric input is the client's mistake, so answer 400 rather than
    # letting float() raise and surface as an unhandled 500.
    try:
        sentence_silence = float(data.get('sentence_silence', 0.1))
        length_scale = float(data.get('length_scale', 1.0))
    except (TypeError, ValueError):
        logger.error("Non-numeric sentence_silence or length_scale in request.")
        return jsonify({"error": "sentence_silence and length_scale must be numbers"}), 400

    # Comparisons against NaN are always False, so this one check rejects NaN,
    # infinity and out-of-range values before they reach synthesis.
    infinity = float('inf')
    if not (0 <= sentence_silence < infinity and 0 < length_scale < infinity):
        logger.error("Out-of-range sentence_silence or length_scale in request.")
        return jsonify({
            "error": "sentence_silence must be a finite number >= 0 and "
                     "length_scale must be a finite number > 0"
        }), 400

    if not model:
        logger.error("No model provided in request.")
        return jsonify({"error": "Model parameter is required"}), 400
    if not text:
        logger.error("No text provided in request.")
        return jsonify({"error": "Text parameter is required"}), 400
    if not isinstance(text, str):
        logger.error("Text parameter is not a string.")
        return jsonify({"error": "Text parameter must be a string"}), 400

    # Find the model in the available models list
    selected_model = next((m for m in available_models if m["filename"] == model), None)
    if not selected_model:
        logger.error("Model %s not found.", model)
        return jsonify({"error": f"Model {model} not found"}), 404

    logger.info(
        "Received request: model=%s, text=%s, sentence_silence=%s, length_scale=%s",
        model, text, sentence_silence, length_scale,
    )

    # Top-level boundary: any failure while downloading, loading or
    # synthesizing is logged with its traceback and reported as a 500.
    try:
        audio_buffer = synthesize_speech(
            selected_model['repo_id'],
            selected_model['filename'],
            text,
            sentence_silence,
            length_scale,
        )
    except Exception as e:
        logger.exception("Error during speech synthesis.")
        return jsonify({"error": str(e)}), 500

    return send_file(audio_buffer, mimetype="audio/wav", as_attachment=True, download_name="output.wav")
if __name__ == '__main__':
    # The server binds to every interface, and Flask's debug mode turns on the
    # Werkzeug interactive debugger, which lets anyone who can reach the port
    # execute code. Keep debug off unless it is explicitly requested through
    # the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
|