# Spaces: Paused
import functools
import logging
import math
import os
import wave
from io import BytesIO

import numpy as np
from flask import Flask, request, send_file, jsonify
from flask_cors import CORS
from huggingface_hub import hf_hub_download
from piper import PiperVoice

# The WSGI application object; request handlers in this module are bound to it.
app = Flask(__name__)
# Enable cross-origin requests for the whole app using flask-cors' defaults.
# NOTE(review): no origin restriction is configured here - confirm that is intended.
CORS(app)

# Setup logging
# DEBUG level on the root logger, so the debug traces emitted during
# synthesis are shown.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Available models list
# Each entry names a Hugging Face Hub repository ("repo_id") and the ONNX
# voice file inside it ("filename"). Requests select a voice by "filename".
available_models = [
    {
        "repo_id": "csukuangfj/vits-piper-en_US-lessac-medium",
        "filename": "en_US-lessac-medium.onnx",
    },
    {
        "repo_id": "csukuangfj/vits-piper-en_US-hfc_female-medium",
        "filename": "en_US-hfc_female-medium.onnx",
    },
    {
        "repo_id": "csukuangfj/vits-piper-en_GB-southern_english_female-medium",
        "filename": "en_GB-southern_english_female-medium.onnx",
    },
]

@functools.lru_cache(maxsize=8)
def _load_voice(repo_id, model_filename):
    """Fetch a Piper model and its JSON config from the Hub and load the voice.

    The result is memoised per ``(repo_id, model_filename)`` so that repeated
    requests for the same voice do not re-resolve the files and re-load the
    model each time. The cache is bounded so it cannot grow without limit.

    NOTE(review): the cached voice object is shared by every caller, including
    concurrent requests - confirm ``PiperVoice.synthesize`` is safe to call
    concurrently with the piper version in use.
    """
    logger.debug("Downloading model and config files...")
    model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)
    # The config is looked up under the model filename plus a ".json" suffix.
    config_path = hf_hub_download(repo_id=repo_id, filename=f"{model_filename}.json")
    logger.debug("Loading PiperVoice model...")
    return PiperVoice.load(model_path, config_path)


def synthesize_speech(repo_id, model_filename, text, sentence_silence, length_scale):
    """Synthesize *text* with the given Piper voice and return it as WAV data.

    Args:
        repo_id: Hugging Face Hub repository that holds the model.
        model_filename: Name of the ONNX model file inside that repository.
        text: Text to speak.
        sentence_silence: Forwarded to ``PiperVoice.synthesize`` unchanged.
        length_scale: Forwarded to ``PiperVoice.synthesize`` unchanged.

    Returns:
        A ``BytesIO`` positioned at offset 0 that contains the WAV output.

    Raises:
        Whatever the Hub download, model loading or synthesis raises; nothing
        is caught here.
    """
    voice = _load_voice(repo_id, model_filename)

    buffer = BytesIO()
    logger.debug("Synthesizing speech...")
    with wave.open(buffer, 'wb') as wav_file:
        wav_file.setframerate(voice.config.sample_rate)
        wav_file.setsampwidth(2)   # 2 bytes per sample
        wav_file.setnchannels(1)   # single channel
        voice.synthesize(text, wav_file, sentence_silence=sentence_silence, length_scale=length_scale)

    # Rewind only after the writer is closed, so the caller reads the complete
    # file from its first byte.
    buffer.seek(0)
    logger.debug("Speech synthesis complete.")
    return buffer

def index():
    """Return the static HTML landing page that describes the service.

    The page states that the server is running and points clients at the
    ``/tts`` endpoint and the JSON parameters it expects.
    """
    return '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>TTS Server</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 40px; }
        h1 { color: #333; }
        p { font-size: 1.2em; }
        code { background: #f4f4f4; padding: 2px 4px; border-radius: 4px; }
    </style>
</head>
<body>
    <h1>TTS Server is Running</h1>
    <p>Use the <code>/tts</code> endpoint to synthesize speech.</p>
    <p>Send a POST request with JSON data containing the <code>model</code>, <code>text</code>, <code>sentence_silence</code>, and <code>length_scale</code> parameters.</p>
</body>
</html>
'''

def models():
    """Return the list of selectable voices (``available_models``) as JSON."""
    return jsonify(available_models)

def _parse_float_field(payload, key, default):
    """Return ``payload[key]`` (or *default* when absent) as a finite float.

    Raises:
        ValueError: if the value cannot be converted to a float, or converts
            to NaN or infinity.
    """
    raw = payload.get(key, default)
    try:
        value = float(raw)
    except (TypeError, ValueError, OverflowError):
        raise ValueError(f"{key} must be a number") from None
    if not math.isfinite(value):
        raise ValueError(f"{key} must be a finite number")
    return value


def tts():
    """Handle a synthesis request and return the audio as a WAV download.

    Expects a JSON object with:
        model: ``filename`` of one entry in ``available_models`` (required).
        text: the text to speak (required, non-empty string).
        sentence_silence: optional number, default 0.1.
        length_scale: optional number, default 1.0.

    Returns:
        The WAV file as an attachment named ``output.wav`` on success;
        otherwise a JSON ``{"error": ...}`` body with status 400 (bad or
        missing input), 404 (unknown model) or 500 (synthesis failed).
    """
    # silent=True yields None for a missing, non-JSON or malformed body, so
    # this handler's own JSON 400 response is what the client receives.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or not data:
        logger.error("No data received in request.")
        return jsonify({"error": "No data provided"}), 400

    model = data.get('model', '')
    text = data.get('text', '')

    # Bad numeric input is a client error, not a server crash.
    try:
        sentence_silence = _parse_float_field(data, 'sentence_silence', 0.1)
        length_scale = _parse_float_field(data, 'length_scale', 1.0)
    except ValueError as exc:
        logger.error("Invalid numeric parameter in request: %s", exc)
        return jsonify({"error": str(exc)}), 400

    if not model:
        logger.error("No model provided in request.")
        return jsonify({"error": "Model parameter is required"}), 400
    if not text:
        logger.error("No text provided in request.")
        return jsonify({"error": "Text parameter is required"}), 400
    if not isinstance(text, str):
        logger.error("Text parameter is not a string.")
        return jsonify({"error": "Text parameter must be a string"}), 400

    # Reject values that have no sensible meaning for synthesis.
    if sentence_silence < 0:
        logger.error("Negative sentence_silence in request.")
        return jsonify({"error": "sentence_silence must not be negative"}), 400
    if length_scale <= 0:
        logger.error("Non-positive length_scale in request.")
        return jsonify({"error": "length_scale must be greater than zero"}), 400

    # Find the model in the available models list
    selected_model = next((m for m in available_models if m["filename"] == model), None)
    if not selected_model:
        logger.error("Model %s not found.", model)
        return jsonify({"error": f"Model {model} not found"}), 404

    logger.info(
        "Received request: model=%s, text=%s, sentence_silence=%s, length_scale=%s",
        model, text, sentence_silence, length_scale,
    )

    try:
        audio_buffer = synthesize_speech(
            selected_model['repo_id'],
            selected_model['filename'],
            text,
            sentence_silence,
            length_scale,
        )
    except Exception as e:
        # Boundary handler: log the traceback and report the failure as JSON.
        logger.exception("Error during speech synthesis.")
        return jsonify({"error": str(e)}), 500

    return send_file(audio_buffer, mimetype="audio/wav", as_attachment=True, download_name="output.wav")

# NOTE(review): no route registration is visible elsewhere in this file, so the
# handlers are bound to their URLs here. "/tts" (POST) is what the landing
# page documents; "/" and "/models" are inferred from the handler names -
# confirm them against the clients of this service.
app.add_url_rule('/', view_func=index)
app.add_url_rule('/models', view_func=models, methods=['GET'])
app.add_url_rule('/tts', view_func=tts, methods=['POST'])

if __name__ == '__main__':
    # The server listens on every interface, so the interactive debugger must
    # not be on by default: it lets a client run arbitrary code. Opt in
    # explicitly with FLASK_DEBUG=1 during local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)