import logging import wave import numpy as np from io import BytesIO from flask import Flask, request, send_file, jsonify from flask_cors import CORS from huggingface_hub import hf_hub_download from piper import PiperVoice app = Flask(__name__) CORS(app) # Setup logging logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) # Available models list available_models = [ {"repo_id": "csukuangfj/vits-piper-en_US-lessac-medium", "filename": "en_US-lessac-medium.onnx"}, {"repo_id": "csukuangfj/vits-piper-en_US-hfc_female-medium", "filename": "en_US-hfc_female-medium.onnx"}, {"repo_id": "csukuangfj/vits-piper-en_GB-southern_english_female-medium", "filename": "en_GB-southern_english_female-medium.onnx"} ] def synthesize_speech(repo_id, model_filename, text, sentence_silence, length_scale): logger.debug("Downloading model and config files...") model_path = hf_hub_download(repo_id=repo_id, filename=model_filename) config_path = hf_hub_download(repo_id=repo_id, filename=f"{model_filename}.json") logger.debug("Loading PiperVoice model...") voice = PiperVoice.load(model_path, config_path) buffer = BytesIO() logger.debug("Synthesizing speech...") with wave.open(buffer, 'wb') as wav_file: wav_file.setframerate(voice.config.sample_rate) wav_file.setsampwidth(2) wav_file.setnchannels(1) voice.synthesize(text, wav_file, sentence_silence=sentence_silence, length_scale=length_scale) buffer.seek(0) logger.debug("Speech synthesis complete.") return buffer @app.route('/') def index(): return '''
Use the /tts
endpoint to synthesize speech.
Send a POST request with JSON data containing the model
, text
, sentence_silence
, and length_scale
parameters.