"""Small Flask service that turns posted text into a WAV file using Piper.

Endpoints:
    GET  /     -- liveness check, returns a plain-text message.
    POST /tts  -- JSON body with ``text`` and optional ``sentence_silence``
                  and ``length_scale``; responds with ``output.wav``.
"""
import math
import wave
from functools import lru_cache
from io import BytesIO

import numpy as np  # noqa: F401  (unused here; kept in case other code relies on it)
from flask import Flask, jsonify, request, send_file
from flask_cors import CORS
from huggingface_hub import hf_hub_download
from piper import PiperVoice

# Hugging Face location of the voice model and its JSON config.
VOICE_REPO_ID = "csukuangfj/vits-piper-en_US-lessac-medium"
VOICE_MODEL_FILE = "en_US-lessac-medium.onnx"
VOICE_CONFIG_FILE = "en_US-lessac-medium.onnx.json"

app = Flask(__name__)
CORS(app)


@lru_cache(maxsize=1)
def _load_voice():
    """Download (if needed) and load the Piper voice, once per process.

    The result is cached so that repeated synthesis calls reuse the same
    loaded voice instead of resolving the hub files and reloading the model
    on every request.
    """
    model_path = hf_hub_download(repo_id=VOICE_REPO_ID, filename=VOICE_MODEL_FILE)
    config_path = hf_hub_download(repo_id=VOICE_REPO_ID, filename=VOICE_CONFIG_FILE)
    return PiperVoice.load(model_path, config_path)


def _bad_request(message):
    """Build a JSON 400 response carrying *message* under the ``error`` key."""
    return jsonify(error=message), 400


def synthesize_speech(text, sentence_silence, length_scale):
    """Synthesize *text* to WAV audio held in memory.

    Args:
        text: The text to speak.
        sentence_silence: Forwarded to ``PiperVoice.synthesize`` as
            ``sentence_silence``.
        length_scale: Forwarded to ``PiperVoice.synthesize`` as
            ``length_scale``.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the WAV data.
    """
    voice = _load_voice()

    buffer = BytesIO()
    with wave.open(buffer, 'wb') as wav_file:
        # 16-bit mono at the sample rate declared by the voice config.
        wav_file.setframerate(voice.config.sample_rate)
        wav_file.setsampwidth(2)
        wav_file.setnchannels(1)
        voice.synthesize(
            text,
            wav_file,
            sentence_silence=sentence_silence,
            length_scale=length_scale,
        )

    # Rewind so the caller reads the WAV from its beginning.
    buffer.seek(0)
    return buffer


@app.route('/')
def index():
    """Liveness endpoint."""
    return "Server is running"


@app.route('/tts', methods=['POST'])
def tts():
    """Synthesize speech from a JSON request and return it as a WAV download.

    Expected JSON object:
        text (str, default ""): text to synthesize.
        sentence_silence (number, default 0.1): must be finite and >= 0.
        length_scale (number, default 1.0): must be finite and > 0.

    Returns:
        The WAV file as an attachment named ``output.wav``, or a JSON
        ``{"error": ...}`` body with status 400 when the input is malformed.
    """
    # silent=True yields None for a missing/invalid JSON body so it can be
    # reported as a client error rather than surfacing as an exception.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return _bad_request("Request body must be a JSON object")

    text = data.get('text', '')
    if not isinstance(text, str):
        return _bad_request("'text' must be a string")

    try:
        sentence_silence = float(data.get('sentence_silence', 0.1))
        length_scale = float(data.get('length_scale', 1.0))
    except (TypeError, ValueError):
        return _bad_request("'sentence_silence' and 'length_scale' must be numbers")

    if not math.isfinite(sentence_silence) or sentence_silence < 0:
        return _bad_request("'sentence_silence' must be a finite number >= 0")
    if not math.isfinite(length_scale) or length_scale <= 0:
        return _bad_request("'length_scale' must be a finite number > 0")

    audio_buffer = synthesize_speech(text, sentence_silence, length_scale)
    return send_file(
        audio_buffer,
        mimetype="audio/wav",
        as_attachment=True,
        download_name="output.wav",
    )


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)