Spaces:

ChandimaPrabath
/

tts

Paused

App Files Files Community

tts / app.py

ChandimaPrabath

init

fad2e64 9 months ago

raw

history blame

3.08 kB

	import functools
	import logging
	import math
	import os
	import wave
	from io import BytesIO

	import numpy as np
	from flask import Flask, request, send_file, jsonify
	from flask_cors import CORS
	from huggingface_hub import hf_hub_download
	from piper import PiperVoice

	# Flask application object; the route decorators further down register
	# their handlers on it.
	app = Flask(__name__)
	# Wrap the app with flask_cors. No options are passed, so the extension's
	# defaults apply.
	CORS(app)

	# Configure the root logger at DEBUG level and create this module's logger.
	# NOTE(review): this runs after CORS(app); keep the order unless you have
	# confirmed that moving it does not change what gets logged at startup.
	logging.basicConfig(level=logging.DEBUG)
	logger = logging.getLogger(__name__)

	# Hugging Face Hub coordinates of the Piper voice used for every request.
	_VOICE_REPO_ID = "csukuangfj/vits-piper-en_US-lessac-medium"
	_VOICE_MODEL_FILE = "en_US-lessac-medium.onnx"
	_VOICE_CONFIG_FILE = "en_US-lessac-medium.onnx.json"


	@functools.lru_cache(maxsize=1)
	def _load_voice():
	    """Fetch the voice files and load the Piper voice once per process.

	    The result is memoized, so only the first successful call pays for the
	    Hub lookup and the model load. A call that raises is not cached, so the
	    next request simply retries.

	    Returns:
	        The object returned by ``PiperVoice.load`` for the configured voice.
	    """
	    logger.debug("Downloading model and config files...")
	    model_path = hf_hub_download(repo_id=_VOICE_REPO_ID, filename=_VOICE_MODEL_FILE)
	    config_path = hf_hub_download(repo_id=_VOICE_REPO_ID, filename=_VOICE_CONFIG_FILE)

	    logger.debug("Loading PiperVoice model...")
	    return PiperVoice.load(model_path, config_path)


	def synthesize_speech(text, sentence_silence, length_scale):
	    """Synthesize *text* into an in-memory WAV file.

	    Args:
	        text: The text to speak; forwarded unchanged to ``voice.synthesize``.
	        sentence_silence: Forwarded to ``voice.synthesize`` as the keyword of
	            the same name (presumably seconds of silence between sentences;
	            confirm against the Piper documentation).
	        length_scale: Forwarded to ``voice.synthesize`` as the keyword of the
	            same name (presumably a speaking-rate multiplier; confirm against
	            the Piper documentation).

	    Returns:
	        A ``BytesIO`` rewound to position 0 that holds a mono, 16-bit WAV
	        stream at the voice's configured sample rate.

	    Raises:
	        Whatever the Hub download, the model load or the synthesis raises;
	        nothing is caught here.
	    """
	    # Reuse the process-wide voice instead of re-resolving the files and
	    # rebuilding the model on every request.
	    # NOTE(review): the voice object is now shared between requests; confirm
	    # that concurrent ``synthesize`` calls on one PiperVoice are acceptable
	    # for the server configuration in use.
	    voice = _load_voice()

	    buffer = BytesIO()
	    logger.debug("Synthesizing speech...")
	    # The wave writer does not own ``buffer`` (it was handed a file object,
	    # not a path), so leaving the ``with`` block finalizes the WAV header
	    # without closing the buffer.
	    with wave.open(buffer, 'wb') as wav_file:
	        wav_file.setframerate(voice.config.sample_rate)
	        wav_file.setsampwidth(2)
	        wav_file.setnchannels(1)
	        voice.synthesize(
	            text,
	            wav_file,
	            sentence_silence=sentence_silence,
	            length_scale=length_scale,
	        )

	    buffer.seek(0)
	    logger.debug("Speech synthesis complete.")
	    return buffer

	@app.route('/')
	def index():
	    """Serve the landing page: a static HTML document pointing at ``/tts``."""
	    landing_page = '''
	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<title>TTS Server</title>
	<style>
	body { font-family: Arial, sans-serif; margin: 40px; }
	h1 { color: #333; }
	p { font-size: 1.2em; }
	code { background: #f4f4f4; padding: 2px 4px; border-radius: 4px; }
	</style>
	</head>
	<body>
	<h1>TTS Server is Running</h1>
	<p>Use the <code>/tts</code> endpoint to synthesize speech.</p>
	<p>Send a POST request with JSON data containing the <code>text</code>, <code>sentence_silence</code>, and <code>length_scale</code> parameters.</p>
	</body>
	</html>
	'''
	    return landing_page

	def _parse_float_param(data, key, default):
	    """Read ``data[key]`` as a finite float, using *default* when it is absent.

	    Args:
	        data: The decoded JSON object (a dict) from the request body.
	        key: Name of the parameter to read.
	        default: Value used when *key* is not present in *data*.

	    Returns:
	        The parameter as a ``float``.

	    Raises:
	        ValueError: If the value cannot be converted to a float (wrong type,
	            unparsable string, ``null``, out-of-range integer) or is NaN or
	            infinite.
	    """
	    raw = data.get(key, default)
	    try:
	        value = float(raw)
	    except (TypeError, ValueError, OverflowError):
	        raise ValueError(f"{key} must be a number") from None
	    if not math.isfinite(value):
	        raise ValueError(f"{key} must be a finite number")
	    return value


	@app.route('/tts', methods=['POST'])
	def tts():
	    """Handle ``POST /tts``: synthesize the posted text and return a WAV file.

	    Expects a JSON object with a required string ``text`` and optional numeric
	    ``sentence_silence`` (default 0.1) and ``length_scale`` (default 1.0).

	    Returns:
	        On success, the audio as an ``audio/wav`` attachment named
	        ``output.wav``. Otherwise a JSON body ``{"error": ...}`` with status
	        400 for an invalid request or 500 when synthesis fails.
	    """
	    # silent=True yields None for a missing, malformed or non-JSON body
	    # instead of aborting with Flask's own error response, so the client
	    # always receives the JSON error shape below.
	    data = request.get_json(silent=True)
	    if not data:
	        logger.error("No data received in request.")
	        return jsonify({"error": "No data provided"}), 400
	    if not isinstance(data, dict):
	        # Valid JSON that is not an object (e.g. a list) has no ``.get``.
	        logger.error("Request body is not a JSON object.")
	        return jsonify({"error": "Request body must be a JSON object"}), 400

	    text = data.get('text', '')
	    try:
	        sentence_silence = _parse_float_param(data, 'sentence_silence', 0.1)
	        length_scale = _parse_float_param(data, 'length_scale', 1.0)
	    except ValueError as e:
	        logger.error("Invalid numeric parameter in request: %s", e)
	        return jsonify({"error": str(e)}), 400

	    if not text:
	        logger.error("No text provided in request.")
	        return jsonify({"error": "Text parameter is required"}), 400
	    if not isinstance(text, str):
	        logger.error("Non-string text provided in request.")
	        return jsonify({"error": "Text parameter must be a string"}), 400

	    logger.info(
	        "Received request: text=%s, sentence_silence=%s, length_scale=%s",
	        text,
	        sentence_silence,
	        length_scale,
	    )
	    try:
	        audio_buffer = synthesize_speech(text, sentence_silence, length_scale)
	    except Exception as e:
	        # Top-level boundary for the request: log the traceback and report.
	        # NOTE(review): str(e) is returned to the client as before; consider
	        # a generic message if internal details must not be exposed.
	        logger.exception("Error during speech synthesis.")
	        return jsonify({"error": str(e)}), 500

	    return send_file(audio_buffer, mimetype="audio/wav", as_attachment=True, download_name="output.wav")

	if __name__ == '__main__':
	    # The server listens on all interfaces, so the interactive debugger must
	    # not be on by default: it lets anyone who can reach the port execute
	    # code. Debug mode is opt-in through the FLASK_DEBUG environment variable.
	    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
	    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)