Spaces:

yuvabe-ai
/

viseme3d_backend

Runtime error

App Files Files Community

viseme3d_backend / helpers /tts.js

Thamaraikannan's picture

Update helpers/tts.js

e737b54 verified 5 months ago

2.93 kB

	// helpers/tts.js — Azure Cognitive Services text-to-speech with facial blend-shape (viseme) capture
	require('dotenv').config()
	const sdk = require('microsoft-cognitiveservices-speech-sdk');
	const blendShapeNames = require('./blendshapeNames');
	const _ = require('lodash');

	// SSML document template sent to the synthesizer. `__TEXT__` is the
	// placeholder that gets substituted with the text to speak, and the
	// <mstts:viseme type="FacialExpression"/> element is what requests
	// blend-shape animation data alongside the audio.
	// Declared `const` because the template is shared by every call and must
	// never be reassigned.
	const SSML = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xml:lang="en-US">
	<voice name="en-US-JennyNeural">
	<mstts:viseme type="FacialExpression"/>
	__TEXT__
	</voice>
	</speak>`;


	// Azure Speech credentials, read from the process environment
	// (dotenv is loaded at the top of this file).
	const key = process.env['AZURE_KEY'];
	const region = process.env['AZURE_REGION'];

	// Fail fast at module load: without both values no synthesis request can
	// be made, so the process exits with a non-zero status.
	if (!key || !region) {
		console.error("❌ Azure Speech API key or region is missing!");
		process.exit(1);
	} else {
		console.log("✅ Environment variables loaded successfully");
	}



	/** Replacement entities for the five characters that are special in XML. */
	const XML_ESCAPES = {
		'&': '&amp;',
		'<': '&lt;',
		'>': '&gt;',
		'"': '&quot;',
		"'": '&apos;',
	};

	/**
	 * Escapes XML special characters so arbitrary text can be embedded in the
	 * SSML document without breaking its markup.
	 * @param {*} value text to escape; coerced to a string first
	 * @returns {string} the escaped text
	 */
	const escapeXml = (value) => String(value).replace(/[&<>"']/g, (ch) => XML_ESCAPES[ch]);

	/**
	 * Converts text to speech with the Azure Speech SDK, writing the audio to an
	 * mp3 file under `./public` and collecting the facial blend-shape frames
	 * reported while synthesizing.
	 *
	 * @param {string} text plain text to speak; XML-escaped before being placed
	 *   into the SSML template
	 * @param {*} voice accepted for API compatibility but currently unused — the
	 *   voice is fixed by the SSML template
	 * @returns {Promise<{blendData: Array<{time: number, blendshapes: Object}>, filename: string}>}
	 *   `blendData` holds one entry per animation frame (time in seconds, advancing
	 *   1/60 s per frame); `filename` is `/speech-<random>.mp3`, i.e. the written
	 *   file's path without the `./public` prefix.
	 *   Rejects with the SDK error if the request fails, or with an `Error` if the
	 *   synthesis finishes with any reason other than "audio completed".
	 */
	const textToSpeech = async (text, voice) => {

		// The SDK reports completion through callbacks; adapt it to a promise.
		return new Promise((resolve, reject) => {

			// A replacer function is used because a plain string replacement would
			// expand `$&`, `$1`, ... sequences that happen to appear in the text.
			const ssml = SSML.replace("__TEXT__", () => escapeXml(text));

			const speechConfig = sdk.SpeechConfig.fromSubscription(key, region);
			// NOTE(review): numeric SpeechSynthesisOutputFormat value — confirm which
			// enum member 5 maps to and consider using the named constant.
			speechConfig.speechSynthesisOutputFormat = 5; // mp3

			// Short random suffix so that separate requests write to separate files.
			const randomString = Math.random().toString(36).slice(2, 7);
			const filename = `./public/speech-${randomString}.mp3`;
			const audioConfig = sdk.AudioConfig.fromAudioFileOutput(filename);

			const blendData = [];
			const timeStep = 1 / 60; // seconds added per blend-shape frame
			let timeStamp = 0;

			const synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);

			// Subscribes to viseme received event
			synthesizer.visemeReceived = function (s, e) {

				// Skip events that carry no animation payload; JSON.parse would
				// throw on an empty string.
				if (!e.animation) {
					return;
				}

				// `animation` is a JSON string for blend shapes
				const animation = JSON.parse(e.animation);

				_.each(animation.BlendShapes, (blendArray) => {

					// Pair each blend-shape name with the value at the same index.
					const blend = {};
					_.each(blendShapeNames, (shapeName, i) => {
						blend[shapeName] = blendArray[i];
					});

					blendData.push({
						time: timeStamp,
						blendshapes: blend
					});
					timeStamp += timeStep;
				});

			};

			synthesizer.speakSsmlAsync(
				ssml,
				(result) => {
					synthesizer.close();

					// The result callback also fires for cancelled requests (e.g. bad
					// credentials or invalid SSML); only a completed synthesis has
					// produced usable audio.
					if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
						resolve({ blendData, filename: `/speech-${randomString}.mp3` });
					} else {
						reject(new Error(
							result.errorDetails ||
							`Speech synthesis did not complete (reason: ${result.reason})`
						));
					}
				},
				(error) => {
					synthesizer.close();
					reject(error);
				});
		});
	};

	// The module's sole export is the textToSpeech function itself.
	module.exports = textToSpeech;