// azure-cognitiveservices-speech.js
require('dotenv').config();
const sdk = require('microsoft-cognitiveservices-speech-sdk');
const blendShapeNames = require('./blendshapeNames');
const _ = require('lodash');

// Voice used when the caller does not pass one.
const DEFAULT_VOICE = 'en-US-JennyNeural';

// SSML envelope sent to the service. The <mstts:viseme type="FacialExpression"/>
// element is what asks Azure for blend-shape animation frames; without it the
// viseme handler below has no JSON payload to parse.
const SSML = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xml:lang="en-US">
  <voice name="__VOICE__">
    <mstts:viseme type="FacialExpression"/>
    __TEXT__
  </voice>
</speak>`;

const key = process.env.AZURE_KEY;
const region = process.env.AZURE_REGION;

// Check if variables are loaded; the module is unusable without credentials,
// so abort the process at load time.
if (!key || !region) {
  console.error("❌ Azure Speech API key or region is missing!");
  process.exit(1);
} else {
  console.log("✅ Environment variables loaded successfully");
}

// Characters that must be escaped before being embedded in XML text or attributes.
const XML_ENTITIES = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&apos;',
};

// Escapes XML-significant characters so arbitrary text cannot break the SSML document.
const escapeXml = (value) =>
  String(value).replace(/[&<>"']/g, (ch) => XML_ENTITIES[ch]);

// Builds the SSML document for `text`, spoken with `voice` (or DEFAULT_VOICE).
const buildSsml = (text, voice) => {
  const content = text == null ? '' : String(text);

  // Callers that already supply a complete SSML document (optionally preceded
  // by an XML declaration) keep full control: pass it through untouched.
  if (/^\s*(<\?xml[^>]*\?>\s*)?<speak[\s>]/.test(content)) {
    return content;
  }

  // Replacer functions are used instead of replacement strings so that `$&`,
  // `$$`, etc. inside the text are inserted literally rather than being
  // interpreted as String.prototype.replace patterns.
  return SSML
    .replace('__VOICE__', () => escapeXml(voice || DEFAULT_VOICE))
    .replace('__TEXT__', () => escapeXml(content));
};

// Maps one array of blend-shape values onto an object keyed by blend-shape name.
const toBlendFrame = (blendArray) => {
  const blend = {};
  _.each(blendShapeNames, (shapeName, i) => {
    blend[shapeName] = blendArray[i];
  });
  return blend;
};

/**
 * Converts text to speech with Azure Speech, writing the audio to a randomly
 * named mp3 file under ./public and collecting the blend-shape animation
 * frames reported by the service while synthesizing.
 *
 * @param {string} text Plain text to speak (XML-escaped automatically), or a
 *   complete `<speak>` SSML document, which is sent as-is.
 * @param {string} [voice] Voice name placed in the SSML `<voice>` element;
 *   defaults to DEFAULT_VOICE. Ignored when `text` is already an SSML document.
 * @returns {Promise<{blendData: Array<{time: number, blendshapes: Object}>, filename: string}>}
 *   `blendData` holds one entry per animation frame, timestamped in seconds;
 *   `filename` is the audio path relative to the public directory
 *   (`/speech-xxxxx.mp3`).
 * @throws Rejects with the SDK error if the request fails, or with an Error
 *   carrying the service's error details if synthesis is canceled.
 */
const textToSpeech = async (text, voice) => {
  // Adapt the SDK's callback API to a promise.
  return new Promise((resolve, reject) => {
    const ssml = buildSsml(text, voice);

    const speechConfig = sdk.SpeechConfig.fromSubscription(key, region);
    // Numeric SpeechSynthesisOutputFormat value labelled "mp3" by the original
    // author; presumably Audio16Khz64KBitRateMonoMp3 - confirm against the SDK enum.
    speechConfig.speechSynthesisOutputFormat = 5; // mp3

    // The audio is always written to a file with a short random suffix.
    const randomString = Math.random().toString(36).slice(2, 7);
    const filename = `./public/speech-${randomString}.mp3`;
    const audioConfig = sdk.AudioConfig.fromAudioFileOutput(filename);

    const blendData = [];
    // Each frame is assumed to last 1/60 s; timestamps are accumulated across
    // all viseme events of this utterance.
    const timeStep = 1 / 60;
    let timeStamp = 0;

    const synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);

    // Subscribes to viseme received event
    synthesizer.visemeReceived = (s, e) => {
      // `animation` is a JSON string for blend shapes. It can be empty (for
      // example when the event carries no animation chunk); skip those events
      // instead of letting JSON.parse throw inside the SDK callback.
      if (!e.animation) {
        return;
      }

      const animation = JSON.parse(e.animation);

      _.each(animation.BlendShapes, (blendArray) => {
        const blend = toBlendFrame(blendArray);

        blendData.push({
          time: timeStamp,
          blendshapes: blend
        });

        console.log(`Timestamp: ${timeStamp.toFixed(3)}s`);
        console.log(JSON.stringify(blend, null, 2));

        timeStamp += timeStep;
      });
    };

    synthesizer.speakSsmlAsync(
      ssml,
      (result) => {
        synthesizer.close();

        // The success callback also fires when the service cancels the
        // request (bad SSML, auth failure, ...); surface that as a rejection
        // rather than resolving with a missing or empty audio file.
        if (result.reason === sdk.ResultReason.Canceled) {
          reject(new Error(`Speech synthesis canceled: ${result.errorDetails}`));
          return;
        }

        resolve({ blendData, filename: `/speech-${randomString}.mp3` });
      },
      (error) => {
        synthesizer.close();
        reject(error);
      }
    );
  });
};

module.exports = textToSpeech;