// azure-cognitiveservices-speech.js
require('dotenv').config()
const sdk = require('microsoft-cognitiveservices-speech-sdk');
const blendShapeNames = require('./blendshapeNames');
const _ = require('lodash');
// SSML payload template for Azure speech synthesis. __TEXT__ is swapped for the
// text to speak; the mstts:viseme element asks the service to stream
// FacialExpression blend-shape animation frames alongside the audio.
const SSML = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xml:lang="en-US">
<voice name="en-US-JennyNeural">
<mstts:viseme type="FacialExpression"/>
__TEXT__
</voice>
</speak>`;
// Azure credentials come from the environment (.env loaded via dotenv above).
const { AZURE_KEY: key, AZURE_REGION: region } = process.env;
/**
 * Converts text to speech via Azure Cognitive Services and collects the
 * per-frame viseme blend-shape data streamed alongside the synthesized audio.
 * @param {string} text text to convert to audio/speech
 * @param {string} [voice] voice name for synthesis
 * @returns {Promise<{blendData: Array, filename: string}>} blend-shape frames plus the public path of the generated MP3 file
 */
const textToSpeech = async (text, voice) => {
  // Wrap the SDK's callback-style API in a Promise.
  return new Promise((resolve, reject) => {
    // Escape XML special characters so user-supplied text cannot break (or
    // inject markup into) the SSML document.
    const safeText = String(text)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
    // Substitute the requested voice (falling back to the template's default,
    // which preserves the old hard-coded behavior) and the text to speak.
    const ssml = SSML
      .replace('en-US-JennyNeural', voice || 'en-US-JennyNeural')
      .replace('__TEXT__', safeText);
    const speechConfig = sdk.SpeechConfig.fromSubscription(key, region);
    // 5 selects an MP3 entry of the SpeechSynthesisOutputFormat enum —
    // TODO confirm against the SDK and prefer the named enum member.
    speechConfig.speechSynthesisOutputFormat = 5; // mp3
    // Write the synthesized audio to a uniquely named file under ./public.
    const randomString = Math.random().toString(36).slice(2, 7);
    const filename = `./public/speech-${randomString}.mp3`;
    const audioConfig = sdk.AudioConfig.fromAudioFileOutput(filename);
    // Viseme frames are spaced at 60 fps; accumulate a timestamped track.
    const blendData = [];
    const timeStep = 1 / 60;
    let timeStamp = 0;
    const synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);
    // Subscribes to the viseme-received event. `e.animation` is a JSON string
    // of blend-shape rows for this viseme.
    synthesizer.visemeReceived = function (s, e) {
      const animation = JSON.parse(e.animation);
      _.each(animation.BlendShapes, (blendArray) => {
        // Map positional blend values onto their ARKit-style shape names.
        const blend = {};
        _.each(blendShapeNames, (shapeName, i) => {
          blend[shapeName] = blendArray[i];
        });
        blendData.push({
          time: timeStamp,
          blendshapes: blend
        });
        timeStamp += timeStep;
      });
    };
    synthesizer.speakSsmlAsync(
      ssml,
      (result) => {
        synthesizer.close();
        // A canceled synthesis (bad key/region, invalid SSML, …) still hits
        // the success callback — surface it as a rejection instead of
        // silently resolving with empty data.
        if (result.reason === sdk.ResultReason.Canceled) {
          reject(new Error(result.errorDetails || 'Speech synthesis canceled'));
          return;
        }
        resolve({ blendData, filename: `/speech-${randomString}.mp3` });
      },
      (error) => {
        synthesizer.close();
        reject(error);
      });
  });
};
// Stray " |" scrape artifact removed — it was a syntax error.
module.exports = textToSpeech;