|
// Symbol inventory used to turn text into integer ids and back.
// The id of a symbol is its position in `Symbols`; the pad symbol is id 0.
const _pad = "_";
const _punctuation = ";:,.!?¡¿—…\"«»“” ";
const _letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
const _letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ";

// Flat list of every symbol: pad first, then punctuation, letters and IPA,
// each string split into its individual characters.
const Symbols = [_pad].concat(
  Array.from(_punctuation),
  Array.from(_letters),
  Array.from(_letters_ipa),
);

// Position of the plain space character inside `Symbols`.
const SpaceId = Symbols.indexOf(" ");

// Lookup tables in both directions. A symbol that appears more than once in
// `Symbols` ends up mapped to its last position, because later assignments
// overwrite earlier ones.
const symbolToId = {};
const idToSymbol = {};
Symbols.forEach((symbol, id) => {
  symbolToId[symbol] = id;
  idToSymbol[id] = symbol;
});
|
|
|
|
/**
 * Wrapper around an ONNX Runtime inference session that converts text into
 * symbol ids (via the module-level `symbolToId` / `idToSymbol` tables) and
 * runs the model to obtain waveform data.
 *
 * Relies on a global `ort` object providing `InferenceSession` and `Tensor`.
 */
class MatchaTTSRaw {
  constructor() {
    // True while an `infer` call is in flight; used to reject overlapping calls.
    this.processing = false;
  }

  /**
   * Create the inference session and detect whether the model takes a
   * speaker input.
   *
   * @param {*} model_path - Forwarded unchanged to `ort.InferenceSession.create`.
   * @param {object} [options={}] - Forwarded unchanged to `ort.InferenceSession.create`.
   * @returns {Promise<*>} The created session (also stored on `this.session`).
   */
  async load_model(model_path, options = {}) {
    this.session = await ort.InferenceSession.create(model_path, options);
    console.log(this.session);

    // A model that lists an input named "spks" must be fed a speaker id.
    this.need_spks = this.session.inputNames.includes("spks");
    console.log(`this model need spks = ${this.need_spks}`);
    return this.session;
  }

  /**
   * Build the "[title]<br>name<br>name<br>..." listing shared by the two
   * public name getters. Each name is also logged to the console.
   *
   * @param {string} title - Text placed between the square brackets.
   * @param {Iterable<string>} names - Names to list.
   * @returns {string} HTML fragment.
   */
  _namesToHtml(title, names) {
    let html = `[${title}]<br>`;
    for (const name of names) {
      console.log(name);
      html += `${name}<br>`;
    }
    return html;
  }

  /**
   * @returns {string|null} HTML listing of the session's output names, or
   *   null when no model has been loaded yet.
   */
  get_output_names_html() {
    if (this.session == null) {
      return null;
    }
    return this._namesToHtml("outputNames", this.session.outputNames);
  }

  /**
   * @returns {string|null} HTML listing of the session's input names, or
   *   null when no model has been loaded yet.
   */
  get_input_names_html() {
    if (this.session == null) {
      return null;
    }
    return this._namesToHtml("inputNames", this.session.inputNames);
  }

  /**
   * Convert text into the interspersed id sequence fed to the model.
   *
   * @param {string} text - Input text.
   * @returns {{x: number[], x_length: number, x_phones: string, x_phones_label: string}}
   *   `x` is the id sequence with a 0 between (and around) every symbol,
   *   `x_phones` is `x` mapped back to symbols, and `x_phones_label` is the
   *   same without the interspersed entries.
   */
  processText(text) {
    const x = this.intersperse(this.textToSequence(text));
    const x_phones = this.sequenceToText(x);

    // Odd positions hold the real symbols; even positions are the filler.
    const symbolIds = x.filter((_, position) => position % 2 === 1);

    return {
      x,
      x_length: x.length,
      x_phones,
      x_phones_label: this.sequenceToText(symbolIds),
    };
  }

  /**
   * Collapse every run of whitespace into a single space, optionally
   * lowercasing first.
   *
   * @param {string} text - Input text.
   * @param {boolean} [lowercase=false] - Lowercase the text before collapsing.
   * @returns {string} Cleaned text.
   */
  basicCleaners2(text, lowercase = false) {
    const source = lowercase ? text.toLowerCase() : text;
    return source.replace(/\s+/g, " ");
  }

  /**
   * Map each character of the cleaned text to its id in `symbolToId`.
   * Characters that are not in the table are skipped (with a warning) so a
   * single unsupported character cannot put `undefined` into the sequence
   * and break the later BigInt conversion in `infer`.
   *
   * @param {string} text - Input text.
   * @returns {number[]} Symbol ids.
   */
  textToSequence(text) {
    const sequenceList = [];
    for (const symbol of this.basicCleaners2(text)) {
      const id = symbolToId[symbol];
      if (typeof id !== "number") {
        console.warn(`skipping unknown symbol: ${JSON.stringify(symbol)}`);
        continue;
      }
      sequenceList.push(id);
    }
    return sequenceList;
  }

  /**
   * Put `item` before, between and after the elements of `sequence`.
   *
   * @param {Array} sequence - Source elements.
   * @param {*} [item=0] - Filler value.
   * @returns {Array} New array of length `2 * sequence.length + 1`.
   */
  intersperse(sequence, item = 0) {
    const sequenceList = [item];
    for (const element of sequence) {
      sequenceList.push(element, item);
    }
    return sequenceList;
  }

  /**
   * Map ids back to symbols through `idToSymbol` and concatenate them.
   *
   * @param {number[]} sequence - Symbol ids.
   * @returns {string} Concatenated symbols.
   */
  sequenceToText(sequence) {
    return sequence.map((id) => idToSymbol[id]).join("");
  }

  /**
   * Run the model on `text`.
   *
   * Feeds: `x` (int64, [1, length]), `x_lengths` (int64, [1]), `scales`
   * (float32, [temperature, speed]) and, when the model declared a "spks"
   * input, `spks` (int64, [1]).
   *
   * @param {string} text - Text to synthesize.
   * @param {number} temperature - First entry of the `scales` tensor.
   * @param {number} speed - Second entry of the `scales` tensor.
   * @param {number} [spks=0] - Speaker id, only sent when the model needs it.
   * @returns {Promise<*|null>} The `data` of the model's `wav` output, or null
   *   when a call is already running, no model is loaded, or inference fails.
   */
  async infer(text, temperature, speed, spks = 0) {
    if (this.processing) {
      console.error("already processing");
      return null;
    }
    if (this.session == null) {
      console.error("Inference error:", new Error("model is not loaded"));
      return null;
    }

    try {
      console.log("set processing True");
      this.processing = true;

      const dic = this.processText(text);
      console.log(`x:${dic.x.join(", ")}`);
      console.log(`x_length:${dic.x_length}`);
      console.log(`x_phones_label:${dic.x_phones_label}`);
      console.log(`temperature=${temperature} speed = ${speed} spks=${spks}`);

      // int64 tensors require BigInt-backed typed arrays.
      const dataX = BigInt64Array.from(dic.x, (id) => BigInt(id));
      const send_data = {
        x: new ort.Tensor("int64", dataX, [1, dic.x.length]),
        x_lengths: new ort.Tensor(
          "int64",
          BigInt64Array.of(BigInt(dic.x_length)),
          [1],
        ),
        scales: new ort.Tensor(
          "float32",
          Float32Array.from([temperature, speed]),
          [2],
        ),
      };

      if (this.need_spks) {
        send_data.spks = new ort.Tensor(
          "int64",
          BigInt64Array.of(BigInt(spks)),
          [1],
        );
      }

      const output = await this.session.run(send_data);
      return output.wav.data;
    } catch (exception) {
      console.error("Inference error:", exception);
      return null;
    } finally {
      console.log("set processing False");
      this.processing = false;
    }
  }
}
|
|
|
|
export { MatchaTTSRaw }; |