Spaces:

Akjava
/

matcha-tts_vctk-onnx

Running

App Files Files Community

matcha-tts_vctk-onnx / js-esm /arpa_to_ipa.js

Akjava's picture

Upload 26 files

8bc8c75 verified about 2 months ago

history blame contribute delete

9.54 kB

	//ver 0.1

	// ARPAbet vowel (with stress digit 0/1/2) -> IPA symbol.
	// Rules followed (from https://github.com/menelik3/cmudict-ipa):
	//  - Vowel length is indicated (AA -> ɑː, ER -> ɝː, IY -> iː, UW -> uː).
	//    Unstressed word-final ER and IY are meant to be short; this table
	//    stores ER0 short and IY0 long.
	//  - AH is ʌ when bearing primary stress and ə otherwise
	//    (AH1 -> ʌ; AH0, AH2 -> ə).
	const vowels = (() => {
		// IPA symbol shared by all stress variants of a vowel unless overridden below.
		const baseVowels = [
			['AA', 'ɑː'],
			['AE', 'æ'],
			['AH', 'ə'],
			['AO', 'ɔ'],
			['AW', 'aʊ'],
			['AY', 'aɪ'],
			['EH', 'ɛ'],
			['ER', 'ɝː'],
			['EY', 'eɪ'],
			['IH', 'ɪ'],
			['IY', 'iː'],
			['OW', 'oʊ'],
			['OY', 'ɔɪ'],
			['UH', 'ʊ'],
			['UW', 'uː'],
		];
		// Stress-specific exceptions to the base symbol.
		const stressOverrides = {
			'AH1': 'ʌ',
			'ER0': 'ɝ',
		};
		const table = {};
		for (const [arpa, ipa] of baseVowels) {
			for (const stress of ['0', '1', '2']) {
				const key = `${arpa}${stress}`;
				table[key] = stressOverrides[key] ?? ipa;
			}
		}
		return table;
	})();

	// ARPAbet consonant -> IPA symbol. Consonants carry no stress digit.
	const consonants = Object.fromEntries([
		['B', 'b'],
		['CH', 'tʃ'],
		['D', 'd'],
		['DH', 'ð'],
		['F', 'f'],
		['G', 'g'],
		['HH', 'h'],
		['JH', 'dʒ'],
		['K', 'k'],
		['L', 'l'],
		['M', 'm'],
		['N', 'n'],
		['NG', 'ŋ'],
		['P', 'p'],
		['R', 'r'],
		['S', 's'],
		['SH', 'ʃ'],
		['T', 't'],
		['TH', 'θ'],
		['V', 'v'],
		['W', 'w'],
		['Y', 'j'],
		['Z', 'z'],
		['ZH', 'ʒ'],
	]);


	// Where the stress mark is placed when syllables are rendered to a string.
	// Every value equals its key, so a caller that cannot import this object
	// can pass the plain string (e.g. "STANDARD") instead.
	const AccentMode = {
		// Stress mark written directly before the vowel. Stable, but not meant for human readers.
		SIMPLIFIED_VOWEL_ALIGNED: "SIMPLIFIED_VOWEL_ALIGNED",
		// Stress mark written before the whole syllable. Human oriented, but syllable
		// boundaries are hard to split correctly, so results can be wrong.
		STANDARD: "STANDARD",
		// No stress marks at all.
		NONE: "NONE"
	};

	// Single lookup covering every known ARPAbet symbol: the vowel entries
	// first, then the consonant entries, copied into a fresh object.
	const arpa_to_ipa_lookup_tables = Object.assign({}, vowels, consonants);

	/**
	 * One syllable of a converted word.
	 *
	 * Field names are kept as spelled in the rest of the file:
	 * "ontop" appears to mean the onset and "coder" the coda
	 * (NOTE(review): inferred from how the fields are rendered — confirm).
	 */
	class Syllable {
		/**
		 * @param {string} ontop - IPA text placed before the nucleus.
		 * @param {?string} nucleus - IPA vowel, or null when the syllable has none.
		 * @param {string} coder - IPA text placed after the nucleus.
		 * @param {number} accent - Stress digit taken from the ARPAbet vowel, or -1 when absent.
		 * @param {string[]} [ontop_arpa=[]] - ARPAbet symbols that produced `ontop`.
		 *   Defaults to an empty array so code that reads `.length` or iterates it
		 *   never hits `undefined` when the argument is omitted.
		 */
		constructor(ontop, nucleus, coder, accent, ontop_arpa = []) {
			this.ontop = ontop;
			this.ontop_arpa = ontop_arpa;
			this.nucleus = nucleus; // vowel
			this.coder = coder;
			this.accent = accent;
		}

		/** Logs the syllable's parts to the console (debug helper). */
		display() {
			console.log(`Ontop: ${this.ontop} Nucleus: ${this.nucleus}, Coder: ${this.coder}, Accent: ${this.accent}`);
		}
	}

	// Joined ARPAbet consonant sequences that splitCodaOnset keeps together as
	// a single onset. Used for AccentMode.STANDARD; known to give imperfect results.
	const consonantClusters = (() => {
		const obstruentPlusLiquid = ["PL", "PR", "TR", "BR", "KR", "GR", "DR", "GL", "FL", "BL", "KL"];
		// Stop + Nasal
		const stopPlusNasal = ["TN", "DN", "PN", "GN", "BM", "DM", "PM", "GM", "TM"];
		// Fricative + Approximant/Lateral
		const fricativePlusApproximant = ["SL", "SW", "SHL", "SHR", "VL", "VR", "ZL", "ZR", "THL", "THR"];
		const fricativePlusTh = ["FTH", "VTH", "ZTH"];
		// Other important combinations
		const consonantPlusY = ["FY", "KY", "MY", "NY", "HY", "BY", "PY", "LY"];
		// Added later
		const consonantPlusW = ["KW", "DW"];
		// 3-phoneme clusters
		const threePhoneme = ["SPR", "STR", "SKR", "SPL", "STL", "SKL", "SHT", "SPT", "STK", "SPN"];

		return [
			...obstruentPlusLiquid,
			...stopPlusNasal,
			...fricativePlusApproximant,
			...fricativePlusTh,
			...consonantPlusY,
			...consonantPlusW,
			...threePhoneme,
		];
	})();

	/**
	 * Splits the consonants that sit between two vowels into the part that
	 * closes the previous syllable and the part that opens the next one.
	 * Written for AccentMode.STANDARD; the heuristics are known to be imperfect.
	 *
	 * Rules, in order:
	 *  - zero or one consonant: everything goes to the next syllable;
	 *  - "DM"/"DN" after ə or æ (the "ad-" case): split after the first consonant;
	 *  - the whole sequence is listed in consonantClusters: everything goes to
	 *    the next syllable;
	 *  - "RDV": split after the second consonant;
	 *  - more than three consonants: split after the first one when the rest is
	 *    a listed cluster, otherwise after the second;
	 *  - anything else: split after the first consonant.
	 *
	 * @param {string[]} consonants - ARPAbet consonant symbols between two vowels.
	 * @param {?string} [pre_nucleus=null] - IPA vowel of the preceding syllable.
	 * @param {?string} [post_nucleus=null] - IPA vowel of the following syllable
	 *   (accepted for interface compatibility; currently unused).
	 * @returns {Array<string[]>} `[coda, onset]` as arrays of ARPAbet symbols.
	 */
	function splitCodaOnset(consonants,pre_nucleus=null,post_nucleus=null) {
		if (consonants.length === 0) {
			return [[], []];
		}
		if (consonants.length === 1) {
			return [[], consonants];
		}

		const cluster = consonants.join("");

		// "ad-" + M/N (e.g. after ə or æ) must be split even though DM/DN are listed clusters.
		const isAdPrefixSplit =
			(cluster === "DM" || cluster === "DN") &&
			(pre_nucleus === "ə" || pre_nucleus === "æ");

		if (!isAdPrefixSplit && consonantClusters.includes(cluster)) {
			return [[], consonants];
		}

		let peakIndex = 1;
		if (cluster === "RDV") {
			peakIndex = 2;
		} else if (consonants.length > 3) {
			const tailCluster = consonants.slice(1).join("");
			peakIndex = consonantClusters.includes(tailCluster) ? 1 : 2;
		}

		return [consonants.slice(0, peakIndex), consonants.slice(peakIndex)];
	}

	/**
	 * Converts ARPAbet text to an IPA string.
	 *
	 * Input format: words are separated by tabs, the phonemes inside a word by
	 * single spaces. The punctuation marks , . ? ! are split off into their own
	 * tokens and copied to the output unchanged. Every converted word is
	 * followed by one space; a space directly before a punctuation mark is
	 * removed again.
	 *
	 * @param {string} arpa_text - Tab-separated ARPAbet words.
	 * @param {string} [accent_mode=AccentMode.SIMPLIFIED_VOWEL_ALIGNED] - One of the
	 *   AccentMode values; forwarded to syallablesToString.
	 * @returns {string} The IPA rendering of the input.
	 */
	function arpa_to_ipa(arpa_text,accent_mode=AccentMode.SIMPLIFIED_VOWEL_ALIGNED) {
		const punctuationMarks = [",", ".", "?", "!"];

		// Put a tab in front of every punctuation mark so it becomes its own token.
		let separated = arpa_text;
		for (const mark of punctuationMarks) {
			separated = separated.replaceAll(mark, `\t${mark}`);
		}

		const ipa_texts = [];
		for (const token of separated.split("\t")) {
			const word = token.trim();
			if (word === "") {
				continue;
			}
			if (word === "." || word === "," || word === "!" || word === "?") {
				ipa_texts.push(word);
				continue;
			}
			const syllables = arpa_to_ipa_with_syllables(word);
			ipa_texts.push(syallablesToString(syllables, accent_mode));
			ipa_texts.push(" "); // word separator
		}

		// Drop the word separator that ended up directly before a punctuation mark.
		let ipa = ipa_texts.join("");
		for (const mark of punctuationMarks) {
			ipa = ipa.replaceAll(` ${mark}`, mark);
		}
		return ipa;
	}

	/**
	 * Concatenates the IPA symbols for a list of ARPAbet symbols.
	 * Symbols missing from arpa_to_ipa_lookup_tables are reported on the
	 * console and left out of the result.
	 *
	 * @param {string[]} phonemes - ARPAbet symbols.
	 * @returns {string} The joined IPA text.
	 */
	function arpas_symbol_to_ipa(phonemes){
		return phonemes.reduce((ipaSoFar, symbol) => {
			const ipa = arpa_to_ipa_lookup_tables[symbol];
			if (ipa === undefined) {
				console.log(`Invalid Arpabet phoneme: ${symbol}`);
				return ipaSoFar; // Skip invalid phonemes
			}
			return ipaSoFar + ipa;
		}, "");
	}

	/**
	 * Converts one ARPAbet word to IPA and groups the result into syllables.
	 *
	 * Steps:
	 *  1. Every vowel starts a new Syllable whose "ontop" is the run of
	 *     consonants seen since the previous vowel.
	 *  2. Consonants after the last vowel become that syllable's "coder".
	 *  3. Each inter-vowel consonant run is re-divided by splitCodaOnset between
	 *     the previous syllable's coder and the current syllable's ontop.
	 *  4. A word-final, not-stressed (accent < 1) vowel is shortened: "iː" loses
	 *     its length mark when nothing follows it, "ɝː" always.
	 *
	 * Vowels written without a stress digit (e.g. "AE") are accepted: they use
	 * the IPA symbol of their "0" variant and get accent -1.
	 * Unknown symbols are reported on the console and skipped.
	 *
	 * @param {string} arpa - Space-separated ARPAbet phonemes of one word (case-insensitive).
	 * @returns {Syllable[]} The syllables in order. A word without any vowel
	 *   yields a single syllable with a null nucleus; a word without any valid
	 *   phoneme yields an empty array.
	 */
	function arpa_to_ipa_with_syllables(arpa) {
		const phonemes = arpa.toUpperCase().split(' ');
		const syllables = [];

		// Consonants collected since the previous vowel (IPA text and ARPAbet symbols).
		let pendingOnset = "";
		let pendingOnsetArpa = [];

		for (const phoneme of phonemes) {
			// A vowel may come with or without its stress digit; without one it
			// is resolved through the unstressed ("0") entry.
			let vowelKey = null;
			if (phoneme in vowels) {
				vowelKey = phoneme;
			} else if (`${phoneme}0` in vowels) {
				vowelKey = `${phoneme}0`;
			}

			const ipaSymbol = arpa_to_ipa_lookup_tables[vowelKey ?? phoneme];
			if (ipaSymbol === undefined) {
				console.log(`Invalid Arpabet phoneme: ${phoneme}`);
				continue; // Skip invalid phonemes
			}

			if (vowelKey === null) {
				pendingOnset += ipaSymbol;
				pendingOnsetArpa.push(phoneme);
				continue;
			}

			// The stress digit, when present, is the last character of the symbol.
			const lastChar = phoneme.slice(-1);
			const accent = /^[0-9]$/.test(lastChar) ? Number.parseInt(lastChar, 10) : -1;

			syllables.push(new Syllable(pendingOnset, ipaSymbol, "", accent, pendingOnsetArpa));
			pendingOnset = "";
			pendingOnsetArpa = [];
		}

		// Consonants after the last vowel close that syllable. Without any vowel
		// they form a nucleus-less syllable of their own.
		if (pendingOnset !== "") {
			if (syllables.length > 0) {
				syllables[syllables.length - 1].coder += pendingOnset;
			} else {
				syllables.push(new Syllable(pendingOnset, null, "", -1, pendingOnsetArpa));
			}
		}

		// Re-divide each inter-vowel consonant run between the neighbouring syllables.
		for (let i = 1; i < syllables.length; i++) {
			const [codaArpa, onsetArpa] = splitCodaOnset(
				syllables[i].ontop_arpa,
				syllables[i - 1].nucleus,
				syllables[i].nucleus
			);
			const coder = arpas_symbol_to_ipa(codaArpa);
			const onset = arpas_symbol_to_ipa(onsetArpa);
			syllables[i - 1].coder = coder;
			syllables[i].ontop = onset;
		}

		// Shorten a word-final vowel that does not carry stress.
		const lastSyllable = syllables[syllables.length - 1];
		if (lastSyllable !== undefined && lastSyllable.nucleus != null && lastSyllable.accent < 1) {
			const nucleus = lastSyllable.nucleus;
			if (nucleus.endsWith("iː") && lastSyllable.coder === "") {
				lastSyllable.nucleus = nucleus.substring(0, nucleus.length - 1);
			} else if (nucleus.endsWith("ɝː")) {
				lastSyllable.nucleus = nucleus.substring(0, nucleus.length - 1);
			}
		}

		return syllables;
	}

	/**
	 * Renders syllables as one IPA string.
	 *
	 * The stress mark is "ˈ" for accent 1, "ˌ" for accent 2 and empty for
	 * every other accent value. Its position depends on accent_mode:
	 *  - AccentMode.STANDARD: before the whole syllable;
	 *  - AccentMode.SIMPLIFIED_VOWEL_ALIGNED: directly before the nucleus;
	 *  - anything else: no stress mark is written.
	 *
	 * @param {Array} syllables - Objects with ontop, nucleus, coder and accent fields.
	 * @param {string} [accent_mode=AccentMode.SIMPLIFIED_VOWEL_ALIGNED] - Placement mode.
	 * @returns {string} The concatenated IPA text.
	 */
	function syallablesToString(syllables,accent_mode=AccentMode.SIMPLIFIED_VOWEL_ALIGNED) {
		// Maps the numeric accent to its IPA stress mark.
		const stressMarkFor = (accentValue) => {
			switch (accentValue) {
				case 1:
					return "ˈ";
				case 2:
					return "ˌ";
				default:
					return "";
			}
		};

		const rendered = [];
		for (const part of syllables) {
			const vowel = part.nucleus ?? "";
			const mark = stressMarkFor(part.accent);

			if (accent_mode == AccentMode.STANDARD) {
				rendered.push(`${mark}${part.ontop}${vowel}${part.coder}`);
			} else if (accent_mode == AccentMode.SIMPLIFIED_VOWEL_ALIGNED) {
				rendered.push(`${part.ontop}${mark}${vowel}${part.coder}`);
			} else {
				rendered.push(`${part.ontop}${vowel}${part.coder}`);
			}
		}
		return rendered.join("");
	}

	export { arpa_to_ipa };