Spaces:
Running
Running
import { getPreviewString, saveTtsProviderSettings } from './index.js'; | |
import { getBase64Async } from '../../utils.js'; | |
import { getRequestHeaders } from '../../../script.js'; | |
export { SpeechT5TtsProvider }; | |
/**
 * TTS provider that sends text and a user-uploaded speaker file to the
 * `/api/speech/synthesize` endpoint using the `Xenova/speecht5_tts` model.
 *
 * Speakers are kept in `settings.speakers`; each entry has the shape
 * `{ voice_id, name, data, lang, preview_url }`, where `data` is the value
 * returned by `getBase64Async` for the uploaded file.
 */
class SpeechT5TtsProvider {
    //########//
    // Config //
    //########//

    /**
     * Exact size (in bytes) an uploaded speaker file must have.
     * NOTE(review): presumably a 512-value float32 speaker embedding — confirm.
     */
    static SPEAKER_FILE_SIZE = 2048;

    settings;
    ready = false;
    voices = [];
    separator = ' .. ';
    audioElement = document.createElement('audio');
    defaultSettings = {
        speakers: [],
        speaker: '',
        voiceMap: {},
    };

    /** Object URL of the preview clip currently assigned to `audioElement`, if any. */
    #previewUrl = null;

    /**
     * Markup for the provider settings panel: speaker dropdown, upload and
     * delete buttons, and the hidden file input driven by the upload button.
     * @returns {string} HTML fragment.
     */
    get settingsHtml() {
        return `
        <label for="speecht5_tts_speaker">Speaker:</label>
        <div class="flex-container">
            <select id="speecht5_tts_speaker" class="text_pole flex1">
            </select>
            <div id="speecht5_tts_speaker_upload_button" class="menu_button" title="Upload speaker">
                <i class="fa-solid fa-upload"></i>
            </div>
            <div id="speecht5_tts_delete_speaker_button" class="menu_button" title="Delete speaker">
                <i class="fa-solid fa-trash"></i>
            </div>
        </div>
        <input type="file" id="speecht5_tts_speaker_upload" class="displayNone">
        <div><i>Loading model for the first time may take a while!</i></div>
        `;
    }

    /**
     * Copies the currently selected dropdown value into `settings.speaker`
     * and persists the provider settings.
     */
    onSettingsChange() {
        // Used when provider settings are updated from UI
        this.settings.speaker = $('#speecht5_tts_speaker').val();
        saveTtsProviderSettings();
    }

    /**
     * Generates the preview phrase with the given voice and plays it.
     * @param {string} voiceId - `voice_id` of the speaker to preview.
     * @returns {Promise<void>}
     * @throws {Error} If the speaker is unknown or the request fails.
     */
    async previewTtsVoice(voiceId) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const text = getPreviewString('en-US');
        const response = await this.fetchTtsGeneration(text, voiceId);
        // Defensive: fetchTtsGeneration already throws on a non-OK response.
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);

        // An interrupted preview never fires `ended`, so release its URL here.
        this.#revokePreviewUrl();
        this.#previewUrl = url;

        // Attach the cleanup before starting playback so it cannot be missed.
        this.audioElement.onended = () => {
            if (this.#previewUrl === url) {
                this.#revokePreviewUrl();
            }
        };
        this.audioElement.src = url;

        try {
            await this.audioElement.play();
        } catch (err) {
            // play() rejects when playback is blocked or interrupted by a newer preview.
            console.warn('SpeechT5: Preview playback did not start', err);
            if (this.#previewUrl === url) {
                this.#revokePreviewUrl();
            }
        }
    }

    /**
     * Applies saved settings, fills the speaker dropdown and wires up the
     * settings panel controls.
     * @param {object} settings - Saved provider settings; only keys present in
     *   `defaultSettings` are accepted.
     * @returns {Promise<void>}
     * @throws {Error} If `settings` contains a key not defined in `defaultSettings`.
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        const incoming = settings ?? {};
        if (Object.keys(incoming).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy so later edits (e.g. uploading a speaker) never
        // leak into `defaultSettings`.
        this.settings = {
            ...this.defaultSettings,
            speakers: [...this.defaultSettings.speakers],
            voiceMap: { ...this.defaultSettings.voiceMap },
        };

        // Only accept keys defined in defaultSettings
        for (const [key, value] of Object.entries(incoming)) {
            if (!Object.hasOwn(this.settings, key)) {
                throw new Error(`Invalid setting passed to TTS Provider: ${key}`);
            }
            this.settings[key] = value;
        }

        for (const speaker of this.settings.speakers) {
            this.#appendSpeakerOption(speaker);
        }

        $('#speecht5_tts_speaker').val(this.settings.speaker);
        $('#speecht5_tts_speaker').on('change', this.onSettingsChange.bind(this));

        $('#speecht5_tts_speaker_upload_button').on('click', () => {
            $('#speecht5_tts_speaker_upload').trigger('click');
        });
        $('#speecht5_tts_speaker_upload').on('change', (event) => this.#onSpeakerFileSelected(event));
        $('#speecht5_tts_delete_speaker_button').on('click', () => this.#onDeleteSpeakerClicked());

        await this.checkReady();
        console.debug('SpeechT5: Settings loaded');
    }

    /**
     * Readiness hook; this provider has nothing to check.
     * @returns {Promise<void>}
     */
    async checkReady() {
        // Intentionally empty.
    }

    /**
     * Looks up a stored speaker by its `voice_id`.
     * @param {string} voiceName - `voice_id` to search for.
     * @returns {Promise<object|undefined>} The speaker entry, or `undefined` if absent.
     */
    async getVoice(voiceName) {
        return this.settings.speakers.find((speaker) => speaker.voice_id === voiceName);
    }

    /**
     * Generates speech for the given text.
     * @param {string} text - Text to synthesize.
     * @param {string} voiceId - `voice_id` of the speaker to use.
     * @returns {Promise<Response>} The successful fetch response.
     */
    async generateTts(text, voiceId) {
        return await this.fetchTtsGeneration(text, voiceId);
    }

    /**
     * Lists the available voices, i.e. the stored speakers.
     * @returns {Promise<object[]>}
     */
    async fetchTtsVoiceObjects() {
        return this.settings.speakers;
    }

    /**
     * POSTs a synthesis request for `inputText` using the stored data of the
     * given speaker.
     * @param {string} inputText - Text to synthesize.
     * @param {string} voiceId - `voice_id` of the speaker to use.
     * @returns {Promise<Response>} The successful fetch response.
     * @throws {Error} If the speaker is unknown or the server answers with a
     *   non-OK status (a toast is shown in both cases).
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        const speaker = await this.getVoice(voiceId);
        if (!speaker) {
            toastr.error(`Speaker not found: ${voiceId}`, 'TTS Generation Failed');
            throw new Error(`Speaker not found: ${voiceId}`);
        }

        const response = await fetch(
            '/api/speech/synthesize',
            {
                method: 'POST',
                headers: getRequestHeaders(),
                body: JSON.stringify({
                    'text': inputText,
                    'speaker': speaker.data,
                    'model': 'Xenova/speecht5_tts',
                }),
            },
        );

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }

    /**
     * History lookup placeholder: resolves with the id it was given.
     * @param {*} history_item_id
     * @returns {Promise<*>}
     */
    async fetchTtsFromHistory(history_item_id) {
        return history_item_id;
    }

    /** Releases the object URL of the current preview clip, if one is held. */
    #revokePreviewUrl() {
        if (this.#previewUrl !== null) {
            URL.revokeObjectURL(this.#previewUrl);
            this.#previewUrl = null;
        }
    }

    /** Adds one `<option>` for the given speaker to the speaker dropdown. */
    #appendSpeakerOption(speaker) {
        $('#speecht5_tts_speaker').append($('<option>', {
            value: speaker.voice_id,
            text: speaker.name,
        }));
    }

    /** Handles the hidden file input: validates the file and stores it as a speaker. */
    async #onSpeakerFileSelected(event) {
        const input = event.target;
        // The list is empty when the user cancels the file dialog.
        const file = input.files?.[0];
        if (!file) {
            return;
        }

        try {
            if (file.size !== SpeechT5TtsProvider.SPEAKER_FILE_SIZE) {
                toastr.error(`Invalid speaker file size, expected ${SpeechT5TtsProvider.SPEAKER_FILE_SIZE} bytes`);
                return;
            }

            const data = await getBase64Async(file);
            const speaker = {
                voice_id: file.name,
                name: file.name,
                data: data,
                lang: 'en-US',
                preview_url: false,
            };

            // Re-uploading a file with the same name refreshes the stored
            // entry instead of creating a duplicate voice_id / option.
            const existing = this.settings.speakers.find((s) => s.voice_id === speaker.voice_id);
            if (existing) {
                Object.assign(existing, speaker);
            } else {
                this.settings.speakers.push(speaker);
                this.#appendSpeakerOption(speaker);
            }

            $('#speecht5_tts_speaker').val(speaker.voice_id);
            this.onSettingsChange();
        } catch (err) {
            console.error('SpeechT5: Failed to read speaker file', err);
            toastr.error('Failed to read speaker file');
        } finally {
            // Clear the input so choosing the same file again fires `change`.
            input.value = '';
        }
    }

    /** Handles the delete button: removes the selected speaker after confirmation. */
    #onDeleteSpeakerClicked() {
        const confirmDelete = confirm('Are you sure you want to delete this speaker?');
        if (!confirmDelete) {
            return;
        }

        const index = this.settings.speakers.findIndex((s) => s.voice_id === this.settings.speaker);
        if (index === -1) {
            toastr.error('Speaker not found');
            return;
        }

        const [removed] = this.settings.speakers.splice(index, 1);

        // Compare option values directly: voice ids are file names and may
        // contain characters that would break an attribute selector.
        $('#speecht5_tts_speaker')
            .find('option')
            .filter((_, option) => option.value === removed.voice_id)
            .remove();

        if (this.settings.speakers.length === 0) {
            console.log('No speakers left');
            // Persist the deletion and drop the now-dangling selection.
            this.settings.speaker = '';
            saveTtsProviderSettings();
            return;
        }

        $('#speecht5_tts_speaker').val(this.settings.speakers[0].voice_id);
        this.onSettingsChange();
    }
}