|
import torch |
|
from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor |
|
import sentencepiece |
|
|
|
# Checkpoint identifier shared by the processor and the model below.
MODEL_ID = "microsoft/speecht5_tts"

# Both objects are created once, at import time, and reused by every call
# to the functions in this module.
processor = SpeechT5Processor.from_pretrained(MODEL_ID)
model = SpeechT5ForTextToSpeech.from_pretrained(MODEL_ID)
|
|
|
def synthesize_speech(text, speaker_embeddings=None, vocoder=None):
    """Synthesize speech for ``text`` with the module-level SpeechT5 model.

    Args:
        text: Text to be spoken. A falsy value (``""``, ``None``) is
            rejected with an error string instead of being synthesized.
        speaker_embeddings: Optional tensor of speaker embeddings passed to
            ``model.generate_speech``. When omitted, an all-zero vector of
            width ``model.config.speaker_embedding_dim`` is used so that the
            call can proceed; supply a real embedding for a natural voice.
            NOTE(review): expected shape is presumably
            ``(1, speaker_embedding_dim)`` -- confirm against the SpeechT5
            documentation.
        vocoder: Optional vocoder module forwarded to
            ``model.generate_speech``. Per the SpeechT5 documentation the
            result is a waveform when a vocoder is given and a spectrogram
            otherwise -- confirm for the installed transformers version.

    Returns:
        The string ``"ERROR: Please provide text for synthesis"`` when
        ``text`` is falsy; otherwise the tensor produced by
        ``model.generate_speech``, moved to the CPU.
    """
    if not text:
        return "ERROR: Please provide text for synthesis"

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # The processor selects its tokenizer from the ``text=`` keyword; a
    # positional argument is not treated as text input.
    inputs = processor(text=text, return_tensors="pt").to(device)

    if speaker_embeddings is None:
        # Neutral fallback so generation does not fail for lack of a speaker.
        speaker_embeddings = torch.zeros(
            (1, model.config.speaker_embedding_dim), device=device
        )
    else:
        speaker_embeddings = speaker_embeddings.to(device)

    if vocoder is not None:
        # Keep the vocoder on the same device as the tensors it will receive.
        vocoder = vocoder.to(device)

    with torch.no_grad():
        speech = model.generate_speech(
            inputs["input_ids"],
            speaker_embeddings=speaker_embeddings,
            vocoder=vocoder,
        )

    # The output is audio data, not token IDs, so it is returned as-is rather
    # than being passed through the tokenizer's ``decode``.
    return speech.cpu()
|
|