calvpang committed on
Commit
812a24d
·
1 Parent(s): 44d3b6c

Attempt to fix input length issue

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -12,7 +12,7 @@ device = "cuda:0" if torch.cuda.is_available() else "cpu"
12
  asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)
13
 
14
  # load text-to-speech checkpoint and speaker embeddings
15
- processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
16
 
17
  model = SpeechT5ForTextToSpeech.from_pretrained("sanchit-gandhi/speecht5_tts_vox_nl").to(device)
18
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
@@ -28,7 +28,9 @@ def translate(audio):
28
 
29
  def synthesise(text):
30
  inputs = processor(text=text, return_tensors="pt")
31
- speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
 
 
32
  return speech.cpu()
33
 
34
 
 
12
  asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)
13
 
14
  # load text-to-speech checkpoint and speaker embeddings
15
+ processor = SpeechT5Processor.from_pretrained("sanchit-gandhi/speecht5_tts_vox_nl")
16
 
17
  model = SpeechT5ForTextToSpeech.from_pretrained("sanchit-gandhi/speecht5_tts_vox_nl").to(device)
18
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
 
28
 
29
  def synthesise(text):
30
  inputs = processor(text=text, return_tensors="pt")
31
+ # Reducing the input length to 600
32
+ inputs = inputs["input_ids"][:600]
33
+ speech = model.generate_speech(inputs.to(device), speaker_embeddings.to(device), vocoder=vocoder)
34
  return speech.cpu()
35
 
36