NicolasDenier committed on
Commit
74bdee5
·
1 Parent(s): 1c9743e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -4
app.py CHANGED
@@ -14,15 +14,20 @@ asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base",
14
  # load text-to-speech checkpoint and speaker embeddings
15
  processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
16
 
17
- model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts").to(device)
18
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
19
 
20
- embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
21
- speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
 
 
 
 
 
22
 
23
 
24
  def translate(audio):
25
- outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "translate"})
26
  return outputs["text"]
27
 
28
 
 
14
  # load text-to-speech checkpoint and speaker embeddings
15
  processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
16
 
17
+ model = SpeechT5ForTextToSpeech.from_pretrained("Sandiago21/speecht5_finetuned_facebook_voxpopuli_french").to(device)
18
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
19
 
20
+ def npy_loader(path):
21
+ np_sample = np.transpose(np.load(path))
22
+ sample = torch.from_numpy(np_sample)
23
+ return sample
24
+ #embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
25
+ xvector_path = "xvectors/french_recording-bernard-candide_segment_090.npy"
26
+ speaker_embeddings = torch.tensor(npy_loader(xvector_path))
27
 
28
 
29
  def translate(audio):
30
+ outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe", "language": "fr"})
31
  return outputs["text"]
32
 
33