jason1i committed on
Commit
48e001b
·
1 Parent(s): 33bb1b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -14,23 +14,26 @@ asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base",
14
  # load text-to-speech checkpoint and speaker embeddings
15
  processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
16
 
17
- model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts").to(device)
 
 
 
18
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
19
 
20
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
21
  speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
22
 
23
 
24
- def translate(audio):
25
- outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "translate"})
26
- return outputs["text"]
27
 
28
 
29
- # Added to Base to translate from Language X to any Language Y using "task": "transcribe"
30
  # At Inference. it should use translate(sample["audio"].copy())
31
 
32
  def translate(audio):
33
- outputs = pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe", "language": "es"})
34
  return outputs["text"]
35
 
36
 
 
14
  # load text-to-speech checkpoint and speaker embeddings
15
  processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
16
 
17
+ #model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts").to(device)
18
+ #Use own TTS Model
19
+ model = SpeechT5ForTextToSpeech.from_pretrained("jasonl1/speecht5_finetuned_voxpopuli_fi")
20
+
21
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
22
 
23
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
24
  speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
25
 
26
 
27
+ #def translate(audio):
28
+ # outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "translate"})
29
+ # return outputs["text"]
30
 
31
 
32
+ # Added to Base to translate from Language X to any Language Y ="fi" using "task": "transcribe"
33
  # At Inference. it should use translate(sample["audio"].copy())
34
 
35
  def translate(audio):
36
+ outputs = pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe", "language": "fi"})
37
  return outputs["text"]
38
 
39