Barani1-t committed on
Commit
48cb862
·
1 Parent(s): 21a68b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -24,8 +24,6 @@ speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze
24
 
25
 
26
  def translate(audio):
27
- print(audio)
28
- audio = librosa.resample(audio, orig_sr=22050, target_sr=16000)
29
  outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"language": "nl","task": "transcribe"})
30
  return outputs["text"]
31
 
@@ -37,7 +35,10 @@ def synthesise(text):
37
 
38
 
39
  def speech_to_speech_translation(audio):
40
- translated_text = translate(audio)
 
 
 
41
  synthesised_speech = synthesise(translated_text)
42
  synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
43
  return 16000, synthesised_speech
 
24
 
25
 
26
  def translate(audio):
 
 
27
  outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"language": "nl","task": "transcribe"})
28
  return outputs["text"]
29
 
 
35
 
36
 
37
  def speech_to_speech_translation(audio):
38
+ sampling_rate = 16000
39
+ data_array,samplerate = librosa.load(audio)
40
+ data_16 = librosa.resample(data_array, orig_sr=samplerate, target_sr=sampling_rate)
41
+ translated_text = translate(data_16)
42
  synthesised_speech = synthesise(translated_text)
43
  synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
44
  return 16000, synthesised_speech