Baghdad99 committed
Commit 2f47955 · 1 Parent(s): 0639911

Update app.py

Files changed (1)
1. app.py: +5 -12
app.py CHANGED
@@ -12,18 +12,12 @@ asr_processor = Wav2Vec2Processor.from_pretrained("jonatasgrosman/wav2vec2-large
 translator = pipeline("text2text-generation", model="dammyogt/damilola-finetuned-NLP-opus-mt-en-ha")
 tts = pipeline("text-to-speech", model="Baghdad99/hausa_voice_tts")
 
-def translate_speech(audio_data_tuple):
-    # Extract the audio data from the tuple
-    sample_rate, audio_data = audio_data_tuple
-
-    # Resample the audio data to 16000 Hz
-    audio_data_resampled = librosa.resample(audio_data, sample_rate, 16000)
-
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as temp_audio_file:
-        sf.write(temp_audio_file.name, audio_data_resampled, 16000)
+def translate_speech(audio_file_path):
+    # Load the audio file as a floating point time series
+    audio_data, sample_rate = librosa.load(audio_file_path, sr=16000)
 
     # Prepare the input dictionary
-    input_dict = asr_processor(audio_data_resampled, sampling_rate=16000, return_tensors="pt", padding=True)  # Pass the resampled audio_data here
+    input_dict = asr_processor(audio_data, sampling_rate=16000, return_tensors="pt", padding=True)  # Pass the resampled audio_data here
 
     # Use the ASR model to get the logits
     logits = asr_model(input_dict.input_values.to("cpu")).logits
@@ -66,11 +60,10 @@ def translate_speech(audio_data_tuple):
 
     return 16000, synthesised_speech
 
-
 # Define the Gradio interface
 iface = gr.Interface(
     fn=translate_speech,
-    inputs=gr.inputs.Audio(source="microphone"),  # Change this line
+    inputs=gr.inputs.Audio(type="file"),  # Change this line
     outputs=gr.outputs.Audio(type="numpy"),
     title="English to Hausa Translation",
     description="Realtime demo for English to Hausa translation using speech recognition and text-to-speech synthesis."