Spaces:

Baghdad99
/

english-to-hausa

Runtime error

Baghdad99 committed on Dec 21, 2023

Commit

2f47955

1 Parent(s): 0639911

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -12,18 +12,12 @@ asr_processor = Wav2Vec2Processor.from_pretrained("jonatasgrosman/wav2vec2-large
 translator = pipeline("text2text-generation", model="dammyogt/damilola-finetuned-NLP-opus-mt-en-ha")
 tts = pipeline("text-to-speech", model="Baghdad99/hausa_voice_tts")
-def translate_speech(audio_data_tuple):
-    # Extract the audio data from the tuple
-    sample_rate, audio_data = audio_data_tuple
-    # Resample the audio data to 16000 Hz
-    audio_data_resampled = librosa.resample(audio_data, sample_rate, 16000)
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as temp_audio_file:
-        sf.write(temp_audio_file.name, audio_data_resampled, 16000)
     # Prepare the input dictionary
-    input_dict = asr_processor(audio_data_resampled, sampling_rate=16000, return_tensors="pt", padding=True)  # Pass the resampled audio_data here
     # Use the ASR model to get the logits
     logits = asr_model(input_dict.input_values.to("cpu")).logits
@@ -66,11 +60,10 @@ def translate_speech(audio_data_tuple):
     return 16000, synthesised_speech
 # Define the Gradio interface
 iface = gr.Interface(
     fn=translate_speech,
-    inputs=gr.inputs.Audio(source="microphone"),  # Change this line
     outputs=gr.outputs.Audio(type="numpy"),
     title="English to Hausa Translation",
     description="Realtime demo for English to Hausa translation using speech recognition and text-to-speech synthesis."

 translator = pipeline("text2text-generation", model="dammyogt/damilola-finetuned-NLP-opus-mt-en-ha")
 tts = pipeline("text-to-speech", model="Baghdad99/hausa_voice_tts")
+def translate_speech(audio_file_path):
+    # Load the audio file as a floating point time series
+    audio_data, sample_rate = librosa.load(audio_file_path, sr=16000)
     # Prepare the input dictionary
+    input_dict = asr_processor(audio_data, sampling_rate=16000, return_tensors="pt", padding=True)  # Pass the resampled audio_data here
     # Use the ASR model to get the logits
     logits = asr_model(input_dict.input_values.to("cpu")).logits
     return 16000, synthesised_speech
 # Define the Gradio interface
 iface = gr.Interface(
     fn=translate_speech,
+    inputs=gr.inputs.Audio(type="file"),  # Change this line
     outputs=gr.outputs.Audio(type="numpy"),
     title="English to Hausa Translation",
     description="Realtime demo for English to Hausa translation using speech recognition and text-to-speech synthesis."