Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -12,18 +12,12 @@ asr_processor = Wav2Vec2Processor.from_pretrained("jonatasgrosman/wav2vec2-large
|
|
12 |
translator = pipeline("text2text-generation", model="dammyogt/damilola-finetuned-NLP-opus-mt-en-ha")
|
13 |
tts = pipeline("text-to-speech", model="Baghdad99/hausa_voice_tts")
|
14 |
|
15 |
-
def translate_speech(audio_data_tuple):
|
16 |
-
#
|
17 |
-
|
18 |
-
|
19 |
-
# Resample the audio data to 16000 Hz
|
20 |
-
audio_data_resampled = librosa.resample(audio_data, sample_rate, 16000)
|
21 |
-
|
22 |
-
with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as temp_audio_file:
|
23 |
-
sf.write(temp_audio_file.name, audio_data_resampled, 16000)
|
24 |
|
25 |
# Prepare the input dictionary
|
26 |
-
input_dict = asr_processor(
|
27 |
|
28 |
# Use the ASR model to get the logits
|
29 |
logits = asr_model(input_dict.input_values.to("cpu")).logits
|
@@ -66,11 +60,10 @@ def translate_speech(audio_data_tuple):
|
|
66 |
|
67 |
return 16000, synthesised_speech
|
68 |
|
69 |
-
|
70 |
# Define the Gradio interface
|
71 |
iface = gr.Interface(
|
72 |
fn=translate_speech,
|
73 |
-
inputs=gr.inputs.Audio(
|
74 |
outputs=gr.outputs.Audio(type="numpy"),
|
75 |
title="English to Hausa Translation",
|
76 |
description="Realtime demo for English to Hausa translation using speech recognition and text-to-speech synthesis."
|
|
|
12 |
translator = pipeline("text2text-generation", model="dammyogt/damilola-finetuned-NLP-opus-mt-en-ha")
|
13 |
tts = pipeline("text-to-speech", model="Baghdad99/hausa_voice_tts")
|
14 |
|
15 |
+
def translate_speech(audio_file_path):
|
16 |
+
# Load the audio file as a floating point time series
|
17 |
+
audio_data, sample_rate = librosa.load(audio_file_path, sr=16000)
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
# Prepare the input dictionary
|
20 |
+
input_dict = asr_processor(audio_data, sampling_rate=16000, return_tensors="pt", padding=True) # Pass the resampled audio_data here
|
21 |
|
22 |
# Use the ASR model to get the logits
|
23 |
logits = asr_model(input_dict.input_values.to("cpu")).logits
|
|
|
60 |
|
61 |
return 16000, synthesised_speech
|
62 |
|
|
|
63 |
# Define the Gradio interface
|
64 |
iface = gr.Interface(
|
65 |
fn=translate_speech,
|
66 |
+
inputs=gr.inputs.Audio(type="file"), # Change this line
|
67 |
outputs=gr.outputs.Audio(type="numpy"),
|
68 |
title="English to Hausa Translation",
|
69 |
description="Realtime demo for English to Hausa translation using speech recognition and text-to-speech synthesis."
|