Baghdad99 committed on
Commit
2ad4835
·
1 Parent(s): 868f598

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -32
app.py CHANGED
@@ -26,51 +26,47 @@ def translate_speech(audio_data_tuple):
26
  output = model.transcribe([temp_audio_file.name])
27
  print(f"Output: {output}") # Print the output to see what it contains
28
 
29
- # Check if the output contains 'transcription'
30
- if 'transcription' in output:
31
- transcription = output["transcription"]
32
- else:
33
- print("The output does not contain 'transcription'")
34
- return
35
-
36
 
37
- # Use the translation pipeline to translate the transcription
38
- translated_text = translator(transciption, return_tensors="pt")
39
- print(f"Translated text: {translated_text}") # Print the translated text to see what it contains
 
40
 
41
- # Check if the translated text contains 'generated_token_ids'
42
- if 'generated_token_ids' in translated_text[0]:
43
- # Decode the tokens into text
44
- translated_text_str = translator.tokenizer.decode(translated_text[0]['generated_token_ids'])
45
- else:
46
- print("The translated text does not contain 'generated_token_ids'")
47
- return
48
 
49
- # Use the text-to-speech pipeline to synthesize the translated text
50
- synthesised_speech = tts(translated_text_str)
51
- print(f"Synthesised speech: {synthesised_speech}") # Print the synthesised speech to see what it contains
52
 
53
- # Check if the synthesised speech contains 'audio'
54
- if 'audio' in synthesised_speech:
55
- synthesised_speech_data = synthesised_speech['audio']
56
- else:
57
- print("The synthesised speech does not contain 'audio'")
58
- return
59
 
60
- # Flatten the audio data
61
- synthesised_speech_data = synthesised_speech_data.flatten()
62
 
63
- # Scale the audio data to the range of int16 format
64
- synthesised_speech = (synthesised_speech_data * 32767).astype(np.int16)
65
 
66
- return 16000, synthesised_speech
67
 
68
  # Define the Gradio interface
69
  iface = gr.Interface(
70
  fn=translate_speech,
71
  inputs=gr.inputs.Audio(source="microphone"), # Change this line
72
  outputs=gr.outputs.Audio(type="numpy"),
73
- title="Hausa to English Translation",
74
  description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis."
75
  )
76
 
 
26
  output = model.transcribe([temp_audio_file.name])
27
  print(f"Output: {output}") # Print the output to see what it contains
28
 
29
+ # Extract the transcriptions from the outputs
30
+ transcriptions = [output['transcription'] for output in outputs]
 
 
 
 
 
31
 
32
+ for transcription in transcriptions:
33
+ # Use the translation pipeline to translate the transcription
34
+ translated_text = translator(transcription, return_tensors="pt")
35
+ print(f"Translated text: {translated_text}") # Print the translated text to see what it contains
36
 
37
+ # Check if the translated text contains 'generated_token_ids'
38
+ if 'generated_token_ids' in translated_text[0]:
39
+ # Decode the tokens into text
40
+ translated_text_str = translator.tokenizer.decode(translated_text[0]['generated_token_ids'])
41
+ else:
42
+ print("The translated text does not contain 'generated_token_ids'")
43
+ return
44
 
45
+ # Use the text-to-speech pipeline to synthesize the translated text
46
+ synthesised_speech = tts(translated_text_str)
47
+ print(f"Synthesised speech: {synthesised_speech}") # Print the synthesised speech to see what it contains
48
 
49
+ # Check if the synthesised speech contains 'audio'
50
+ if 'audio' in synthesised_speech:
51
+ synthesised_speech_data = synthesised_speech['audio']
52
+ else:
53
+ print("The synthesised speech does not contain 'audio'")
54
+ return
55
 
56
+ # Flatten the audio data
57
+ synthesised_speech_data = synthesised_speech_data.flatten()
58
 
59
+ # Scale the audio data to the range of int16 format
60
+ synthesised_speech = (synthesised_speech_data * 32767).astype(np.int16)
61
 
62
+ return 16000, synthesised_speech
63
 
64
  # Define the Gradio interface
65
  iface = gr.Interface(
66
  fn=translate_speech,
67
  inputs=gr.inputs.Audio(source="microphone"), # Change this line
68
  outputs=gr.outputs.Audio(type="numpy"),
69
+ title="English to Hausa Translation",
70
  description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis."
71
  )
72