mboushaba committed on
Commit
4c18362
·
verified ·
1 Parent(s): 972f6c3

Create voice_recognition.py

Browse files
Files changed (1) hide show
  1. voice_recognition.py +108 -0
voice_recognition.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import speech_recognition as sr
3
+ from deep_translator import GoogleTranslator
4
+ import ffmpeg
5
+ import random
6
+ import string
7
+
8
+ # Constants
9
+ AUDIO_FILE_PATH = "audio/test-ph-3.m4a"
10
+ SOURCE_LANG = "fil-PH"
11
+ TARGET_LANG = "en"
12
+
13
+
14
def convert_audio_to_wav(input_audio_path, output_wav_path):
    """
    Convert any audio format (like M4A) to WAV using ffmpeg.

    The output is written with the ``pcm_s16le`` codec at a 44100 Hz sample
    rate. An existing file at ``output_wav_path`` is overwritten.

    Args:
        input_audio_path: Path of the audio file to convert.
        output_wav_path: Path where the WAV file is written.

    Returns:
        ``output_wav_path`` on success, or ``None`` when a path is missing or
        the conversion fails (the error is printed, never raised).
    """
    # Reject missing paths up front instead of letting ffmpeg fail obscurely.
    if not input_audio_path or not output_wav_path:
        print(f"Error converting {input_audio_path} to WAV: missing input or output path")
        return None

    try:
        stream = ffmpeg.input(input_audio_path).output(
            output_wav_path, acodec='pcm_s16le', ar=44100
        )
        # overwrite_output=True: without it ffmpeg asks for confirmation when
        # the target exists, which blocks or aborts a non-interactive run.
        # capture_stderr=True: makes ffmpeg's own diagnostics available on the
        # raised ffmpeg.Error so the message below is actionable.
        stream.run(overwrite_output=True, capture_stderr=True)
    except ffmpeg.Error as e:
        stderr = getattr(e, "stderr", None)
        detail = stderr.decode(errors="replace").strip() if stderr else str(e)
        print(f"Error converting {input_audio_path} to WAV: {detail}")
        return None
    except Exception as e:
        # Anything that is not an ffmpeg failure, e.g. the ffmpeg executable
        # could not be launched at all.
        print(f"Error converting {input_audio_path} to WAV: {e}")
        return None

    print(f"Audio successfully converted to WAV: {output_wav_path}")
    return output_wav_path
28
+
29
+
30
def recognize_speech_from_wav(model, wav_file_path, source_lang):
    """
    Recognize speech from a WAV file using the selected recognition backend.

    Args:
        model: Backend name, case-insensitive: ``"whisper"`` or ``"google"``.
        wav_file_path: Path of the audio file opened with ``sr.AudioFile``.
        source_lang: Language value passed unchanged as ``language=`` to the
            chosen recognizer.

    Returns:
        The recognized text, or ``None`` when the model name is invalid or
        recognition fails (the reason is printed).

    Note:
        Errors raised while opening ``wav_file_path`` are not caught here and
        propagate to the caller.
    """
    # Validate the backend before touching the audio file so a bad model name
    # is reported without loading the whole recording first.
    backend = model.lower() if isinstance(model, str) else None
    if backend not in ("whisper", "google"):
        print(f"Invalid model name: {model}")
        return None

    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_file_path) as source:
        try:
            # No adjust_for_ambient_noise() here: on a file source it reads
            # (and thereby discards) the beginning of the recording, so the
            # first part of the speech would never reach the recognizer.
            audio_data = recognizer.record(source)
            if backend == "whisper":
                return recognizer.recognize_whisper(audio_data, language=source_lang)
            return recognizer.recognize_google(audio_data, language=source_lang)
        except sr.UnknownValueError:
            print("Could not understand the audio.")
            return None
        except sr.RequestError as e:
            print(f"Could not request results from the service; {e}")
            return None
        except Exception as e:
            # Not a service request failure, so do not report it as one.
            print(f"Error during speech recognition: {e}")
            return None
57
+
58
+
59
def translate_text(text, target_lang):
    """
    Translate ``text`` into ``target_lang`` with Google Translator.

    The source language is auto-detected. Any failure is printed and
    results in ``None`` being returned instead of an exception.
    """
    translated = None
    try:
        translator = GoogleTranslator(source='auto', target=target_lang)
        translated = translator.translate(text)
    except Exception as err:
        print(f"Error translating text: {err}")
    return translated
68
+
69
+
70
def process_audio_recognition(model="whisper", audio_path=None, source_lang="en", target_lang="en", translate=False):
    """
    Main function to handle audio recognition and optional translation.

    Non-WAV input is first converted to a temporary WAV file in the current
    working directory; that temporary file is removed before returning.
    A WAV file supplied by the caller is used as-is and never deleted.

    Args:
        model: Recognition backend name forwarded to
            ``recognize_speech_from_wav``.
        audio_path: Path of the audio file to transcribe.
        source_lang: Language of the speech, forwarded to the recognizer.
        target_lang: Language to translate into when ``translate`` is true.
        translate: When true, translate the recognized text; if translation
            fails the untranslated text is returned.

    Returns:
        The recognized (and optionally translated) text, or ``None`` when no
        audio path was given, conversion failed, or nothing was recognized.
    """
    if not audio_path:
        print(f"Failed to process the audio file: {audio_path}")
        return None

    wav_file = audio_path
    temp_wav = None  # set only for a file created here, so only that is deleted
    try:
        # Case-insensitive so "CLIP.WAV" is not needlessly re-encoded.
        if not audio_path.lower().endswith(".wav"):
            prefix = ''.join(random.choices(string.ascii_uppercase + string.ascii_lowercase, k=5))
            temp_wav = prefix + "converted_audio.wav"
            wav_file = convert_audio_to_wav(audio_path, temp_wav)
            if not wav_file:
                print(f"Failed to process the audio file: {audio_path}")
                return None

        text = recognize_speech_from_wav(model, wav_file, source_lang)
        if text and translate:
            translated_text = translate_text(text, target_lang)
            if translated_text:
                text = translated_text
        return text
    finally:
        # Cleanup temporary WAV file (also a partial one left by a failed
        # conversion); never the caller's own input file.
        if temp_wav and os.path.exists(temp_wav):
            try:
                os.remove(temp_wav)
            except OSError as e:
                print(f"Error removing temporary WAV file {temp_wav}: {e}")
105
+
106
+
107
if __name__ == '__main__':
    # Arguments are passed by keyword: the first positional parameter of
    # process_audio_recognition is `model`, so passing the path, source and
    # target languages positionally would bind them to the wrong parameters.
    # NOTE(review): SOURCE_LANG looks like a Google-style locale tag; the
    # default "whisper" backend may not accept it — confirm, or pass
    # model="google" here.
    result = process_audio_recognition(
        audio_path=AUDIO_FILE_PATH,
        source_lang=SOURCE_LANG,
        target_lang=TARGET_LANG,
        translate=True,
    )
    if result is not None:
        print(result)