Spaces:

mboushaba
/

audio-transcriber

Sleeping

App Files Files Community

mboushaba committed on Sep 24, 2024

Commit

4c18362

verified ·

1 Parent(s): 972f6c3

Create voice_recognition.py

Browse files

Files changed (1) hide show

voice_recognition.py +108 -0

voice_recognition.py ADDED Viewed

	@@ -0,0 +1,108 @@

+import os
+import speech_recognition as sr
+from deep_translator import GoogleTranslator
+import ffmpeg
+import random
+import string
+# Constants
+# Sample values passed to process_audio_recognition() when this module is run as a script.
+AUDIO_FILE_PATH = "audio/test-ph-3.m4a"
+SOURCE_LANG = "fil-PH"
+TARGET_LANG = "en"
+def convert_audio_to_wav(input_audio_path, output_wav_path):
+    """
+    Convert any audio format (like M4A) to WAV using ffmpeg and return the output WAV file path.
+    """
+    try:
+        ffmpeg.input(input_audio_path).output(output_wav_path, acodec='pcm_s16le', ar=44100).run()
+        print(f"Audio successfully converted to WAV: {output_wav_path}")
+        return output_wav_path
+    except ffmpeg.Error as e:
+        print(f"Error converting {input_audio_path} to WAV: {e}")
+        return None
+    except Exception as e:
+        print(f"Error converting {input_audio_path} to WAV: {e}")
+        return None
+def recognize_speech_from_wav(model, wav_file_path, source_lang):
+    """
+    Recognize speech from a WAV file using the Whisper recognition model.
+    """
+    recognizer = sr.Recognizer()
+    with sr.AudioFile(wav_file_path) as source:
+        try:
+            #print('Transcribing audio to text...')
+            recognizer.adjust_for_ambient_noise(source)
+            audio_data = recognizer.record(source)
+            if model.lower() == "whisper":
+                text = recognizer.recognize_whisper(audio_data, language=source_lang)
+            elif model.lower() == "google":
+                text = recognizer.recognize_google(audio_data, language=source_lang)
+            else:
+                print(f"Invalid model name: {model}")
+                return None
+            return text
+        except sr.UnknownValueError:
+            print("Could not understand the audio.")
+            return None
+        except sr.RequestError as e:
+            print(f"Could not request results from the service; {e}")
+            return None
+        except Exception as e:
+            print(f"Could not request results from the service; {e}")
+            return None
+def translate_text(text, target_lang):
+    """
+    Translate the recognized text into the target language using Google Translator.
+    """
+    try:
+        return GoogleTranslator(source='auto', target=target_lang).translate(text)
+    except Exception as e:
+        print(f"Error translating text: {e}")
+        return None
+def process_audio_recognition(model="whisper", audio_path=None, source_lang="en", target_lang="en", translate=False):
+    """
+    Main function to handle audio recognition and optional translation.
+    Converts the audio to WAV, recognizes speech, and optionally translates it.
+    """
+    wav_file =  audio_path
+    if wav_file and not wav_file.endswith(".wav"):
+        wav_file = convert_audio_to_wav(audio_path, ''.join(random.choices(string.ascii_uppercase + string.ascii_lowercase, k=5)) + "converted_audio.wav")
+    if not wav_file:
+        print(f"Failed to process the audio file: {audio_path}")
+        return None
+    text = recognize_speech_from_wav(model, wav_file, source_lang)
+    if text:
+        # print(f"############# RECOGNIZED TEXT ({source_lang}) ##################")
+        # print(text)
+        # print("################################################")
+        if translate:
+            translated_text = translate_text(text, target_lang)
+            if translated_text:
+                # print(f"############# TRANSLATED TEXT ({target_lang}) ##################")
+                # print(translated_text)
+                # print("################################################")
+                text = translated_text
+    # Cleanup temporary WAV file
+    # try:
+    #     #os.remove(wav_file)
+    #     print(f"Temporary WAV file {wav_file} removed.")
+    # except OSError as e:
+    #     print(f"Error removing temporary WAV file {wav_file}: {e}")
+    return text
+if __name__ == '__main__':
+    process_audio_recognition(AUDIO_FILE_PATH, SOURCE_LANG, TARGET_LANG, translate=True)