Spaces:

propilot
/

ai-speech-recognition

Sleeping

App Files Community

cadasme committed on Jun 27, 2023

Commit

6f58142

1 Parent(s): ff5f53a

working

Browse files

Files changed (1) hide show

app.py +7 -7

app.py CHANGED Viewed

@@ -23,11 +23,11 @@ def transcribe_whisper(model_name, file_path):
 def transcribe_speech_recognition(file_path):
     r = sr.Recognizer()
     with sr.AudioFile(file_path) as source:
-        r.adjust_for_ambient_noise(source, duration=0.5)  # Adjust ambient noise threshold
         audio = r.record(source)
     try:
-        result = r.recognize_google(audio, language='spanish')
         return result
     except sr.UnknownValueError:
         return "No se pudo reconocer ningún texto en el audio."
@@ -66,6 +66,9 @@ def main():
     # Choose the transcription method and model
     option = st.selectbox('Escoger Modelo de Transcripción', ('Subir un archivo', 'Grabar audio en tiempo real'))
     transcription_method = st.selectbox('Escoge el método de transcripción', ('OpenAI Whisper', 'Google Speech API'))
     if transcription_method == 'OpenAI Whisper':
         model_name = st.selectbox('Escoge el modelo de Whisper', ('base', 'small', 'medium', 'large', 'tiny'))
@@ -76,11 +79,7 @@ def main():
             handle_uploaded_file(uploaded_file, transcription_method, model_name)
     elif option == 'Grabar audio en tiempo real':
-        duration = 5
-        # duration = st.slider("Selecciona la duración de la grabación (segundos)", 1, 10, 5)
-        # st.write("Duración de la grabación:", duration, "segundos")
-        audio_bytes = audio_recorder(pause_threshold=duration, sample_rate=16_000)
         if audio_bytes:
             st.write("Grabación finalizada. Transcribiendo...")
@@ -95,5 +94,6 @@ def main():
                 st.text_area('Resultado de la Transcripción:', transcript, height=200)
 if __name__ == "__main__":
     main()

 def transcribe_speech_recognition(file_path):
     r = sr.Recognizer()
     with sr.AudioFile(file_path) as source:
+        r.adjust_for_ambient_noise(source, duration=0.25)  # Adjust ambient noise threshold
         audio = r.record(source)
     try:
+        result = r.recognize_google(audio, language='es')
         return result
     except sr.UnknownValueError:
         return "No se pudo reconocer ningún texto en el audio."
     # Choose the transcription method and model
     option = st.selectbox('Escoger Modelo de Transcripción', ('Subir un archivo', 'Grabar audio en tiempo real'))
     transcription_method = st.selectbox('Escoge el método de transcripción', ('OpenAI Whisper', 'Google Speech API'))
+    model_name = None  # Initialize model_name with a default value
     if transcription_method == 'OpenAI Whisper':
         model_name = st.selectbox('Escoge el modelo de Whisper', ('base', 'small', 'medium', 'large', 'tiny'))
             handle_uploaded_file(uploaded_file, transcription_method, model_name)
     elif option == 'Grabar audio en tiempo real':
+        audio_bytes = audio_recorder(pause_threshold=5, sample_rate=16_000)
         if audio_bytes:
             st.write("Grabación finalizada. Transcribiendo...")
                 st.text_area('Resultado de la Transcripción:', transcript, height=200)
 if __name__ == "__main__":
     main()