Spaces:
Sleeping
Sleeping
first commit
Browse files- app.py +50 -62
- requirements.txt +1 -3
app.py
CHANGED
@@ -1,14 +1,17 @@
|
|
1 |
-
# Import the required libraries
|
2 |
import streamlit as st
|
|
|
|
|
3 |
import whisper
|
4 |
import speech_recognition as sr
|
5 |
from pydub import AudioSegment
|
6 |
-
import
|
7 |
-
import sounddevice as sd
|
8 |
-
import numpy as np
|
9 |
-
from scipy.io.wavfile import write
|
10 |
-
import os
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
# Function to transcribe audio using OpenAI Whisper
|
14 |
def transcribe_whisper(model_name, file_path):
|
@@ -24,86 +27,71 @@ def transcribe_speech_recognition(file_path):
|
|
24 |
audio = r.record(source)
|
25 |
|
26 |
try:
|
27 |
-
result = r.recognize_google(audio, language='
|
28 |
return result
|
29 |
except sr.UnknownValueError:
|
30 |
return "No se pudo reconocer ningún texto en el audio."
|
31 |
|
32 |
-
# Function to
|
33 |
-
def
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
|
39 |
-
# Function to
|
40 |
-
def
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
#
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
-
|
52 |
-
write(filename, fs, recording)
|
53 |
|
54 |
def main():
|
55 |
st.title('Transcriptor de Audio')
|
56 |
|
57 |
# Choose the transcription method and model
|
|
|
58 |
transcription_method = st.selectbox('Escoge el método de transcripción', ('OpenAI Whisper', 'Google Speech API'))
|
59 |
if transcription_method == 'OpenAI Whisper':
|
60 |
model_name = st.selectbox('Escoge el modelo de Whisper', ('base', 'small', 'medium', 'large', 'tiny'))
|
61 |
|
62 |
-
option = st.selectbox('Escoge la opción', ('Subir un archivo', 'Grabar audio en tiempo real'))
|
63 |
-
|
64 |
if option == 'Subir un archivo':
|
65 |
uploaded_file = st.file_uploader("Sube tu archivo de audio para transcribir", type=['wav', 'mp3'])
|
66 |
|
67 |
if uploaded_file is not None:
|
68 |
-
|
69 |
-
st.write(file_details)
|
70 |
-
|
71 |
-
# Save uploaded file to temp directory
|
72 |
-
file_path = os.path.join("temp", uploaded_file.name)
|
73 |
-
with open(file_path, "wb") as f:
|
74 |
-
f.write(uploaded_file.getbuffer())
|
75 |
-
|
76 |
-
st.write("Archivo de audio cargado correctamente. Transcribiendo...")
|
77 |
-
with st.spinner('Transcribiendo...'):
|
78 |
-
if uploaded_file.name.endswith('.mp3') and transcription_method != 'OpenAI Whisper':
|
79 |
-
# Convert mp3 to wav if Google Speech API is selected and file is in mp3 format
|
80 |
-
file_path = convert_mp3_to_wav(file_path)
|
81 |
-
|
82 |
-
# Perform transcription
|
83 |
-
if transcription_method == 'OpenAI Whisper':
|
84 |
-
transcript = transcribe_whisper(model_name, file_path)
|
85 |
-
else:
|
86 |
-
transcript = transcribe_speech_recognition(file_path)
|
87 |
-
|
88 |
-
st.text_area('Resultado de la Transcripción:', transcript, height=200)
|
89 |
|
90 |
elif option == 'Grabar audio en tiempo real':
|
91 |
-
duration =
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
record_audio(filename, duration)
|
99 |
-
|
100 |
st.write("Grabación finalizada. Transcribiendo...")
|
101 |
with st.spinner('Transcribiendo...'):
|
|
|
|
|
|
|
|
|
|
|
102 |
# Perform transcription
|
103 |
-
|
104 |
-
transcript = transcribe_whisper(model_name, filename)
|
105 |
-
else:
|
106 |
-
transcript = transcribe_speech_recognition(filename)
|
107 |
|
108 |
st.text_area('Resultado de la Transcripción:', transcript, height=200)
|
109 |
|
|
|
|
|
1 |
import streamlit as st
|
2 |
+
import os
|
3 |
+
import tempfile
|
4 |
import whisper
|
5 |
import speech_recognition as sr
|
6 |
from pydub import AudioSegment
|
7 |
+
from audio_recorder_streamlit import audio_recorder
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# Function to convert mp3 file to wav
def convert_mp3_to_wav(mp3_path):
    """Convert an MP3 file to WAV next to the original.

    Parameters:
        mp3_path: path to the source .mp3 file.

    Returns:
        Path of the newly written .wav file (same directory and stem).
    """
    audio = AudioSegment.from_mp3(mp3_path)
    # Swap only the extension. The previous str.replace('.mp3', '.wav')
    # rewrote EVERY '.mp3' occurrence in the path (e.g. 'mix.mp3.mp3' or a
    # directory named 'my.mp3s'), producing a wrong output path.
    wav_path = os.path.splitext(mp3_path)[0] + '.wav'
    audio.export(wav_path, format="wav")
    return wav_path
|
15 |
|
16 |
# Function to transcribe audio using OpenAI Whisper
|
17 |
def transcribe_whisper(model_name, file_path):
|
|
|
27 |
audio = r.record(source)
|
28 |
|
29 |
try:
|
30 |
+
result = r.recognize_google(audio, language='spanish')
|
31 |
return result
|
32 |
except sr.UnknownValueError:
|
33 |
return "No se pudo reconocer ningún texto en el audio."
|
34 |
|
35 |
+
# Function to perform transcription based on selected method
def perform_transcription(transcription_method, model_name, audio_path):
    """Dispatch the audio file to the backend chosen in the UI.

    Parameters:
        transcription_method: 'OpenAI Whisper' selects the Whisper backend;
            any other value falls through to the Google Speech backend.
        model_name: Whisper model identifier (ignored by the Google backend).
        audio_path: path of the audio file to transcribe.

    Returns:
        The transcript string produced by the selected backend.
    """
    use_whisper = transcription_method == 'OpenAI Whisper'
    if not use_whisper:
        return transcribe_speech_recognition(audio_path)
    return transcribe_whisper(model_name, audio_path)
|
41 |
|
42 |
+
# Function to handle uploaded file transcription
def handle_uploaded_file(uploaded_file, transcription_method, model_name):
    """Persist an uploaded audio file, transcribe it, and render the result.

    Parameters:
        uploaded_file: Streamlit UploadedFile (.wav or .mp3).
        transcription_method: 'OpenAI Whisper' or 'Google Speech API'.
        model_name: Whisper model name; unused by the Google backend.
    """
    file_details = {"FileName": uploaded_file.name, "FileType": uploaded_file.type, "FileSize": uploaded_file.size}
    st.write(file_details)

    # Save uploaded file to temp directory
    os.makedirs("temp", exist_ok=True)  # Create temp directory if it doesn't exist
    # basename() strips any directory components a client could smuggle into
    # the filename (e.g. '../../etc/x') — path-traversal hardening for
    # untrusted upload names.
    safe_name = os.path.basename(uploaded_file.name)
    file_path = os.path.join("temp", safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    with st.spinner('Transcribiendo...'):
        if safe_name.endswith('.mp3') and transcription_method != 'OpenAI Whisper':
            # Convert mp3 to wav if Google Speech API is selected and file is in mp3 format
            file_path = convert_mp3_to_wav(file_path)

        # Perform transcription
        transcript = perform_transcription(transcription_method, model_name, file_path)

    st.text_area('Resultado de la Transcripción:', transcript, height=200)
|
|
|
62 |
|
63 |
def main():
    """Streamlit entry point: audio-transcription UI (upload or live record)."""
    st.title('Transcriptor de Audio')

    # Choose the input mode and the transcription method/model.
    # Label fix: this selectbox picks upload-vs-record, not a model — the
    # previous label 'Escoger Modelo de Transcripción' was a copy-paste error.
    option = st.selectbox('Escoge la opción', ('Subir un archivo', 'Grabar audio en tiempo real'))
    transcription_method = st.selectbox('Escoge el método de transcripción', ('OpenAI Whisper', 'Google Speech API'))
    # model_name is only meaningful for Whisper. Defaulting it here prevents a
    # NameError when 'Google Speech API' is selected and model_name is passed
    # through to the transcription helpers anyway.
    model_name = None
    if transcription_method == 'OpenAI Whisper':
        model_name = st.selectbox('Escoge el modelo de Whisper', ('base', 'small', 'medium', 'large', 'tiny'))

    if option == 'Subir un archivo':
        uploaded_file = st.file_uploader("Sube tu archivo de audio para transcribir", type=['wav', 'mp3'])

        if uploaded_file is not None:
            handle_uploaded_file(uploaded_file, transcription_method, model_name)

    elif option == 'Grabar audio en tiempo real':
        duration = 5
        # duration = st.slider("Selecciona la duración de la grabación (segundos)", 1, 10, 5)
        # st.write("Duración de la grabación:", duration, "segundos")

        # Browser-side recorder; returns WAV-encoded bytes once the speaker
        # pauses longer than pause_threshold seconds.
        audio_bytes = audio_recorder(pause_threshold=duration, sample_rate=16_000)

        if audio_bytes:
            st.write("Grabación finalizada. Transcribiendo...")
            with st.spinner('Transcribiendo...'):
                # Save recorded audio to a temporary file
                with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_audio:
                    temp_path = temp_audio.name
                    temp_audio.write(audio_bytes)

                # Perform transcription
                transcript = perform_transcription(transcription_method, model_name, temp_path)

            st.text_area('Resultado de la Transcripción:', transcript, height=200)
|
97 |
|
requirements.txt
CHANGED
@@ -1,6 +1,4 @@
|
|
1 |
-
|
2 |
-
numpy
|
3 |
-
scipy
|
4 |
pydub
|
5 |
streamlit
|
6 |
python-dotenv
|
|
|
1 |
+
audio-recorder-streamlit
|
|
|
|
|
2 |
pydub
|
3 |
streamlit
|
4 |
python-dotenv
|