speech_recognize1

Sleeping

App Files Files Community

mr2along committed on Oct 11, 2024

Commit

8ef9310

verified ·

1 Parent(s): 37c445c

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -66

app.py CHANGED Viewed

@@ -1,69 +1,29 @@
 import speech_recognition as sr
 import difflib
-import wave
-import pyaudio
 import gradio as gr
-# Step 1: Record audio
-def record_audio(filename):
-    chunk = 1024  # Record in chunks of 1024 samples
-    sample_format = pyaudio.paInt16  # 16 bits per sample
-    channels = 1
-    fs = 44100  # Record at 44100 samples per second
-    seconds = 10  # Length of recording
-    p = pyaudio.PyAudio()  # Create an interface to PortAudio
-    print("Recording...")
-    stream = p.open(format=sample_format,
-                    channels=channels,
-                    rate=fs,
-                    frames_per_buffer=chunk,
-                    input=True)
-    frames = []  # Initialize array to store frames
-    # Store data in chunks for the specified duration
-    for _ in range(0, int(fs / chunk * seconds)):
-        data = stream.read(chunk)
-        frames.append(data)
-    # Stop and close the stream
-    stream.stop_stream()
-    stream.close()
-    p.terminate()
-    # Save the recorded audio as a WAV file
-    wf = wave.open(filename, 'wb')
-    wf.setnchannels(channels)
-    wf.setsampwidth(p.get_sample_size(sample_format))
-    wf.setframerate(fs)
-    wf.writeframes(b''.join(frames))
-    wf.close()
-    print("Recording completed.")
-# Step 2: Transcribe the audio file
-def transcribe_audio(filename):
     recognizer = sr.Recognizer()
-    # Open the audio file for transcription
-    with sr.AudioFile(filename) as source:
-        audio = recognizer.record(source)
-        try:
-            # Recognize the audio using Google Web Speech API
-            print("Transcribing the audio...")
-            transcription = recognizer.recognize_google(audio)
-            print("Transcription completed.")
-            return transcription
-        except sr.UnknownValueError:
-            print("Google Speech Recognition could not understand the audio")
-            return ""
-        except sr.RequestError as e:
-            print(f"Error with Google Speech Recognition service: {e}")
-            return ""
-# Step 3: Compare the transcribed text with the input paragraph
 def compare_texts(reference_text, transcribed_text):
     word_scores = []
     reference_words = reference_text.split()
@@ -100,12 +60,9 @@ def compare_texts(reference_text, transcribed_text):
     return output
 # Gradio Interface Function
-def gradio_function(paragraph):
-    # Record the audio (the filename will be 'recorded_audio.wav')
-    record_audio("recorded_audio.wav")
     # Transcribe the audio
-    transcribed_text = transcribe_audio("recorded_audio.wav")
     # Compare the original paragraph with the transcribed text
     comparison_result = compare_texts(paragraph, transcribed_text)
@@ -116,7 +73,10 @@ def gradio_function(paragraph):
 # Gradio Interface
 interface = gr.Interface(
     fn=gradio_function,
-    inputs=gr.inputs.Textbox(lines=5, label="Input Paragraph"),
     outputs="json",
     title="Speech Recognition Comparison",
     description="Input a paragraph, record your audio, and compare the transcription to the original text."

 import speech_recognition as sr
 import difflib
 import gradio as gr
+# Step 1: Transcribe the audio file
+def transcribe_audio(audio):
     recognizer = sr.Recognizer()
+    # Convert audio into recognizable format for the Recognizer
+    audio_file = sr.AudioFile(audio.name)
+    with audio_file as source:
+        audio_data = recognizer.record(source)
+    try:
+        # Recognize the audio using Google Web Speech API
+        print("Transcribing the audio...")
+        transcription = recognizer.recognize_google(audio_data)
+        print("Transcription completed.")
+        return transcription
+    except sr.UnknownValueError:
+        return "Google Speech Recognition could not understand the audio"
+    except sr.RequestError as e:
+        return f"Error with Google Speech Recognition service: {e}"
+# Step 2: Compare the transcribed text with the input paragraph
 def compare_texts(reference_text, transcribed_text):
     word_scores = []
     reference_words = reference_text.split()
     return output
 # Gradio Interface Function
+def gradio_function(paragraph, audio):
     # Transcribe the audio
+    transcribed_text = transcribe_audio(audio)
     # Compare the original paragraph with the transcribed text
     comparison_result = compare_texts(paragraph, transcribed_text)
 # Gradio Interface
 interface = gr.Interface(
     fn=gradio_function,
+    inputs=[
+        gr.inputs.Textbox(lines=5, label="Input Paragraph"),
+        gr.inputs.Audio(source="microphone", type="file", label="Record Audio")
+    ],
     outputs="json",
     title="Speech Recognition Comparison",
     description="Input a paragraph, record your audio, and compare the transcription to the original text."