Spaces:

shukdevdatta123
/

VoiceChat

Sleeping

App Files Files Community

shukdevdatta123 committed on Jan 7

Commit

d19e200

verified ·

1 Parent(s): 4a8f80a

Update text_speech_utils.py

Browse files

Files changed (1) hide show

text_speech_utils.py +27 -21

text_speech_utils.py CHANGED Viewed

@@ -1,41 +1,47 @@
 import speech_recognition as sr
 from gtts import gTTS
 import os
-import wave
 def record_audio(filename, sec=5, sr=44100):
-    # Initialize recognizer class (for recognizing speech)
-    recognizer = sr.Recognizer()
-    # Set the microphone for recording
-    with sr.Microphone() as source:
-        print("Recording... Speak now!")
-        recognizer.adjust_for_ambient_noise(source)  # Adjust for ambient noise
-        audio = recognizer.listen(source, timeout=sec)
-        # Save the audio to a file
-        with open(filename, "wb") as f:
-            f.write(audio.get_wav_data())
 def transcribe_audio(filename):
     recognizer = sr.Recognizer()
     with sr.AudioFile(filename) as source:
         audio = recognizer.record(source)
     try:
-        # Recognize speech using Google's speech recognition
-        transcript = recognizer.recognize_google(audio)
-        return {"text": transcript}
     except sr.UnknownValueError:
-        return {"text": "Sorry, I could not understand the audio."}
     except sr.RequestError as e:
-        return {"text": f"Request failed; {e}"}
 def save_text_as_audio(text, audio_filename):
-    tts = gTTS(text=text, lang='en', slow=False)
     tts.save(audio_filename)
 def play_audio(filename):
-    # Play audio using the default system player (e.g., vlc, mplayer)
-    os.system(f"start {filename}")  # For Windows; on Linux/macOS use `os.system(f"mpg321 {filename}")`

+import sounddevice as sd
+import soundfile as sf
 import speech_recognition as sr
 from gtts import gTTS
 import os
+# Function to record audio using sounddevice and save it as a .wav file
 def record_audio(filename, sec=5, sr=44100):
     """Record mono audio from the microphone and write it to ``filename``.

     Args:
         filename: Destination path handed to ``sf.write``.
         sec: Recording length in seconds.
         sr: Sample rate in Hz. Inside this function the name shadows the
             module-level ``speech_recognition`` alias ``sr``.

     Returns:
         None. The captured samples are written to ``filename``.
     """
     print("Recording...")
     frame_count = int(sec * sr)
     samples = sd.rec(frame_count, samplerate=sr, channels=1, dtype='int16')
     # Block here until the capture buffer has been filled.
     sd.wait()
     sf.write(filename, samples, sr)
     print(f"Audio saved as {filename}")
+# Function to transcribe audio using Google's speech recognition
 def transcribe_audio(filename):
     """Transcribe an audio file with Google's speech recognition.

     Args:
         filename: Path of the audio file opened through ``sr.AudioFile``.

     Returns:
         A dict with a single ``"text"`` key. It holds the transcript on
         success, or a human-readable message when the audio could not be
         understood or the recognition request failed.
     """
     speech_recognizer = sr.Recognizer()
     with sr.AudioFile(filename) as audio_source:
         recording = speech_recognizer.record(audio_source)
     print("Transcribing audio...")
     try:
         transcript = speech_recognizer.recognize_google(recording)
     except sr.UnknownValueError:
         return {"text": "Sorry, I couldn't understand the audio."}
     except sr.RequestError as err:
         return {"text": f"Error in request: {err}"}
     print(f"Transcription: {transcript}")
     return {"text": transcript}
+# Function to save text as an audio file using gTTS (Google Text-to-Speech)
 def save_text_as_audio(text, audio_filename):
     """Convert ``text`` to English speech with gTTS and save the result.

     Args:
         text: The text to synthesize.
         audio_filename: Output path passed to the gTTS ``save`` method.

     Returns:
         None. The synthesized audio is written to ``audio_filename``.
     """
     print("Converting text to speech...")
     gTTS(text=text, lang='en', slow=False).save(audio_filename)
     print(f"Audio saved as {audio_filename}")
+# Function to play audio using the system's default audio player
 def play_audio(filename):
     """Play an audio file with an external player.

     On Windows the file is opened with its associated application through
     ``os.startfile``. On other platforms ``mpg321`` is run on the file and
     the call blocks until the player exits.

     Args:
         filename: Path of the audio file to play.

     Returns:
         None. A launch failure is reported on stdout rather than raised.
     """
     # Function-scope imports: the module header does not import these.
     import subprocess
     import sys
     print("Playing audio...")
     try:
         if sys.platform.startswith("win"):
             # Same effect as the shell's `start`, but the path never goes
             # through a command line, so spaces and metacharacters are safe.
             os.startfile(filename)
         else:
             # `start` is a cmd.exe builtin and does not exist elsewhere;
             # pass an argument list so no shell ever parses the filename.
             subprocess.run(["mpg321", filename], check=False)
     except OSError as exc:
         # Missing file or missing player: stay non-raising, as os.system was.
         print(f"Could not play {filename}: {exc}")