shukdevdatta123 committed on
Commit
d19e200
·
verified ·
1 Parent(s): 4a8f80a

Update text_speech_utils.py

Browse files
Files changed (1) hide show
  1. text_speech_utils.py +27 -21
text_speech_utils.py CHANGED
@@ -1,41 +1,47 @@
 
 
1
  import speech_recognition as sr
2
  from gtts import gTTS
3
  import os
4
- import wave
5
 
 
6
  def record_audio(filename, sec=5, sr=44100):
7
- # Initialize recognizer class (for recognizing speech)
8
- recognizer = sr.Recognizer()
9
-
10
- # Set the microphone for recording
11
- with sr.Microphone() as source:
12
- print("Recording... Speak now!")
13
- recognizer.adjust_for_ambient_noise(source) # Adjust for ambient noise
14
- audio = recognizer.listen(source, timeout=sec)
15
-
16
- # Save the audio to a file
17
- with open(filename, "wb") as f:
18
- f.write(audio.get_wav_data())
19
 
 
20
  def transcribe_audio(filename):
21
  recognizer = sr.Recognizer()
22
 
 
23
  with sr.AudioFile(filename) as source:
24
  audio = recognizer.record(source)
25
 
26
  try:
27
- # Recognize speech using Google's speech recognition
28
- transcript = recognizer.recognize_google(audio)
29
- return {"text": transcript}
 
 
30
  except sr.UnknownValueError:
31
- return {"text": "Sorry, I could not understand the audio."}
32
  except sr.RequestError as e:
33
- return {"text": f"Request failed; {e}"}
34
 
 
35
  def save_text_as_audio(text, audio_filename):
36
- tts = gTTS(text=text, lang='en', slow=False)
 
37
  tts.save(audio_filename)
 
38
 
 
39
  def play_audio(filename):
40
- # Play audio using the default system player (e.g., vlc, mplayer)
41
- os.system(f"start {filename}") # For Windows; on Linux/macOS use `os.system(f"mpg321 {filename}")`
 
1
+ import sounddevice as sd
2
+ import soundfile as sf
3
  import speech_recognition as sr
4
  from gtts import gTTS
5
  import os
 
6
 
7
# Record from the microphone with sounddevice and store the result as a .wav file
def record_audio(filename, sec=5, sr=44100):
    """Record ``sec`` seconds of mono audio and write it to ``filename``.

    Args:
        filename: Path of the file the recording is written to.
        sec: Length of the recording in seconds.
        sr: Sample rate in Hz. Inside this function the name refers to the
            sample rate, not the module-level ``speech_recognition`` alias.
    """
    print("Recording...")

    # One channel of 16-bit samples; the frame count is duration times rate.
    frame_count = int(sec * sr)
    samples = sd.rec(frame_count, samplerate=sr, channels=1, dtype='int16')
    sd.wait()  # Wait until the recording has finished

    # Write the captured samples out with soundfile.
    sf.write(filename, samples, sr)
    print(f"Audio saved as {filename}")
 
 
 
 
17
 
18
# Transcribe a recorded audio file with Google's speech recognition
def transcribe_audio(filename):
    """Transcribe the audio file ``filename``.

    Returns:
        A dict with a single ``"text"`` key holding either the transcript
        or a human-readable message when recognition fails.
    """
    recognizer = sr.Recognizer()

    # Read the whole file through SpeechRecognition's file source.
    with sr.AudioFile(filename) as source:
        audio = recognizer.record(source)

    print("Transcribing audio...")
    try:
        # Send the audio to Google's speech recognition API.
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return {"text": "Sorry, I couldn't understand the audio."}
    except sr.RequestError as e:
        return {"text": f"Error in request: {e}"}

    print(f"Transcription: {text}")
    return {"text": text}
36
 
37
# Synthesize speech from text with gTTS (Google Text-to-Speech) and save it to a file
def save_text_as_audio(text, audio_filename):
    """Convert ``text`` to English speech and save it to ``audio_filename``."""
    print("Converting text to speech...")
    gTTS(text=text, lang='en', slow=False).save(audio_filename)
    print(f"Audio saved as {audio_filename}")
43
 
44
# Function to play audio using the system's default audio player
def play_audio(filename):
    """Open ``filename`` with the platform's default player.

    The file is handed to the OS opener without going through a shell, so
    names containing spaces or shell metacharacters are treated as plain
    paths. Failures to launch a player are reported on stdout, not raised.

    Args:
        filename: Path of the audio file to play.
    """
    import subprocess
    import sys

    print("Playing audio...")
    try:
        if sys.platform.startswith("win"):
            # Same effect as the shell's ``start`` command, without a shell.
            os.startfile(filename)
        elif sys.platform == "darwin":
            subprocess.run(["open", filename], check=False)
        else:
            subprocess.run(["xdg-open", filename], check=False)
    except OSError as e:
        # Missing file or missing opener: report it, keep the caller running.
        print(f"Could not play {filename}: {e}")