shukdevdatta123 committed on
Commit
27541b9
·
verified ·
1 Parent(s): 9847686

Update text_speech_utils.py

Browse files
Files changed (1) hide show
  1. text_speech_utils.py +10 -18
text_speech_utils.py CHANGED
@@ -1,9 +1,12 @@
1
  import sounddevice as sd
2
  import soundfile as sf
3
- import speech_recognition as sr
4
  from gtts import gTTS
5
  import os
6
 
 
 
 
7
  # Function to record audio using sounddevice and save it as a .wav file
8
  def record_audio(filename, sec=5, sr=44100):
9
  print("Recording...")
@@ -15,24 +18,13 @@ def record_audio(filename, sec=5, sr=44100):
15
  sf.write(filename, audio_data, sr)
16
  print(f"Audio saved as {filename}")
17
 
18
- # Function to transcribe audio using Google's speech recognition
19
  def transcribe_audio(filename):
20
- recognizer = sr.Recognizer()
21
-
22
- # Open the audio file using SpeechRecognition
23
- with sr.AudioFile(filename) as source:
24
- audio = recognizer.record(source)
25
-
26
- try:
27
- print("Transcribing audio...")
28
- # Use Google's speech recognition API
29
- text = recognizer.recognize_google(audio)
30
- print(f"Transcription: {text}")
31
- return {"text": text}
32
- except sr.UnknownValueError:
33
- return {"text": "Sorry, I couldn't understand the audio."}
34
- except sr.RequestError as e:
35
- return {"text": f"Error in request: {e}"}
36
 
37
  # Function to save text as an audio file using gTTS (Google Text-to-Speech)
38
  def save_text_as_audio(text, audio_filename):
 
1
  import sounddevice as sd
2
  import soundfile as sf
3
+ import whisper
4
  from gtts import gTTS
5
  import os
6
 
7
+ # Load the Whisper model
8
+ model = whisper.load_model("base") # You can use other versions like "small", "medium", or "large"
9
+
10
  # Function to record audio using sounddevice and save it as a .wav file
11
  def record_audio(filename, sec=5, sr=44100):
12
  print("Recording...")
 
18
  sf.write(filename, audio_data, sr)
19
  print(f"Audio saved as {filename}")
20
 
21
+ # Function to transcribe audio using Whisper
22
  def transcribe_audio(filename):
23
+ print("Transcribing audio...")
24
+ result = model.transcribe(filename)
25
+ text = result['text']
26
+ print(f"Transcription: {text}")
27
+ return {"text": text}
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  # Function to save text as an audio file using gTTS (Google Text-to-Speech)
30
  def save_text_as_audio(text, audio_filename):