Spaces:

shukdevdatta123
/

VoiceChat

Running

shukdevdatta123 committed on Jan 7

Commit

27541b9

verified ·

1 Parent(s): 9847686

Update text_speech_utils.py

Files changed (1) hide show

text_speech_utils.py CHANGED Viewed

@@ -1,9 +1,12 @@
 import sounddevice as sd
 import soundfile as sf
-import speech_recognition as sr
 from gtts import gTTS
 import os
 # Function to record audio using sounddevice and save it as a .wav file
 def record_audio(filename, sec=5, sr=44100):
     print("Recording...")
@@ -15,24 +18,13 @@ def record_audio(filename, sec=5, sr=44100):
     sf.write(filename, audio_data, sr)
     print(f"Audio saved as {filename}")
-# Function to transcribe audio using Google's speech recognition
 def transcribe_audio(filename):
-    recognizer = sr.Recognizer()
-    # Open the audio file using SpeechRecognition
-    with sr.AudioFile(filename) as source:
-        audio = recognizer.record(source)
-    try:
-        print("Transcribing audio...")
-        # Use Google's speech recognition API
-        text = recognizer.recognize_google(audio)
-        print(f"Transcription: {text}")
-        return {"text": text}
-    except sr.UnknownValueError:
-        return {"text": "Sorry, I couldn't understand the audio."}
-    except sr.RequestError as e:
-        return {"text": f"Error in request: {e}"}
 # Function to save text as an audio file using gTTS (Google Text-to-Speech)
 def save_text_as_audio(text, audio_filename):

 import sounddevice as sd
 import soundfile as sf
+import whisper
 from gtts import gTTS
 import os
+# Load the Whisper model
+model = whisper.load_model("base")  # You can use other versions like "small", "medium", or "large"
 # Function to record audio using sounddevice and save it as a .wav file
 def record_audio(filename, sec=5, sr=44100):
     print("Recording...")
     sf.write(filename, audio_data, sr)
     print(f"Audio saved as {filename}")
+# Function to transcribe audio using Whisper
 def transcribe_audio(filename):
+    print("Transcribing audio...")
+    result = model.transcribe(filename)
+    text = result['text']
+    print(f"Transcription: {text}")
+    return {"text": text}
 # Function to save text as an audio file using gTTS (Google Text-to-Speech)
 def save_text_as_audio(text, audio_filename):