shukdevdatta123 committed on
Commit
0cfe2ed
·
verified ·
1 Parent(s): f290fac

Update text_speech_utils.py

Browse files
Files changed (1) hide show
  1. text_speech_utils.py +36 -47
text_speech_utils.py CHANGED
@@ -1,52 +1,41 @@
1
- import openai
2
- import sounddevice as sd
3
- import audiofile as af
4
- from scipy.io.wavfile import write
5
  from gtts import gTTS
6
-
7
- import multiprocessing
8
- import pyttsx3
9
- import keyboard
10
-
11
def say(text):
    """Speak ``text`` aloud in a child process; pressing Enter interrupts it.

    The speech runs through ``pyttsx3.speak`` in a separate process so that it
    can be terminated while it is still talking.

    Args:
        text: The text to speak.
    """
    speaker = multiprocessing.Process(target=pyttsx3.speak, args=(text,))
    speaker.start()
    while speaker.is_alive():
        if keyboard.is_pressed('enter'):
            speaker.terminate()
            break
        # Wait briefly for the speaker to finish instead of spinning at full
        # CPU between key checks.
        speaker.join(timeout=0.05)
    # Reap the child whether it finished on its own or was terminated.
    speaker.join()
20
-
21
-
22
def record_audio(filename, sec, sr=44100):
    """Record ``sec`` seconds of two-channel audio and write it to ``filename``.

    Args:
        filename: Destination passed to ``scipy.io.wavfile.write``.
        sec: Length of the recording in seconds.
        sr: Sample rate in Hz.
    """
    frame_count = int(sec * sr)
    recording = sd.rec(
        frame_count,
        samplerate=sr,
        channels=2,
        blocking=False,
    )
    # The recording was started non-blocking; wait here until it is complete.
    sd.wait()
    write(filename, sr, recording)
26
-
27
def record_audio_manual(filename, sr=44100, max_sec=10):
    """Record two-channel audio between two Enter presses and save it.

    Recording starts after the first Enter press and stops at the second one,
    or when ``max_sec`` seconds have been captured, whichever comes first.

    Args:
        filename: Destination passed to ``scipy.io.wavfile.write``.
        sr: Sample rate in Hz.
        max_sec: Upper bound on the recording length in seconds.
    """
    import time

    input(" ** Press enter to start recording **")
    buffer = sd.rec(int(max_sec * sr), samplerate=sr, channels=2)
    started = time.monotonic()
    input(" ** Press enter to stop recording **")
    elapsed = time.monotonic() - started
    sd.stop()
    # Only the part of the buffer filled before the stop holds audio; the
    # frame count is estimated from the elapsed time and capped at the
    # buffer length so the unrecorded tail is not written to the file.
    captured = min(int(elapsed * sr), len(buffer))
    write(filename, sr, buffer[:captured])
33
-
34
def play_audio(filename):
    """Read the audio file ``filename`` and start playing it with sounddevice.

    Args:
        filename: Path of the audio file to read with ``audiofile``.
    """
    signal, sr = af.read(filename)
    # audiofile returns multi-channel audio as (channels, samples), while
    # sounddevice expects (frames, channels). Transposing fixes stereo files
    # and is a no-op for one-dimensional mono signals.
    sd.play(signal.T, sr)
37
 
38
def transcribe_audio(filename):
    """Transcribe the audio file ``filename`` with the ``whisper-1`` model.

    Args:
        filename: Path of the audio file; it is opened in binary mode.

    Returns:
        The result of ``openai.Audio.transcribe``.
    """
    # The context manager closes the file even if the API call raises.
    with open(filename, "rb") as audio_file:
        return openai.Audio.transcribe("whisper-1", audio_file)
43
-
44
def translate_audio(filename):
    """Translate the audio file ``filename`` with the ``whisper-1`` model.

    Args:
        filename: Path of the audio file; it is opened in binary mode.

    Returns:
        The result of ``openai.Audio.translate``.
    """
    # The context manager closes the file even if the API call raises.
    with open(filename, "rb") as audio_file:
        return openai.Audio.translate("whisper-1", audio_file)
 
 
 
49
 
50
def save_text_as_audio(text, audio_filename):
    """Convert ``text`` to English speech with gTTS and save it.

    Args:
        text: The text to synthesize.
        audio_filename: Path the synthesized audio is saved to.
    """
    gTTS(text=text, lang='en', slow=False).save(audio_filename)
 
 
 
 
 
1
+ import speech_recognition as sr
 
 
 
2
  from gtts import gTTS
3
+ import os
4
+ import wave
5
+
6
def record_audio(filename, sec=5, sr=44100):
    """Record speech from the default microphone and save it as WAV data.

    Args:
        filename: Path of the file the WAV data is written to.
        sec: Maximum number of seconds to wait for speech to start, and also
            the maximum length of the recorded phrase in seconds.
        sr: Sample rate in Hz requested from the microphone.

    Raises:
        speech_recognition.WaitTimeoutError: If no speech starts within
            ``sec`` seconds.
    """
    # The ``sr`` parameter shadows the module-level ``sr`` alias of
    # speech_recognition, so the library is imported here under another name.
    import speech_recognition as speech

    recognizer = speech.Recognizer()

    with speech.Microphone(sample_rate=sr) as source:
        print("Recording... Speak now!")
        # Calibrate the energy threshold so background noise is not
        # mistaken for speech.
        recognizer.adjust_for_ambient_noise(source)
        # ``timeout`` only limits the wait for speech to begin;
        # ``phrase_time_limit`` is what bounds the recording itself.
        audio = recognizer.listen(source, timeout=sec, phrase_time_limit=sec)

    with open(filename, "wb") as f:
        f.write(audio.get_wav_data())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
def transcribe_audio(filename):
    """Transcribe an audio file with Google's speech recognition.

    Args:
        filename: Path of the audio file opened via ``sr.AudioFile``.

    Returns:
        A dict with a single ``"text"`` key. It holds the transcript on
        success, or a human-readable message when the audio could not be
        understood or the recognition request failed.
    """
    recognizer = sr.Recognizer()

    # Load the whole file into an audio object.
    with sr.AudioFile(filename) as audio_source:
        recorded = recognizer.record(audio_source)

    try:
        message = recognizer.recognize_google(recorded)
    except sr.UnknownValueError:
        message = "Sorry, I could not understand the audio."
    except sr.RequestError as e:
        message = f"Request failed; {e}"

    return {"text": message}
34
 
35
def save_text_as_audio(text, audio_filename):
    """Synthesize ``text`` as English speech with gTTS and save it.

    Args:
        text: The text to convert to speech.
        audio_filename: Path the generated audio is saved to.
    """
    options = {"text": text, "lang": 'en', "slow": False}
    gTTS(**options).save(audio_filename)
38
+
39
def play_audio(filename):
    """Open ``filename`` with the operating system's default player.

    The file is handed to the platform launcher (``os.startfile`` on Windows,
    ``open`` on macOS, ``xdg-open`` elsewhere) without going through a shell,
    so paths containing spaces or shell metacharacters are passed intact.
    Playback is best-effort: if the launcher cannot be started, a message is
    printed and the function returns normally.

    Args:
        filename: Path of the audio file to play.
    """
    import subprocess
    import sys

    path = os.fspath(filename)
    try:
        if sys.platform.startswith("win"):
            os.startfile(path)
        elif sys.platform == "darwin":
            subprocess.run(["open", path], check=False)
        else:
            subprocess.run(["xdg-open", path], check=False)
    except OSError as exc:
        # Launcher missing or file not openable: report it instead of
        # crashing, matching the original's non-raising behavior.
        print(f"Could not play {path!r}: {exc}")