"""Live microphone transcription loop.

Audio is read from the default input device in short chunks and appended to
``database/recording.wav``.  When a chunk is louder than ``THRESHOLD`` a
transcription of the recording is started; after ``SILENT_THRESHOLD`` seconds
of accumulated quiet the latest transcription text is written to
``database/input.txt`` and the recording file is started afresh.
"""
import threading
import time

import numpy as np
import sounddevice as sd
import soundfile as sf
import whisper

from utils import transcribe as transcribe_api
# from SoundScribe.speakerID import find_user

SAMPLE_RATE = 16000       # samples per second requested from the input stream
CHANNELS = 1
BLOCKSIZE = 8000          # frames per stream block (0.5 s at SAMPLE_RATE)
DURATION = 0.5            # seconds of audio fetched by each stream read
THRESHOLD = 0.015         # mean absolute amplitude above which a chunk counts as sound
SILENT_THRESHOLD = 3      # seconds of accumulated quiet that end an utterance

RECORDING_PATH = 'database/recording.wav'
INPUT_TEXT_PATH = './database/input.txt'

# Seconds of quiet accumulated since the last loud chunk.
silence_duration = 0

# Recording currently being written; replaced with a fresh file after each
# utterance (see listen()).
output_file = sf.SoundFile(
    RECORDING_PATH, mode='w', samplerate=SAMPLE_RATE, channels=CHANNELS)

transcription_in_progress = False   # True while a transcription() call is running
queued = False                      # sound arrived while a transcription was running
first_run = True                    # True until the local Whisper model has been loaded
transcription_text = ""             # text produced by the most recent transcription()

# Whisper model cached by transcribe() so it is loaded only once.
_model = None


def transcribe(audio):
    """Transcribe *audio* with a locally loaded Whisper "base" model.

    The model is loaded on the first call and reused afterwards.

    Args:
        audio: Input forwarded unchanged to ``model.transcribe``.

    Returns:
        A ``(transcription, user)`` tuple.  ``user`` is currently the fixed
        placeholder ``"Vatsal"``.
    """
    # Both names must be declared global: without the declaration the
    # assignment below makes ``first_run`` local and the ``if`` test raises
    # UnboundLocalError, and a local model would be lost after the first call.
    global first_run, _model

    if first_run or _model is None:
        _model = whisper.load_model("base")
        first_run = False

    result = _model.transcribe(audio)
    # user = find_user("database/recording.wav")
    user = "Vatsal"
    return result['text'], user


def transcription():
    """Transcribe the current recording file and print the result.

    Stores the text in the module-level ``transcription_text`` and clears
    ``transcription_in_progress`` when finished.
    """
    global transcription_in_progress
    global transcription_text

    try:
        # transcribe_api comes from utils; it is unpacked as a (text, user) pair.
        transcription_text, user = transcribe_api(RECORDING_PATH)
        print("-" * 100)
        print(f'Transcription: {transcription_text} from user {user}')
        print("-" * 100)
    finally:
        # Always release the flag: if it stayed True after a failure, listen()
        # would only ever queue and never start another transcription.
        transcription_in_progress = False


def listen(stream):
    """Capture one round of audio from *stream* and update the state machine.

    Args:
        stream: An open input stream whose ``read(frames)`` returns a pair;
            the first element is the audio data, the second is ignored.
    """
    global transcription_in_progress
    global queued
    global silence_duration
    global output_file

    # NOTE(review): the two read calls were reconstructed from damaged source
    # text (only "* SAMPLE_RATE))" survived) -- confirm against the original.
    frames = int(DURATION * SAMPLE_RATE)
    audio_data, _ = stream.read(frames)
    output_file.write(audio_data)
    time.sleep(0.5)
    audio_data, _ = stream.read(frames)
    output_file.write(audio_data)

    # Only the most recently read chunk decides between sound and quiet.
    level = float(np.abs(audio_data).mean())

    if level > THRESHOLD:
        silence_duration = 0
        if transcription_in_progress:
            # A transcription is already running; remember that more audio
            # arrived so it is transcribed again once the speaker goes quiet.
            print('Audio detected! Transcribing...')
            queued = True
        else:
            transcription_in_progress = True
            print('Audio detected! Transcribing...')
            threading.Thread(target=transcription).start()
    elif level < THRESHOLD:
        silence_duration += BLOCKSIZE / float(SAMPLE_RATE)
        if silence_duration >= SILENT_THRESHOLD:
            if queued:
                # Runs synchronously on the calling thread.
                transcription()
                queued = False

            # NOTE(review): nesting of this write and of the reset below was
            # inferred from collapsed source -- confirm the intended level.
            # The [1:] drops the first character of the text (presumably a
            # leading space in the transcriber's output -- verify).
            with open(INPUT_TEXT_PATH, 'w', encoding="utf-8") as write_to:
                write_to.write(transcription_text[1:])

            # Start the next utterance with an empty recording file.
            silence_duration = 0
            output_file.close()
            output_file = sf.SoundFile(
                RECORDING_PATH, mode='w', samplerate=SAMPLE_RATE,
                channels=CHANNELS)


def live_listen():
    """Open the input stream and run listen() in an endless loop."""
    with sd.InputStream(channels=CHANNELS, blocksize=BLOCKSIZE,
                        samplerate=SAMPLE_RATE) as stream:
        print("STARTING LIVE TRANSCRIPTION")
        while True:
            listen(stream)


if __name__ == "__main__":
    live_listen()