"""Live microphone transcription: record audio in blocks, distinguish speech
from silence, and hand finished recordings to utils.transcribe."""

import threading
import time

import numpy as np
import sounddevice as sd
import soundfile as sf
import whisper

# utils.transcribe is expected to take the path of a WAV file and return a
# (transcription_text, user) tuple, mirroring the local transcribe() fallback below.
from utils import transcribe as transcribe_api

SAMPLE_RATE = 16000      # Hz
CHANNELS = 1             # mono input
BLOCKSIZE = 8000         # frames per read (0.5 s at 16 kHz)
DURATION = 0.5           # seconds captured by the follow-up read
THRESHOLD = 0.015        # mean absolute amplitude above which a block counts as speech
SILENT_THRESHOLD = 3     # seconds of silence before the recording is finalised

silence_duration = 0
output_file = sf.SoundFile(
    'database/recording.wav', mode='w', samplerate=SAMPLE_RATE, channels=CHANNELS)

transcription_in_progress = False
queued = False
first_run = True
transcription_text = ""
model = None             # Whisper model, loaded lazily on first use of transcribe()

def transcribe(audio):
    # Local Whisper-based fallback; the live loop below uses utils.transcribe instead.
    global model, first_run
    if first_run:
        model = whisper.load_model("base")
        first_run = False
    result = model.transcribe(audio)
    transcription = result['text']

    user = "Vatsal"
    return transcription, user

def transcription():
    # Transcribe the current recording and publish the result via module globals.
    global transcription_in_progress
    global transcription_text
    transcription_text, user = transcribe_api('database/recording.wav')
    print("-" * 100)
    print(f'Transcription: {transcription_text} from user {user}')
    print("-" * 100)
    transcription_in_progress = False

def listen(stream):
    global transcription_in_progress
    global queued
    global silence_duration
    global output_file

    # Capture one block, wait, then capture a second block, so each call
    # appends roughly one second of audio to the recording.
    audio_data, _ = stream.read(BLOCKSIZE)
    output_file.write(audio_data)

    time.sleep(0.5)

    audio_data, _ = stream.read(int(DURATION * SAMPLE_RATE))
    output_file.write(audio_data)

    level = float(np.abs(audio_data).mean())
    if level > THRESHOLD:
        silence_duration = 0
        if transcription_in_progress:
            print('Audio detected! Transcription already in progress, queueing...')
            queued = True
        else:
            transcription_in_progress = True
            print('Audio detected! Transcribing...')
            threading.Thread(target=transcription).start()

    else:
        silence_duration += BLOCKSIZE / float(SAMPLE_RATE)
        if silence_duration >= SILENT_THRESHOLD:
            # Enough silence: flush any queued transcription, persist the text,
            # and start a fresh recording file.
            if queued:
                transcription()
                queued = False

            with open('./database/input.txt', 'w', encoding="utf-8") as write_to:
                # Whisper output typically begins with a leading space; strip it.
                write_to.write(transcription_text.lstrip())
            silence_duration = 0
            output_file.close()
            output_file = sf.SoundFile(
                'database/recording.wav', mode='w', samplerate=SAMPLE_RATE, channels=CHANNELS)

def live_listen():
    # Continuously read from the default input device and hand blocks to listen().
    with sd.InputStream(channels=CHANNELS, blocksize=BLOCKSIZE, samplerate=SAMPLE_RATE) as stream:
        print("STARTING LIVE TRANSCRIPTION")
        while True:
            listen(stream)


if __name__ == "__main__":
    live_listen()