Spaces:

TIMBOVILL
/

UltraSingerUI

Sleeping

App Files Files Community

UltraSingerUI / src /modules /Speech_Recognition /speech_recognition.py

TIMBOVILL

Upload 3 files

80d8416 verified about 1 year ago

raw

history blame contribute delete

3.28 kB

	"""Docstring"""

	import os

	import speech_recognition as sr
	from pydub import AudioSegment
	from pydub.silence import split_on_silence

	from modules.console_colors import ULTRASINGER_HEAD

	# todo: Code from here: https://www.thepythoncode.com/article/using-speech-recognition-to-convert-speech-to-text-python


	def print_text(wav_file):
	    """Transcribe a WAV file and print the recognized text to stdout.

	    The whole file is loaded through ``sr.AudioFile`` and handed to
	    ``Recognizer.recognize_google``. Nothing is returned, and exceptions
	    raised by the recognizer are not caught here.

	    Args:
	        wav_file: Audio source passed straight to ``sr.AudioFile``.
	    """
	    # Recognition is requested without a language argument (English speech).
	    engine = sr.Recognizer()

	    with sr.AudioFile(wav_file) as wav_source:
	        # Read the complete recording into memory ...
	        recording = engine.record(wav_source)
	        # ... and convert the speech into text.
	        recognized = engine.recognize_google(recording)

	    print(recognized)


	def get_large_audio_transcription(
	    wav_file,
	    *,
	    min_silence_len=500,
	    silence_margin_db=14,
	    keep_silence=500,
	    folder_name="audio-chunks",
	):
	    """Transcribe a long WAV file by splitting it on silence.

	    The audio is cut into chunks wherever a silent stretch is found, each
	    chunk is written to ``folder_name`` as ``chunk<N>.wav`` and sent to
	    ``Recognizer.recognize_google`` on its own. Chunks that cannot be
	    understood are reported on stdout and skipped.

	    Args:
	        wav_file: WAV source passed to ``AudioSegment.from_wav``.
	        min_silence_len: Minimum length of a silent stretch (milliseconds)
	            that triggers a split. Tune this for the target audio.
	        silence_margin_db: How far below the clip's average loudness
	            (``sound.dBFS``) audio must be to count as silence.
	        keep_silence: Amount of silence (milliseconds) kept around each
	            chunk so words are not cut off.
	        folder_name: Directory that receives the exported chunk files. It is
	            created when missing; exported files are left in place.

	    Returns:
	        The concatenation of all recognized chunks, each capitalized and
	        terminated with ``". "``. An empty string when nothing was
	        recognized.

	    Raises:
	        Anything other than ``sr.UnknownValueError`` raised by the
	        recognizer (for example ``sr.RequestError``) propagates to the
	        caller.
	    """
	    sound = AudioSegment.from_wav(wav_file)

	    # The silence threshold is relative to the clip's own average loudness.
	    chunks = split_on_silence(
	        sound,
	        min_silence_len=min_silence_len,
	        silence_thresh=sound.dBFS - silence_margin_db,
	        keep_silence=keep_silence,
	    )

	    # exist_ok avoids the check-then-create race of isdir() + mkdir().
	    os.makedirs(folder_name, exist_ok=True)

	    recognizer = sr.Recognizer()
	    sentences = []

	    for i, audio_chunk in enumerate(chunks, start=1):
	        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
	        # export() returns the output file object; close it right away so
	        # one handle is not leaked per chunk.
	        audio_chunk.export(chunk_filename, format="wav").close()

	        # Only the read needs the file open; release it before the
	        # (slow) recognition request.
	        with sr.AudioFile(chunk_filename) as source:
	            audio_listened = recognizer.record(source)

	        try:
	            text = recognizer.recognize_google(audio_listened)
	        except sr.UnknownValueError as error:
	            # Unintelligible chunk: report it and carry on with the next one.
	            print("Error:", str(error))
	            continue

	        sentence = f"{text.capitalize()}. "
	        print(chunk_filename, ":", sentence)
	        sentences.append(sentence)

	    return "".join(sentences)


	def transcribe_audio(audio_file):
	    """Transcribe an audio file and report when the speech starts and ends.

	    The file is read completely via ``sr.AudioFile`` and sent to
	    ``Recognizer.recognize_google`` with ``show_all=True`` so that the raw
	    response, not just the best transcript string, is available.

	    Args:
	        audio_file: Audio source passed straight to ``sr.AudioFile``.

	    Returns:
	        A ``(transcript, start_time, end_time)`` tuple taken from the first
	        alternative of the first result: its ``transcript``, the
	        ``startTime`` of its first word and the ``endTime`` of its last
	        word. ``None`` when the audio could not be understood, the service
	        request failed, or the response lacks the expected fields; a
	        message is printed in each of those cases.
	    """
	    recognizer = sr.Recognizer()
	    with sr.AudioFile(audio_file) as source:
	        audio = recognizer.record(source)

	    # Keep the try body to the one call that raises the recognizer errors.
	    try:
	        response = recognizer.recognize_google(audio, show_all=True)
	    except sr.UnknownValueError:
	        print(f"{ULTRASINGER_HEAD} Could not understand audio")
	        return None
	    except sr.RequestError as error:
	        print(f"{ULTRASINGER_HEAD} Error with recognizing service; {error}")
	        return None

	    # With show_all=True the raw response is returned as-is, so an empty or
	    # differently shaped response shows up here as a failed lookup instead
	    # of an UnknownValueError. Treat that as "nothing recognized" rather
	    # than letting KeyError/IndexError/TypeError escape.
	    # NOTE(review): the lookup path assumes a top-level "result" list with
	    # per-word timings - confirm against the installed SpeechRecognition
	    # version.
	    try:
	        best = response["result"][0]["alternative"][0]
	        words = best["words"]
	        start_time = words[0]["startTime"]
	        end_time = words[-1]["endTime"]
	        return best["transcript"], start_time, end_time
	    except (KeyError, IndexError, TypeError):
	        print(f"{ULTRASINGER_HEAD} Could not understand audio")
	        return None


	class SpeechToText:
	    """Placeholder class for speech-to-text functionality.

	    NOTE(review): only this docstring was present in the reviewed section;
	    confirm whether any members are defined further down.
	    """