import torch
from transformers import pipeline

# Characters that mark the end of a sentence.
EOS_TOKENS = [".", "!", "?"]


def transcribe_audio(audio_fpath, max_snt_len=100):
    # Whisper ASR pipeline in fp16 on the first GPU.
    pipe = pipeline("automatic-speech-recognition",
                    "openai/whisper-small",
                    torch_dtype=torch.float16,
                    device="cuda:0")
    # Enable the BetterTransformer fast path (requires the optimum package).
    pipe.model = pipe.model.to_bettertransformer()
    outputs = pipe(audio_fpath,
                   chunk_length_s=30,
                   batch_size=8,
                   return_timestamps=True)

    # Merge the timestamped chunks into sentence-level segments.
    sentences = []
    snt_start = None
    snt = ""
    for segment in outputs["chunks"]:
        text = segment["text"].strip()
        if not text:
            # Skip empty chunks; indexing their last character would crash.
            continue
        snt += f'{segment["text"]} '
        start_time, end_time = segment["timestamp"]
        # Whisper occasionally returns an open-ended final timestamp.
        if end_time is None:
            end_time = start_time + 0.1
        # Compare against None explicitly: a start time of 0.0 is falsy,
        # so `if not snt_start` would wrongly reset the sentence start.
        if snt_start is None:
            snt_start = start_time
        # Close the sentence on end-of-sentence punctuation or once it
        # grows past max_snt_len characters.
        if text[-1] in EOS_TOKENS or len(snt) > max_snt_len:
            sentences.append(
                {"text": snt.strip(), "start": snt_start, "end": end_time}
            )
            snt_start = None
            snt = ""
    # Flush any trailing text that never hit an end-of-sentence token.
    if snt:
        sentences.append(
            {"text": snt.strip(), "start": snt_start, "end": end_time}
        )

    # Render one "start end text" line per sentence.
    timestamped_text = ""
    for sentence in sentences:
        timestamped_text += (
            f'{sentence["start"]} {sentence["end"]} {sentence["text"]}\n'
        )
    return timestamped_text
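

# A minimal usage sketch, not part of the original file. It assumes
# "sample.wav" is a local audio file readable by ffmpeg and that a CUDA
# GPU is available for the fp16 pipeline above.
if __name__ == "__main__":
    print(transcribe_audio("sample.wav"))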