Spaces:

snyamson
/

audio_to_text_transcription

Runtime error

App Files Files Community

audio_to_text_transcription / app.py

snyamson

Update app.py

69bd4f7 over 1 year ago

raw

history blame contribute delete

1.79 kB

	import streamlit as st
	import torchaudio
	from transformers import WhisperProcessor, WhisperForConditionalGeneration

	# Streamlit app: upload an audio file, split it into fixed-length segments,
	# transcribe each segment with Whisper (tiny, English-only) and display the
	# joined transcript.

	# Load the Whisper model and processor.
	# NOTE(review): Streamlit presumably re-executes this script on every
	# interaction, which would repeat these loads; consider caching them once
	# the deployed Streamlit version is confirmed to support it.
	processor = WhisperProcessor.from_pretrained("openai/whisper-tiny.en")
	model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")

	# Title
	st.title('Audio to Text Transcription')

	# Sidebar for file upload
	st.sidebar.title("Upload your audio file")
	uploaded_file = st.sidebar.file_uploader("Choose an audio file", type=["mp3", "wav", "mp4", "m4a"])

	# Everything below needs an uploaded file, so it all lives inside this guard.
	if uploaded_file:
	    st.sidebar.audio(uploaded_file)

	    # Sampling rate the audio is resampled to before being handed to the
	    # Whisper processor.
	    target_rate = 16000

	    # Decode the upload and resample it to the target rate.
	    audio_tensor, sampling_rate = torchaudio.load(uploaded_file)
	    resampler = torchaudio.transforms.Resample(sampling_rate, target_rate)
	    resampled_waveform = resampler(audio_tensor)

	    # Only the first channel is transcribed (same as before).
	    waveform = resampled_waveform[0]
	    total_samples = waveform.shape[0]

	    # Segment duration in seconds. Whisper's feature extractor works on a
	    # 30-second window and truncates anything longer, so longer segments
	    # would silently lose most of their audio.
	    segment_duration = 30
	    segment_samples = segment_duration * target_rate
	    segment_transcriptions = []

	    # Transcribe each segment. Stepping with range() also covers the final
	    # partial segment and clips shorter than one segment, which a floored
	    # segment count would skip entirely.
	    for start in range(0, total_samples, segment_samples):
	        # Slicing past the end is safe: the last segment is simply shorter.
	        segment = waveform[start:start + segment_samples]

	        # The processor expects array-like audio, so pass a NumPy array.
	        input_features = processor(
	            segment.numpy(), sampling_rate=target_rate, return_tensors="pt"
	        ).input_features

	        predicted_ids = model.generate(input_features)
	        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)

	        # Strip so that joining with a space does not double up whitespace.
	        segment_transcriptions.append(transcription[0].strip())

	    # Combine segment transcriptions into the full transcript
	    full_transcript = " ".join(segment_transcriptions)

	    # Display the transcript
	    st.header("Transcription")
	    st.write(full_transcript)