Spaces:

snyamson
/

audio_to_text_transcription

Runtime error

App Files Files Community

snyamson committed on Dec 21, 2023

Commit

49a31b4

1 Parent(s): 1c4efab

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -0

app.py CHANGED Viewed

	@@ -0,0 +1,46 @@

+import streamlit as st
+import torchaudio
+from transformers import WhisperProcessor, WhisperForConditionalGeneration
+# Load the Whisper model and processor
+processor = WhisperProcessor.from_pretrained("openai/whisper-tiny.en")
+model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
+# Sidebar for file upload
+st.sidebar.title("Upload your audio file")
+uploaded_file = st.sidebar.file_uploader("Choose an audio file", type=["mp3", "wav", "mp4"])
+if uploaded_file:
+    st.sidebar.audio(uploaded_file)
+    # Process the uploaded file
+    audio_tensor, sampling_rate = torchaudio.load(uploaded_file)
+    resampler = torchaudio.transforms.Resample(sampling_rate, 16000)
+    resampled_waveform = resampler(audio_tensor)
+    segment_duration = 120  # Segment duration in seconds (2 minutes)
+    num_segments = len(resampled_waveform[0]) // (segment_duration * 16000)
+    segment_transcriptions = []
+    # Transcribe each segment
+    for i in range(num_segments):
+        start = i * segment_duration * 16000
+        end = min(len(resampled_waveform[0]), (i + 1) * segment_duration * 16000)
+        segment = resampled_waveform[0][start:end]
+        # Transcribe the segment
+        input_features = processor(
+            segment, sampling_rate=16000, return_tensors="pt"
+        ).input_features
+        predicted_ids = model.generate(input_features)
+        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
+        segment_transcriptions.append(transcription[0])
+    # Combine segment transcriptions into the full transcript
+    full_transcript = " ".join(segment_transcriptions)
+    # Display the transcript
+    st.header("Transcription")
+    st.write(full_transcript)