Spaces:
Sleeping
Sleeping
File size: 2,666 Bytes
7ff8b05 8bf0bb0 a703b74 9f5c443 496c5b5 9f5c443 496c5b5 54450a0 8bf0bb0 bd64ae8 7ff8b05 c75a5bc bd64ae8 802d13a 8bf0bb0 cb4f0e8 8bf0bb0 54450a0 5237203 9f5c443 8bf0bb0 cb4f0e8 9fb8da4 cb4f0e8 c75a5bc 9f5c443 9fb8da4 8bf0bb0 9f5c443 8bf0bb0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import os
import tempfile

import pysrt
import requests
import streamlit as st
import webvtt
from pydub import AudioSegment
from stable_whisper import load_hf_whisper
from stable_whisper import load_model
# Page header and input widgets. The names assigned here (uploaded_file,
# use_free_tier, api_token, translate, model_size) are read further down in
# this script.
# NOTE(review): the secret lookup below is disabled, so the token typed by the
# user is never compared against a stored value.
# valid_api_token = st.secrets["API_TOKEN"]
st.title("Speech-to-Text")

with st.expander("README"):
    st.write(
        "This little tool accepts an audio file. After choosing the model, "
        "an SRT subtitle file will be generated. The content of the subtitle "
        "file will be shown and the user can choose to download it. It can be "
        "used as a subtitle file, e.g. in DaVinci Resolve via Import Subtitles."
    )

# Upload audio file
uploaded_file = st.file_uploader("Upload Audio File", type=["mp3", "wav", "mov"])

# Free tier or API token option
use_free_tier = st.checkbox("Free Tier (Max 2 minutes)")
api_token = st.text_input("API Token (Unlimited)")

# Should the transcription be translated to English?
translate = st.checkbox("Would you like a translation to english?")

# Model selection
model_size = st.selectbox("Model Size", ("tiny", "base", "small", "medium"))
def transcribe_to_subtitle(audio_bytes, model_name):
    """Transcribe audio, display the text, and offer the subtitles for download.

    The model is loaded with ``load_model(model_name)``. When the module-level
    ``translate`` flag is truthy, ``task='translate'`` is passed to
    ``model.transcribe``. The result is written as ``audio.srt`` inside a
    per-call temporary directory, parsed with ``pysrt``, echoed to stdout,
    rendered with ``st.markdown`` and exposed through ``st.download_button``.

    Args:
        audio_bytes: Audio content handed unchanged to ``model.transcribe``.
        model_name: Model identifier handed unchanged to ``load_model``.

    Returns:
        ``None`` on success, or ``{"error": <message>}`` when transcription or
        writing the subtitle file raised. The caller is responsible for
        surfacing that message.
    """
    # Load model based on selection
    model = load_model(model_name)

    # NOTE: the free-tier length check was commented out in the original and
    # is therefore not enforced here.

    # Only pass ``task`` when a translation was requested, so the default
    # transcription call stays exactly as before.
    transcribe_options = {"verbose": True}
    if translate:
        transcribe_options["task"] = "translate"

    # A private temporary directory keeps concurrent sessions from overwriting
    # each other's file and guarantees cleanup on every exit path.
    with tempfile.TemporaryDirectory() as work_dir:
        srt_path = os.path.join(work_dir, "audio.srt")

        try:
            result = model.transcribe(audio_bytes, **transcribe_options)
            result.to_srt_vtt(srt_path)
        except Exception as e:
            return {"error": f"Error during transcription: {str(e)}"}

        captions = pysrt.open(srt_path)
        # Echo every cue to stdout (server-side log).
        for caption in captions:
            print(caption.start)
            print(caption.text)
            print(caption.end)
            print()

        # NOTE(review): the transcript is rendered with HTML enabled; confirm
        # this is intended for model-generated text.
        st.markdown(captions.text, unsafe_allow_html=True)

        # Read the bytes before the temporary directory is removed.
        with open(srt_path, "rb") as f:
            subtitle_bytes = f.read()

    # Download option
    st.success("Transcription successful! Download subtitle file?")
    st.download_button("Download Subtitle in SRT Format", subtitle_bytes, "audio.srt")
# Run the transcription once a file has been uploaded.
if uploaded_file is not None:
    # getvalue() returns the whole upload regardless of the current stream
    # position, unlike read().
    audio_bytes = uploaded_file.getvalue()

    # Without the free-tier box ticked, a non-empty token must be supplied.
    if not use_free_tier and not api_token:
        st.error("API token required for non-free tier usage")
    else:
        outcome = transcribe_to_subtitle(audio_bytes, model_size)
        # transcribe_to_subtitle reports failures as {"error": message};
        # show it instead of silently dropping it.
        if isinstance(outcome, dict) and "error" in outcome:
            st.error(outcome["error"])