File size: 2,415 Bytes
7ff8b05
8bf0bb0
a703b74
9f5c443
 
 
496c5b5
 
 
 
9f5c443
496c5b5
54450a0
 
 
 
 
8bf0bb0
 
 
bd64ae8
 
 
7ff8b05
bd64ae8
 
802d13a
8bf0bb0
 
 
 
cb4f0e8
8bf0bb0
54450a0
5237203
9f5c443
 
 
 
8bf0bb0
 
cb4f0e8
 
9f5c443
cb4f0e8
 
8bf0bb0
9f5c443
 
 
 
 
 
 
 
 
 
 
8bf0bb0
 
 
9f5c443
 
 
8bf0bb0
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import streamlit as st
from stable_whisper import load_model
from stable_whisper import load_hf_whisper
from pydub import AudioSegment
import webvtt
import pysrt
import requests
import os

# Variables
# TODO: in the code shown here the entered API token is only tested for being
# non-empty; it is never compared against a stored secret such as the one below.
#valid_api_token = st.secrets["API_TOKEN"]

st.title("Speech-to-Text")

# Short usage description. The wording names SRT because the app writes and
# serves "audio.srt".
with st.expander("README"):
    st.write(
        "This little tool accepts an audio file. After choosing the model, "
        "an SRT subtitle file will be generated. The content of the subtitle "
        "file will be shown and the user can choose to download it. It can be "
        "used as a subtitle file, e.g. in DaVinci Resolve (Import Subtitles)."
    )

# Upload audio file
uploaded_file = st.file_uploader("Upload Audio File", type=["mp3", "wav", "mov"])

# Free tier or API token option
use_free_tier = st.checkbox("Free Tier (Max 2 minutes)")
# Mask the token while it is typed so it is not displayed in clear text.
api_token = st.text_input("API Token (Unlimited)", type="password")

# Model selection
model_size = st.selectbox("Model Size", ("tiny", "base", "small", "medium"))

def transcribe_to_subtitle(audio_bytes, model_name):
    """Transcribe audio and render the resulting subtitles in the Streamlit app.

    Loads the model named ``model_name``, transcribes ``audio_bytes``, exports
    the result as an SRT file, shows the caption text on the page and offers
    the SRT file for download.

    Args:
        audio_bytes: Audio data handed unchanged to ``model.transcribe``
            (the caller passes the raw bytes of the uploaded file).
        model_name: Model identifier handed to ``load_model``.

    Returns:
        ``None`` on success. If transcription or export raises, the message is
        shown with ``st.error`` and ``{"error": <message>}`` is returned.
    """
    # Function-scope import so this block carries its own new dependency.
    import tempfile

    # Load model based on selection
    model = load_model(model_name)

    # TODO: the free-tier limit ("Max 2 minutes") is not enforced here yet.

    # Write the subtitle file into a private temporary directory instead of a
    # fixed "audio.srt" in the working directory: concurrent sessions cannot
    # overwrite each other's file, and the file is removed even if a later
    # step raises.
    with tempfile.TemporaryDirectory() as workdir:
        srt_path = os.path.join(workdir, "audio.srt")

        # Transcribe audio and export it (the .srt extension selects SRT output).
        try:
            result = model.transcribe(audio_bytes, verbose=True)
            result.to_srt_vtt(srt_path)
        except Exception as e:
            # UI boundary: surface the failure to the user. Previously the
            # message was only returned and the caller ignored it.
            message = f"Error during transcription: {str(e)}"
            st.error(message)
            return {"error": message}

        captions = pysrt.open(srt_path)
        # Keep the raw bytes so the download still works after the temporary
        # directory has been deleted.
        with open(srt_path, "rb") as f:
            srt_data = f.read()

    # Echo every caption to the server console (debug output).
    for caption in captions:
        print(caption.start)
        print(caption.text)
        print(caption.end)
        print()

    # NOTE(review): unsafe_allow_html is kept from the original; presumably the
    # exported captions can contain markup tags that should be rendered - confirm.
    output = captions.text
    st.markdown(output, unsafe_allow_html=True)

    # Download option. The label names SRT because that is the format served.
    st.success("Transcription successful! Download subtitle file?")
    st.download_button("Download Subtitle in SRT Format", srt_data, "audio.srt")

# Start processing as soon as the user has provided a file.
if uploaded_file is not None:
    audio_bytes = uploaded_file.read()
    # Transcription is allowed when the free-tier box is ticked or any
    # non-empty API token was entered; otherwise ask for a token.
    if use_free_tier or api_token:
        transcribe_to_subtitle(audio_bytes, model_size)
    else:
        st.error("API token required for non-free tier usage")