File size: 2,666 Bytes
7ff8b05
8bf0bb0
a703b74
9f5c443
 
 
496c5b5
 
 
 
9f5c443
496c5b5
54450a0
 
 
 
 
8bf0bb0
 
 
bd64ae8
 
 
7ff8b05
c75a5bc
 
 
bd64ae8
 
802d13a
8bf0bb0
 
 
 
cb4f0e8
8bf0bb0
54450a0
5237203
9f5c443
 
 
 
8bf0bb0
 
cb4f0e8
9fb8da4
 
 
 
 
 
cb4f0e8
 
c75a5bc
9f5c443
 
 
 
 
 
 
 
 
9fb8da4
8bf0bb0
 
9f5c443
 
 
8bf0bb0
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import streamlit as st
from stable_whisper import load_model
from stable_whisper import load_hf_whisper
from pydub import AudioSegment
import webvtt
import pysrt
import requests
import os

# Variables
#valid_api_token = st.secrets["API_TOKEN"]

st.title("Speech-to-Text")

# Short usage notes for the page, shown inside an expander.
# The text describes what the script actually produces: an SRT file ("audio.srt").
with st.expander("README"):
  st.write("This little tool accepts an audio file. After choosing the model, an SRT subtitle file will be generated. The content of the subtitle file will be shown and a user can choose to download it. It can be used as a subtitle file, e.g. in DaVinci Resolve via Import Subtitles.")

# Upload audio file
uploaded_file = st.file_uploader("Upload Audio File", type=["mp3", "wav", "mov"])

# Free tier or API token option
# NOTE(review): the visible code only checks that a token is non-empty and
# never enforces the 2-minute limit; confirm whether that is intended.
use_free_tier = st.checkbox("Free Tier (Max 2 minutes)")
api_token = st.text_input("API Token (Unlimited)")

# Whether the transcription should be translated to English
translate = st.checkbox("Would you like a translation to english?")

# Model selection
model_size = st.selectbox("Model Size", ("tiny", "base", "small", "medium"))

def transcribe_to_subtitle(audio_bytes, model_name):
  """Transcribe audio with Whisper and offer the result as an SRT download.

  Loads the model named ``model_name`` via ``load_model``, transcribes
  ``audio_bytes`` (with ``task='translate'`` when the module-level
  ``translate`` checkbox is ticked), writes the result as an SRT file,
  renders the subtitle text on the page and adds a download button.

  Args:
    audio_bytes: Content of the uploaded audio file, as read by the caller.
    model_name: Model size passed straight to ``load_model``.

  Returns:
    ``None`` on success, or ``{"error": "<message>"}`` when transcription
    fails. The message is also shown on the page with ``st.error``.
  """
  import tempfile

  # Load model based on selection
  model = load_model(model_name)

  # TODO: the free-tier length limit (2 minutes) is not enforced yet.

  # Only pass ``task`` when translating, so the plain path keeps the
  # library's default behaviour.
  transcribe_options = {"task": "translate"} if translate else {}

  # Work in a per-call temporary directory: a fixed "audio.srt" in the
  # working directory would be shared by concurrent sessions and would be
  # left behind if anything failed after it was written.
  with tempfile.TemporaryDirectory() as work_dir:
    srt_path = os.path.join(work_dir, "audio.srt")

    # Transcribe audio and write the subtitle file
    try:
      result = model.transcribe(audio_bytes, verbose=True, **transcribe_options)
      result.to_srt_vtt(srt_path)
    except Exception as e:
      # UI boundary: surface the failure to the user instead of returning
      # a value the caller never inspects.
      message = f"Error during transcription: {str(e)}"
      st.error(message)
      return {"error": message}

    captions = pysrt.open(srt_path)

    # Log every cue to stdout (start time, text, end time, blank line).
    for caption in captions:
      print(caption.start)
      print(caption.text)
      print(caption.end)
      print()

    # NOTE(review): HTML is allowed here presumably so inline tags in the
    # subtitle text render; confirm this is wanted for model output.
    output = captions.text
    st.markdown(output, unsafe_allow_html=True)

    # Download option
    st.success("Transcription successful! Download subtitle file?")
    with open(srt_path, "rb") as f:
      # The generated file is SRT, so the label names that format.
      st.download_button("Download Subtitle in SRT Format", f, "audio.srt")
  # Leaving the ``with`` block removes the temporary directory and file.

# Run the transcription once a file has been uploaded.
if uploaded_file is not None:
  audio_bytes = uploaded_file.read()
  # Access is granted when the free tier is ticked or any non-empty token
  # was entered; otherwise ask for a token.
  if use_free_tier or api_token:
    transcribe_to_subtitle(audio_bytes, model_size)
  else:
    st.error("API token required for non-free tier usage")