Spaces:

vericudebuget
/

subtitle-generator

Runtime error

App Files Files Community

subtitle-generator / app.py

vericudebuget

Update app.py

ca365ff verified 10 months ago

raw

history blame contribute delete

3.69 kB

	# requirements.txt


	# app.py
	import streamlit as st
	import torch
	from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
	import tempfile
	import os
	from moviepy.editor import VideoFileClip
	import datetime

	def _format_srt_timestamp(seconds):
	    """Format a number of seconds as an SRT timestamp (``HH:MM:SS,mmm``)."""
	    # Work in integer milliseconds so the fractional part is never lost
	    # and no float formatting artefacts reach the output.
	    total_ms = max(0, int(round(float(seconds) * 1000)))
	    hours, remainder = divmod(total_ms, 3_600_000)
	    minutes, remainder = divmod(remainder, 60_000)
	    secs, millis = divmod(remainder, 1000)
	    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


	def create_srt(chunks):
	    """Render timestamped transcription chunks as SubRip (SRT) text.

	    Args:
	        chunks: Iterable of dicts. Each dict has a ``'timestamp'`` entry
	            holding a ``(start, end)`` pair in seconds and a ``'text'``
	            entry holding the caption. Either bound may be ``None``.

	    Returns:
	        The SRT document as a string; an empty string for no chunks.
	        Cues are numbered from 1 and each is followed by a blank line.
	    """
	    entries = []
	    previous_end = 0.0
	    for index, chunk in enumerate(chunks, start=1):
	        start, end = chunk['timestamp']
	        # A missing bound would make the timestamp arithmetic fail, so fall
	        # back to the nearest known time instead of crashing the export.
	        if start is None:
	            start = previous_end
	        if end is None:
	            end = start
	        previous_end = end

	        entries.append(
	            f"{index}\n"
	            f"{_format_srt_timestamp(start)} --> {_format_srt_timestamp(end)}\n"
	            f"{chunk['text'].strip()}\n\n"
	        )
	    return "".join(entries)

	def extract_audio(video_path):
	    """Write the audio track of a video to a temporary MP3 file.

	    Args:
	        video_path: Path of the video file to open with ``VideoFileClip``.

	    Returns:
	        Path of the newly created ``.mp3`` file. The file is not deleted
	        here; the caller removes it when done.

	    Raises:
	        ValueError: If the video has no audio track.
	    """
	    with VideoFileClip(video_path) as video:
	        audio = video.audio
	        if audio is None:
	            raise ValueError("The video file does not contain an audio track.")

	        fd, temp_audio_path = tempfile.mkstemp(suffix='.mp3')
	        # mkstemp hands back an already-open descriptor; the audio is
	        # written by path below, so close it right away to avoid a leak.
	        os.close(fd)
	        try:
	            audio.write_audiofile(temp_audio_path)
	        except Exception:
	            # Do not leave a partial temp file behind when encoding fails.
	            if os.path.exists(temp_audio_path):
	                os.remove(temp_audio_path)
	            raise
	    return temp_audio_path

	def setup_model():
	    """Create the speech-recognition pipeline used for transcription.

	    Loads the ``openai/whisper-tiny`` checkpoint with float32 weights,
	    moves it to the CPU and wraps it, together with its processor's
	    tokenizer and feature extractor, in a transformers pipeline.

	    Returns:
	        The object returned by ``pipeline("automatic-speech-recognition", ...)``.
	    """
	    checkpoint = "openai/whisper-tiny"
	    target_device = "cpu"
	    weights_dtype = torch.float32

	    asr_model = AutoModelForSpeechSeq2Seq.from_pretrained(
	        checkpoint,
	        torch_dtype=weights_dtype,
	        low_cpu_mem_usage=True,
	        use_safetensors=True,
	    )
	    asr_model.to(target_device)

	    asr_processor = AutoProcessor.from_pretrained(checkpoint)

	    return pipeline(
	        "automatic-speech-recognition",
	        model=asr_model,
	        tokenizer=asr_processor.tokenizer,
	        feature_extractor=asr_processor.feature_extractor,
	        torch_dtype=weights_dtype,
	        device=target_device,
	    )

	def main():
	    """Run the Streamlit page: upload media, transcribe it, offer an SRT file.

	    The uploaded file is written to a temporary directory. Video uploads
	    go through ``extract_audio`` first. The audio is transcribed with the
	    pipeline stored in ``st.session_state.pipe``; the text is displayed and
	    the timestamped chunks are offered as an SRT download. All temporary
	    files are removed even when processing raises.
	    """
	    st.title("Audio/Video Transcription App")

	    # Build the pipeline only if this session does not hold one yet.
	    if 'pipe' not in st.session_state:
	        with st.spinner("Loading model... This might take a few minutes."):
	            st.session_state.pipe = setup_model()

	    uploaded_file = st.file_uploader("Upload an audio or video file", type=['mp3', 'wav', 'mp4', 'avi', 'mov'])
	    if uploaded_file is None:
	        return

	    is_video = uploaded_file.type.startswith('video')
	    extracted_audio_path = None

	    with st.spinner("Processing file..."):
	        # TemporaryDirectory removes the directory and the saved upload on
	        # exit, whether or not transcription succeeds.
	        with tempfile.TemporaryDirectory() as temp_dir:
	            # The file name comes from the client: keep only its final
	            # component so it cannot point outside temp_dir.
	            safe_name = os.path.basename(uploaded_file.name) or "upload"
	            temp_path = os.path.join(temp_dir, safe_name)

	            with open(temp_path, 'wb') as f:
	                f.write(uploaded_file.getvalue())

	            try:
	                # Extract audio if it's a video file
	                if is_video:
	                    extracted_audio_path = extract_audio(temp_path)
	                    audio_path = extracted_audio_path
	                else:
	                    audio_path = temp_path

	                # return_timestamps is passed as a pipeline call argument
	                # (not only inside generate_kwargs) so that the result
	                # carries the "chunks" list the SRT export reads below.
	                result = st.session_state.pipe(
	                    audio_path,
	                    return_timestamps=True,
	                    chunk_length_s=30,
	                    batch_size=8,
	                )
	            finally:
	                # The extracted audio lives outside temp_dir, so it needs
	                # its own removal.
	                if extracted_audio_path is not None and os.path.exists(extracted_audio_path):
	                    os.remove(extracted_audio_path)

	    # Display results
	    st.subheader("Transcription:")
	    st.write(result["text"])

	    # Create and offer SRT download
	    srt_content = create_srt(result.get("chunks") or [])
	    st.download_button(
	        label="Download SRT file",
	        data=srt_content,
	        file_name="transcription.srt",
	        mime="text/plain",
	    )

	# Start the app when this file is executed as a script.
	if __name__ == "__main__":
	    main()