vericudebuget committed
Commit dc800de · verified · 1 Parent(s): d00758a

Create app.py

Files changed (1)
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
+ import streamlit as st
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+ from pydub import AudioSegment
+ import tempfile
+ import torch
+ import os
+
+ # Set the device to CPU only
+ device = "cpu"
+ torch_dtype = torch.float32
+
+ # Load the Whisper model and processor
+ model_id = "openai/whisper-large-v3-turbo"
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(
+     model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
+ ).to(device)
+ processor = AutoProcessor.from_pretrained(model_id)
+
+ # Create the pipeline for transcription
+ pipe = pipeline(
+     "automatic-speech-recognition",
+     model=model,
+     tokenizer=processor.tokenizer,
+     feature_extractor=processor.feature_extractor,
+     torch_dtype=torch_dtype,
+     device=device,
+ )
+
+ st.title("Audio/Video Transcription App")
+
+ # File upload
+ uploaded_file = st.file_uploader("Upload an audio or video file", type=["mp3", "wav", "mp4", "m4a"])
+
+ if uploaded_file is not None:
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
+         # pydub (via ffmpeg) pulls the audio track out of video files and converts audio files, so both branches export WAV
+         if uploaded_file.name.endswith(("mp4", "m4a")):
+             audio = AudioSegment.from_file(uploaded_file)
+             audio.export(temp_audio.name, format="wav")
+         else:
+             audio = AudioSegment.from_file(uploaded_file)
+             audio.export(temp_audio.name, format="wav")
+
+     # Run the transcription
+     transcription_result = pipe(temp_audio.name, return_timestamps="word")
+
+     # Extract text and timestamps
+     transcription_text = transcription_result['text']
+     transcription_chunks = transcription_result['chunks']
+
+     # Display transcription
+     st.subheader("Transcription")
+     st.write(transcription_text)
+
+     # Generate SRT file
+     srt_content = ""
+     for i, chunk in enumerate(transcription_chunks, start=1):
+         start_time = chunk["timestamp"][0]
+         end_time = chunk["timestamp"][1]
+         text = chunk["text"]
+
+         # Format time for SRT (hours, minutes, seconds, milliseconds)
+         def format_srt_time(seconds):
+             hours, remainder = divmod(seconds, 3600)
+             minutes, seconds = divmod(remainder, 60)
+             milliseconds = int((seconds % 1) * 1000)
+             seconds = int(seconds)
+             return f"{int(hours):02}:{int(minutes):02}:{seconds:02},{milliseconds:03}"
+
+         srt_content += f"{i}\n"
+         srt_content += f"{format_srt_time(start_time)} --> {format_srt_time(end_time)}\n"
+         srt_content += f"{text}\n\n"
+
+     # Save the SRT file
+     srt_path = tempfile.mktemp(suffix=".srt")
+     with open(srt_path, "w") as srt_file:
+         srt_file.write(srt_content)
+
+     # Provide download for SRT file
+     st.subheader("Download SRT File")
+     with open(srt_path, "rb") as file:
+         st.download_button(
+             label="Download SRT",
+             data=file,
+             file_name="transcription.srt",
+             mime="text/plain"
+         )
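For reference, a small standalone sketch of the SRT-generation step above. The chunk values are illustrative placeholders rather than output from a real run, but the dictionary shape follows what the transformers ASR pipeline returns with return_timestamps="word", and the time formatter mirrors format_srt_time from app.py (the app defines it inside its loop; the logic is identical).

# Illustrative only: example_result is made up; a real run's words and times will differ.
example_result = {
    "text": " Hello world",
    "chunks": [
        {"text": " Hello", "timestamp": (0.0, 0.5)},
        {"text": " world", "timestamp": (0.5, 1.25)},
    ],
}

def format_srt_time(seconds):
    # Same HH:MM:SS,mmm formatting as in app.py
    hours, remainder = divmod(seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    milliseconds = int((seconds % 1) * 1000)
    return f"{int(hours):02}:{int(minutes):02}:{int(seconds):02},{milliseconds:03}"

# Build SRT entries the same way the app does
for i, chunk in enumerate(example_result["chunks"], start=1):
    start, end = chunk["timestamp"]
    print(f"{i}\n{format_srt_time(start)} --> {format_srt_time(end)}\n{chunk['text']}\n")
    # e.g. first entry: "1", "00:00:00,000 --> 00:00:00,500", " Hello"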