Update app.py

app.py CHANGED
@@ -1,8 +1,8 @@
-
 import streamlit as st
 import openai
 import os
 from pydub import AudioSegment
+from pydub.silence import split_on_silence
 from dotenv import load_dotenv
 from tempfile import NamedTemporaryFile
 import math
@@ -14,54 +14,37 @@ load_dotenv()
 # Set your OpenAI API key
 openai.api_key = os.getenv("OPENAI_API_KEY")
 
-def calculate_chunk_length_ms(file_path, target_size_mb):
-    """
-    Calculate the length of each chunk in milliseconds to create chunks of approximately target_size_mb.
-
-    Args:
-        file_path (str): Path to the audio file.
-        target_size_mb (int): Target size of each chunk in megabytes.
-
-    Returns:
-        int: Chunk length in milliseconds.
-    """
-    audio = AudioSegment.from_file(file_path)
-    file_size_bytes = os.path.getsize(file_path)
-    duration_ms = len(audio)
-
-    # Calculate the approximate duration per byte
-    duration_per_byte = duration_ms / file_size_bytes
-
-    # Calculate the chunk length in milliseconds for the target size
-    chunk_length_ms = target_size_mb * 1024 * 1024 * duration_per_byte
-    return math.floor(chunk_length_ms)
-
-def split_audio(audio_file_path, chunk_length_ms):
+def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=-40, keep_silence=250):
     """
-    Split an audio file into chunks
-
+    Split an audio file into chunks using silence detection.
+
     Args:
-        ...
-        ...
-        ...
+        audio_file_path (str): Path to the audio file.
+        min_silence_len (int): Minimum length of silence (in ms) required to be used as a split point.
+        silence_thresh (int): The volume (in dBFS) below which is considered silence.
+        keep_silence (int): Amount of silence (in ms) to retain at the beginning and end of each chunk.
+
     Returns:
-        ...
+        list: List of AudioSegment chunks.
     """
     audio = AudioSegment.from_file(audio_file_path)
-    ...
-    ...
-    ...
+    chunks = split_on_silence(
+        audio,
+        min_silence_len=min_silence_len,
+        silence_thresh=silence_thresh,
+        keep_silence=keep_silence
+    )
     return chunks
 
 def transcribe(audio_file):
     """
-    Transcribe an audio file using OpenAI Whisper model.
+    Transcribe an audio file using the OpenAI Whisper model.
 
     Args:
-        ...
+        audio_file (str): Path to the audio file.
 
     Returns:
-        ...
+        str: Transcribed text.
     """
     with open(audio_file, "rb") as audio:
         response = openai.audio.transcriptions.create(
@@ -77,10 +60,10 @@ def process_audio_chunks(audio_chunks):
     Process and transcribe each audio chunk.
 
     Args:
-        ...
+        audio_chunks (list): List of AudioSegment chunks.
 
     Returns:
-        ...
+        str: Combined transcription from all chunks.
     """
     transcriptions = []
     min_length_ms = 100  # Minimum length required by OpenAI API (0.1 seconds)
@@ -107,11 +90,11 @@ def save_transcription_to_docx(transcription, audio_file_path):
     Save the transcription as a .docx file.
 
     Args:
-        ...
-        ...
+        transcription (str): Transcribed text.
+        audio_file_path (str): Path to the original audio file for naming purposes.
 
     Returns:
-        ...
+        str: Path to the saved .docx file.
     """
     # Extract the base name of the audio file (without extension)
     base_name = os.path.splitext(os.path.basename(audio_file_path))[0]
@@ -132,10 +115,9 @@ def save_transcription_to_docx(transcription, audio_file_path):
 
 st.title("Audio Transcription with OpenAI's Whisper")
 
-# ...
+# Allow uploading of audio or video files
 uploaded_file = st.file_uploader("Upload an audio or video file", type=["wav", "mp3", "ogg", "m4a", "mp4", "mov"])
 
-
 if 'transcription' not in st.session_state:
     st.session_state.transcription = None
 
@@ -144,15 +126,19 @@ if uploaded_file is not None and st.session_state.transcription is None:
 
     # Save uploaded file temporarily
     file_extension = uploaded_file.name.split(".")[-1]
-    original_file_name = uploaded_file.name.rsplit('.', 1)[0]  # Get ...
+    original_file_name = uploaded_file.name.rsplit('.', 1)[0]  # Get original file name without extension
     temp_audio_file = f"temp_audio_file.{file_extension}"
     with open(temp_audio_file, "wb") as f:
         f.write(uploaded_file.getbuffer())
 
-    # Split and process audio
+    # Split and process audio using silence detection
     with st.spinner('Transcribing...'):
-        ...
-        ...
+        audio_chunks = split_audio_on_silence(
+            temp_audio_file,
+            min_silence_len=500,   # adjust based on your audio characteristics
+            silence_thresh=-40,    # adjust based on the ambient noise level
+            keep_silence=250       # optional: keeps a bit of silence at the edges
+        )
         transcription = process_audio_chunks(audio_chunks)
         if transcription:
             st.session_state.transcription = transcription
@@ -177,4 +163,3 @@ if st.session_state.transcription:
         file_name=st.session_state.output_docx_file,
         mime='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
     )
-
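For context on the change itself: pydub's split_on_silence scans the audio for stretches quieter than silence_thresh (in dBFS) that last at least min_silence_len, and cuts the clip at those pauses, so chunk boundaries fall between words rather than mid-word as fixed-length slicing could. A minimal standalone sketch of the call (the input file name is hypothetical, and deriving the threshold from the clip's own average loudness is a common alternative, not what this commit hard-codes):

from pydub import AudioSegment
from pydub.silence import split_on_silence

audio = AudioSegment.from_file("sample.mp3")  # hypothetical input file
chunks = split_on_silence(
    audio,
    min_silence_len=500,             # a pause must last at least 0.5 s to count as a split point
    silence_thresh=audio.dBFS - 16,  # threshold relative to average loudness (assumption; the app uses -40)
    keep_silence=250,                # keep 0.25 s of padding at each chunk edge
)
print(f"{len(chunks)} chunks")

One trade-off of the new approach: unlike the removed size-based splitter, silence detection puts no upper bound on chunk length, so a long stretch with no pauses still yields a single large chunk.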
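The diff shows only the docstring of process_audio_chunks, so the body below is a reconstruction under stated assumptions: each pydub segment is exported to a temporary file (the transcription endpoint needs a real file), and anything shorter than min_length_ms is skipped, matching the 0.1 s minimum noted in the code.

from tempfile import NamedTemporaryFile

def process_audio_chunks(audio_chunks, min_length_ms=100):
    transcriptions = []
    for chunk in audio_chunks:
        if len(chunk) < min_length_ms:  # len() of an AudioSegment is its duration in ms
            continue                    # too short for the API; skip it
        with NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            chunk.export(tmp.name, format="mp3")      # write the segment out for upload
        transcriptions.append(transcribe(tmp.name))   # transcribe() is the app's helper above
    return " ".join(transcriptions)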
|