Deepakkori45 committed on
Commit
c29ce38
·
verified ·
1 Parent(s): c14f88e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -48
app.py CHANGED
@@ -1,8 +1,8 @@
1
-
2
  import streamlit as st
3
  import openai
4
  import os
5
  from pydub import AudioSegment
 
6
  from dotenv import load_dotenv
7
  from tempfile import NamedTemporaryFile
8
  import math
@@ -14,54 +14,37 @@ load_dotenv()
14
  # Set your OpenAI API key
15
  openai.api_key = os.getenv("OPENAI_API_KEY")
16
 
17
def get_chunk_length_ms(file_path, target_size_mb):
    """
    Calculate the length of each chunk in milliseconds so that chunks are
    approximately target_size_mb in size.

    Args:
        file_path (str): Path to the audio file.
        target_size_mb (int): Target size of each chunk in megabytes.

    Returns:
        int: Chunk length in milliseconds (always >= 1).

    Raises:
        ValueError: If the file on disk is empty (zero bytes).
    """
    audio = AudioSegment.from_file(file_path)
    file_size_bytes = os.path.getsize(file_path)
    if file_size_bytes == 0:
        # Guard: an empty file would otherwise cause a ZeroDivisionError below.
        raise ValueError(f"Audio file is empty: {file_path}")
    duration_ms = len(audio)

    # Approximate playback duration represented by each stored byte.
    duration_per_byte = duration_ms / file_size_bytes

    # Scale up to the requested chunk size. Clamp to >= 1 ms so callers that
    # use this value as a range() step never receive 0 (which raises
    # ValueError) for very small or very dense files.
    chunk_length_ms = target_size_mb * 1024 * 1024 * duration_per_byte
    return max(1, math.floor(chunk_length_ms))
38
-
39
def split_audio(audio_file_path, chunk_length_ms):
    """
    Split an audio file into fixed-length chunks.

    Args:
        audio_file_path (str): Path to the audio file.
        chunk_length_ms (int): Length of each chunk in milliseconds.

    Returns:
        list: List of AudioSegment chunks.
    """
    audio = AudioSegment.from_file(audio_file_path)
    total_ms = len(audio)

    # Walk the timeline in chunk_length_ms strides; the final slice may be
    # shorter than the others.
    chunks = []
    for start in range(0, total_ms, chunk_length_ms):
        chunks.append(audio[start:start + chunk_length_ms])
    return chunks
55
 
56
  def transcribe(audio_file):
57
  """
58
- Transcribe an audio file using OpenAI Whisper model.
59
 
60
  Args:
61
- audio_file (str): Path to the audio file.
62
 
63
  Returns:
64
- str: Transcribed text.
65
  """
66
  with open(audio_file, "rb") as audio:
67
  response = openai.audio.transcriptions.create(
@@ -77,10 +60,10 @@ def process_audio_chunks(audio_chunks):
77
  Process and transcribe each audio chunk.
78
 
79
  Args:
80
- audio_chunks (list): List of AudioSegment chunks.
81
 
82
  Returns:
83
- str: Combined transcription from all chunks.
84
  """
85
  transcriptions = []
86
  min_length_ms = 100 # Minimum length required by OpenAI API (0.1 seconds)
@@ -107,11 +90,11 @@ def save_transcription_to_docx(transcription, audio_file_path):
107
  Save the transcription as a .docx file.
108
 
109
  Args:
110
- transcription (str): Transcribed text.
111
- audio_file_path (str): Path to the original audio file for naming purposes.
112
 
113
  Returns:
114
- str: Path to the saved .docx file.
115
  """
116
  # Extract the base name of the audio file (without extension)
117
  base_name = os.path.splitext(os.path.basename(audio_file_path))[0]
@@ -132,10 +115,9 @@ def save_transcription_to_docx(transcription, audio_file_path):
132
 
133
  st.title("Audio Transcription with OpenAI's Whisper")
134
 
135
- # uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "ogg", "m4a"])
136
  uploaded_file = st.file_uploader("Upload an audio or video file", type=["wav", "mp3", "ogg", "m4a", "mp4", "mov"])
137
 
138
-
139
  if 'transcription' not in st.session_state:
140
  st.session_state.transcription = None
141
 
@@ -144,15 +126,19 @@ if uploaded_file is not None and st.session_state.transcription is None:
144
 
145
  # Save uploaded file temporarily
146
  file_extension = uploaded_file.name.split(".")[-1]
147
- original_file_name = uploaded_file.name.rsplit('.', 1)[0] # Get the original file name without extension
148
  temp_audio_file = f"temp_audio_file.{file_extension}"
149
  with open(temp_audio_file, "wb") as f:
150
  f.write(uploaded_file.getbuffer())
151
 
152
- # Split and process audio
153
  with st.spinner('Transcribing...'):
154
- chunk_length_ms = get_chunk_length_ms(temp_audio_file, target_size_mb=1)
155
- audio_chunks = split_audio(temp_audio_file, chunk_length_ms)
 
 
 
 
156
  transcription = process_audio_chunks(audio_chunks)
157
  if transcription:
158
  st.session_state.transcription = transcription
@@ -177,4 +163,3 @@ if st.session_state.transcription:
177
  file_name=st.session_state.output_docx_file,
178
  mime='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
179
  )
180
-
 
 
1
  import streamlit as st
2
  import openai
3
  import os
4
  from pydub import AudioSegment
5
+ from pydub.silence import split_on_silence
6
  from dotenv import load_dotenv
7
  from tempfile import NamedTemporaryFile
8
  import math
 
14
  # Set your OpenAI API key
15
  openai.api_key = os.getenv("OPENAI_API_KEY")
16
 
17
def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=-40, keep_silence=250):
    """
    Chunk an audio file by detecting silent gaps between spoken passages.

    Args:
        audio_file_path (str): Path to the audio file.
        min_silence_len (int): Minimum silence duration (in ms) that qualifies as a split point.
        silence_thresh (int): Loudness (in dBFS) below which audio counts as silence.
        keep_silence (int): Silence (in ms) preserved at the start and end of each chunk.

    Returns:
        list: List of AudioSegment chunks.
    """
    segment = AudioSegment.from_file(audio_file_path)
    # Delegate the actual boundary detection to pydub's silence splitter.
    return split_on_silence(
        segment,
        min_silence_len=min_silence_len,
        silence_thresh=silence_thresh,
        keep_silence=keep_silence,
    )
38
 
39
  def transcribe(audio_file):
40
  """
41
+ Transcribe an audio file using the OpenAI Whisper model.
42
 
43
  Args:
44
+ audio_file (str): Path to the audio file.
45
 
46
  Returns:
47
+ str: Transcribed text.
48
  """
49
  with open(audio_file, "rb") as audio:
50
  response = openai.audio.transcriptions.create(
 
60
  Process and transcribe each audio chunk.
61
 
62
  Args:
63
+ audio_chunks (list): List of AudioSegment chunks.
64
 
65
  Returns:
66
+ str: Combined transcription from all chunks.
67
  """
68
  transcriptions = []
69
  min_length_ms = 100 # Minimum length required by OpenAI API (0.1 seconds)
 
90
  Save the transcription as a .docx file.
91
 
92
  Args:
93
+ transcription (str): Transcribed text.
94
+ audio_file_path (str): Path to the original audio file for naming purposes.
95
 
96
  Returns:
97
+ str: Path to the saved .docx file.
98
  """
99
  # Extract the base name of the audio file (without extension)
100
  base_name = os.path.splitext(os.path.basename(audio_file_path))[0]
 
115
 
116
  st.title("Audio Transcription with OpenAI's Whisper")
117
 
118
+ # Allow uploading of audio or video files
119
  uploaded_file = st.file_uploader("Upload an audio or video file", type=["wav", "mp3", "ogg", "m4a", "mp4", "mov"])
120
 
 
121
  if 'transcription' not in st.session_state:
122
  st.session_state.transcription = None
123
 
 
126
 
127
  # Save uploaded file temporarily
128
  file_extension = uploaded_file.name.split(".")[-1]
129
+ original_file_name = uploaded_file.name.rsplit('.', 1)[0] # Get original file name without extension
130
  temp_audio_file = f"temp_audio_file.{file_extension}"
131
  with open(temp_audio_file, "wb") as f:
132
  f.write(uploaded_file.getbuffer())
133
 
134
+ # Split and process audio using silence detection
135
  with st.spinner('Transcribing...'):
136
+ audio_chunks = split_audio_on_silence(
137
+ temp_audio_file,
138
+ min_silence_len=500, # adjust based on your audio characteristics
139
+ silence_thresh=-40, # adjust based on the ambient noise level
140
+ keep_silence=250 # optional: keeps a bit of silence at the edges
141
+ )
142
  transcription = process_audio_chunks(audio_chunks)
143
  if transcription:
144
  st.session_state.transcription = transcription
 
163
  file_name=st.session_state.output_docx_file,
164
  mime='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
165
  )