Deepakkori45 committed on
Commit
1c336cb
·
verified ·
1 Parent(s): 5bfb1a0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -43
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import streamlit as st
2
- import openai
3
  import os
4
  from pydub import AudioSegment
5
  from pydub.silence import split_on_silence
@@ -7,12 +6,22 @@ from dotenv import load_dotenv
7
  from tempfile import NamedTemporaryFile
8
  import math
9
  from docx import Document
 
10
 
11
- # Load environment variables from .env file
12
  load_dotenv()
13
 
14
- # Set your OpenAI API key
15
- openai.api_key = os.getenv("OPENAI_API_KEY")
 
 
 
 
 
 
 
 
 
16
 
17
  def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=-40, keep_silence=250):
18
  """
@@ -20,7 +29,7 @@ def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=
20
 
21
  Args:
22
  audio_file_path (str): Path to the audio file.
23
- min_silence_len (int): Minimum length of silence (in ms) required to be used as a split point.
24
  silence_thresh (int): The volume (in dBFS) below which is considered silence.
25
  keep_silence (int): Amount of silence (in ms) to retain at the beginning and end of each chunk.
26
 
@@ -38,41 +47,35 @@ def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=
38
 
39
def transcribe(audio_file):
    """Transcribe an audio file via the OpenAI Whisper API.

    Args:
        audio_file (str): Path to the audio file on disk.

    Returns:
        str: Transcribed text (plain-text response from the API).
    """
    # Stream the file to the API; "text" response_format returns a bare string.
    with open(audio_file, "rb") as audio:
        return openai.audio.transcriptions.create(
            model="whisper-1",
            file=audio,
            response_format="text",
            language="en",  # force English transcription
        )
57
 
58
  def process_audio_chunks(audio_chunks):
59
  """
60
- Process and transcribe each audio chunk.
61
-
62
  Args:
63
  audio_chunks (list): List of AudioSegment chunks.
64
-
65
  Returns:
66
  str: Combined transcription from all chunks.
67
  """
68
  transcriptions = []
69
- min_length_ms = 100 # Minimum length required by OpenAI API (0.1 seconds)
70
-
71
  for i, chunk in enumerate(audio_chunks):
72
  if len(chunk) < min_length_ms:
73
  st.warning(f"Chunk {i} is too short to be processed.")
74
  continue
75
 
 
76
  with NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
77
  chunk.export(temp_audio_file.name, format="wav")
78
  temp_audio_file_path = temp_audio_file.name
@@ -88,32 +91,22 @@ def process_audio_chunks(audio_chunks):
88
def save_transcription_to_docx(transcription, audio_file_path):
    """Write the transcription to a Word (.docx) document.

    The output file is named after the source audio file with
    "_full_transcription.docx" appended, saved in the working directory.

    Args:
        transcription (str): Transcribed text to persist.
        audio_file_path (str): Original audio file path, used only for naming.

    Returns:
        str: Path of the saved .docx file.
    """
    stem = os.path.splitext(os.path.basename(audio_file_path))[0]
    docx_path = f"{stem}_full_transcription.docx"

    document = Document()
    document.add_paragraph(transcription)
    document.save(docx_path)

    return docx_path
115
 
116
- st.title("Audio Transcription with OpenAI's Whisper")
117
 
118
  # Allow uploading of audio or video files
119
  uploaded_file = st.file_uploader("Upload an audio or video file", type=["wav", "mp3", "ogg", "m4a", "mp4", "mov"])
@@ -126,7 +119,6 @@ if uploaded_file is not None and st.session_state.transcription is None:
126
 
127
  # Save uploaded file temporarily
128
  file_extension = uploaded_file.name.split(".")[-1]
129
- original_file_name = uploaded_file.name.rsplit('.', 1)[0] # Get original file name without extension
130
  temp_audio_file = f"temp_audio_file.{file_extension}"
131
  with open(temp_audio_file, "wb") as f:
132
  f.write(uploaded_file.getbuffer())
@@ -135,27 +127,22 @@ if uploaded_file is not None and st.session_state.transcription is None:
135
  with st.spinner('Transcribing...'):
136
  audio_chunks = split_audio_on_silence(
137
  temp_audio_file,
138
- min_silence_len=500, # adjust based on your audio characteristics
139
- silence_thresh=-40, # adjust based on the ambient noise level
140
- keep_silence=250 # optional: keeps a bit of silence at the edges
141
  )
142
  transcription = process_audio_chunks(audio_chunks)
143
  if transcription:
144
  st.session_state.transcription = transcription
145
  st.success('Transcription complete!')
146
-
147
- # Save transcription to a Word (.docx) file
148
  output_docx_file = save_transcription_to_docx(transcription, uploaded_file.name)
149
  st.session_state.output_docx_file = output_docx_file
150
 
151
- # Clean up temporary file
152
  if os.path.exists(temp_audio_file):
153
  os.remove(temp_audio_file)
154
 
155
  if st.session_state.transcription:
156
  st.text_area("Transcription", st.session_state.transcription, key="transcription_area_final")
157
-
158
- # Download the transcription as a .docx file
159
  with open(st.session_state.output_docx_file, "rb") as docx_file:
160
  st.download_button(
161
  label="Download Transcription (.docx)",
 
1
  import streamlit as st
 
2
  import os
3
  from pydub import AudioSegment
4
  from pydub.silence import split_on_silence
 
6
  from tempfile import NamedTemporaryFile
7
  import math
8
  from docx import Document
9
+ import whisper
10
 
11
+ # Load environment variables from .env file (if needed for other configurations)
12
  load_dotenv()
13
 
14
@st.cache_resource
def load_whisper_model():
    """Load and cache the Whisper "base" model.

    Decorated with st.cache_resource so the model is loaded only once per
    Streamlit server process. Alternative sizes: "tiny", "small",
    "medium", "large".

    Returns:
        The loaded Whisper model instance.
    """
    return whisper.load_model("base")
22
+
23
+ # Load the Whisper model globally so it’s only loaded once.
24
+ model = load_whisper_model()
25
 
26
  def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=-40, keep_silence=250):
27
  """
 
29
 
30
  Args:
31
  audio_file_path (str): Path to the audio file.
32
+ min_silence_len (int): Minimum length of silence (in ms) required for a split.
33
  silence_thresh (int): The volume (in dBFS) below which is considered silence.
34
  keep_silence (int): Amount of silence (in ms) to retain at the beginning and end of each chunk.
35
 
 
47
 
48
def transcribe(audio_file):
    """Transcribe an audio file with the module-level local Whisper model.

    Args:
        audio_file (str): Path to the audio file on disk.

    Returns:
        str: Transcribed English text.
    """
    # model is the globally cached Whisper instance; "text" holds the
    # full transcription in the result dict.
    return model.transcribe(audio_file, language="en")["text"]
 
 
 
 
 
 
60
 
61
  def process_audio_chunks(audio_chunks):
62
  """
63
+ Process and transcribe each audio chunk in sequence.
64
+
65
  Args:
66
  audio_chunks (list): List of AudioSegment chunks.
67
+
68
  Returns:
69
  str: Combined transcription from all chunks.
70
  """
71
  transcriptions = []
72
+ min_length_ms = 100 # Minimum length required for processing
 
73
  for i, chunk in enumerate(audio_chunks):
74
  if len(chunk) < min_length_ms:
75
  st.warning(f"Chunk {i} is too short to be processed.")
76
  continue
77
 
78
+ # Save the chunk temporarily as a WAV file
79
  with NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
80
  chunk.export(temp_audio_file.name, format="wav")
81
  temp_audio_file_path = temp_audio_file.name
 
91
def save_transcription_to_docx(transcription, audio_file_path):
    """Save the transcription as a .docx file next to the working directory.

    The document name is derived from the audio file's base name plus
    "_full_transcription.docx".

    Args:
        transcription (str): Transcribed text.
        audio_file_path (str): Path to the original audio file, used for
            naming the output only.

    Returns:
        str: Path to the saved .docx file.
    """
    base, _ext = os.path.splitext(os.path.basename(audio_file_path))
    out_name = base + "_full_transcription.docx"

    word_doc = Document()
    word_doc.add_paragraph(transcription)
    word_doc.save(out_name)

    return out_name
108
 
109
+ st.title("Audio Transcription with Whisper (Local)")
110
 
111
  # Allow uploading of audio or video files
112
  uploaded_file = st.file_uploader("Upload an audio or video file", type=["wav", "mp3", "ogg", "m4a", "mp4", "mov"])
 
119
 
120
  # Save uploaded file temporarily
121
  file_extension = uploaded_file.name.split(".")[-1]
 
122
  temp_audio_file = f"temp_audio_file.{file_extension}"
123
  with open(temp_audio_file, "wb") as f:
124
  f.write(uploaded_file.getbuffer())
 
127
  with st.spinner('Transcribing...'):
128
  audio_chunks = split_audio_on_silence(
129
  temp_audio_file,
130
+ min_silence_len=500, # adjust based on your audio
131
+ silence_thresh=-40, # adjust based on ambient noise level
132
+ keep_silence=250 # retains a bit of silence at the edges
133
  )
134
  transcription = process_audio_chunks(audio_chunks)
135
  if transcription:
136
  st.session_state.transcription = transcription
137
  st.success('Transcription complete!')
 
 
138
  output_docx_file = save_transcription_to_docx(transcription, uploaded_file.name)
139
  st.session_state.output_docx_file = output_docx_file
140
 
 
141
  if os.path.exists(temp_audio_file):
142
  os.remove(temp_audio_file)
143
 
144
  if st.session_state.transcription:
145
  st.text_area("Transcription", st.session_state.transcription, key="transcription_area_final")
 
 
146
  with open(st.session_state.output_docx_file, "rb") as docx_file:
147
  st.download_button(
148
  label="Download Transcription (.docx)",