Deepakkori45 committed on
Commit
bfbed8e
·
verified ·
1 Parent(s): c29ce38

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -43
app.py CHANGED
@@ -7,6 +7,7 @@ from dotenv import load_dotenv
7
  from tempfile import NamedTemporaryFile
8
  import math
9
  from docx import Document
 
10
 
11
  # Load environment variables from .env file
12
  load_dotenv()
@@ -14,18 +15,128 @@ load_dotenv()
14
  # Set your OpenAI API key
15
  openai.api_key = os.getenv("OPENAI_API_KEY")
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=-40, keep_silence=250):
18
  """
19
  Split an audio file into chunks using silence detection.
20
-
21
- Args:
22
- audio_file_path (str): Path to the audio file.
23
- min_silence_len (int): Minimum length of silence (in ms) required to be used as a split point.
24
- silence_thresh (int): The volume (in dBFS) below which is considered silence.
25
- keep_silence (int): Amount of silence (in ms) to retain at the beginning and end of each chunk.
26
-
27
- Returns:
28
- list: List of AudioSegment chunks.
29
  """
30
  audio = AudioSegment.from_file(audio_file_path)
31
  chunks = split_on_silence(
@@ -39,10 +150,11 @@ def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=
39
  def transcribe(audio_file):
40
  """
41
  Transcribe an audio file using the OpenAI Whisper model.
42
-
 
43
  Args:
44
  audio_file (str): Path to the audio file.
45
-
46
  Returns:
47
  str: Transcribed text.
48
  """
@@ -51,17 +163,17 @@ def transcribe(audio_file):
51
  model="whisper-1",
52
  file=audio,
53
  response_format="text",
54
- language="en" # Ensures transcription is in English
55
  )
56
  return response
57
 
58
  def process_audio_chunks(audio_chunks):
59
  """
60
  Process and transcribe each audio chunk.
61
-
62
  Args:
63
  audio_chunks (list): List of AudioSegment chunks.
64
-
65
  Returns:
66
  str: Combined transcription from all chunks.
67
  """
@@ -88,29 +200,12 @@ def process_audio_chunks(audio_chunks):
88
  def save_transcription_to_docx(transcription, audio_file_path):
89
  """
90
  Save the transcription as a .docx file.
91
-
92
- Args:
93
- transcription (str): Transcribed text.
94
- audio_file_path (str): Path to the original audio file for naming purposes.
95
-
96
- Returns:
97
- str: Path to the saved .docx file.
98
  """
99
- # Extract the base name of the audio file (without extension)
100
  base_name = os.path.splitext(os.path.basename(audio_file_path))[0]
101
-
102
- # Create a new file name by appending "_full_transcription" with .docx extension
103
  output_file_name = f"{base_name}_full_transcription.docx"
104
-
105
- # Create a new Document object
106
  doc = Document()
107
-
108
- # Add the transcription text to the document
109
  doc.add_paragraph(transcription)
110
-
111
- # Save the document in .docx format
112
  doc.save(output_file_name)
113
-
114
  return output_file_name
115
 
116
  st.title("Audio Transcription with OpenAI's Whisper")
@@ -126,36 +221,32 @@ if uploaded_file is not None and st.session_state.transcription is None:
126
 
127
  # Save uploaded file temporarily
128
  file_extension = uploaded_file.name.split(".")[-1]
129
- original_file_name = uploaded_file.name.rsplit('.', 1)[0] # Get original file name without extension
130
  temp_audio_file = f"temp_audio_file.{file_extension}"
131
  with open(temp_audio_file, "wb") as f:
132
  f.write(uploaded_file.getbuffer())
133
-
134
- # Split and process audio using silence detection
135
  with st.spinner('Transcribing...'):
136
  audio_chunks = split_audio_on_silence(
137
  temp_audio_file,
138
- min_silence_len=500, # adjust based on your audio characteristics
139
- silence_thresh=-40, # adjust based on the ambient noise level
140
- keep_silence=250 # optional: keeps a bit of silence at the edges
141
  )
142
  transcription = process_audio_chunks(audio_chunks)
143
  if transcription:
144
  st.session_state.transcription = transcription
145
  st.success('Transcription complete!')
146
-
147
- # Save transcription to a Word (.docx) file
148
  output_docx_file = save_transcription_to_docx(transcription, uploaded_file.name)
149
  st.session_state.output_docx_file = output_docx_file
150
-
151
- # Clean up temporary file
152
  if os.path.exists(temp_audio_file):
153
  os.remove(temp_audio_file)
154
 
155
  if st.session_state.transcription:
156
  st.text_area("Transcription", st.session_state.transcription, key="transcription_area_final")
157
-
158
- # Download the transcription as a .docx file
159
  with open(st.session_state.output_docx_file, "rb") as docx_file:
160
  st.download_button(
161
  label="Download Transcription (.docx)",
 
7
  from tempfile import NamedTemporaryFile
8
  import math
9
  from docx import Document
10
+ import time
11
 
12
  # Load environment variables from .env file
13
  load_dotenv()
 
15
  # Set your OpenAI API key
16
  openai.api_key = os.getenv("OPENAI_API_KEY")
17
 
18
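+ # Map of display names to the language codes passed as the `language` argument of the Whisper transcription call.
19
+ # Most codes are two-letter ISO 639-1, but "ceb", "haw" and "hmn" are three-letter codes, and "Chichewa"/"Nyanja" both map to "ny"; verify each entry against the API's supported-language list before relying on it.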
20
+ languages = {
21
+ "Afrikaans": "af",
22
+ "Albanian": "sq",
23
+ "Amharic": "am",
24
+ "Arabic": "ar",
25
+ "Armenian": "hy",
26
+ "Assamese": "as",
27
+ "Azerbaijani": "az",
28
+ "Basque": "eu",
29
+ "Belarusian": "be",
30
+ "Bengali": "bn",
31
+ "Bosnian": "bs",
32
+ "Bulgarian": "bg",
33
+ "Burmese": "my",
34
+ "Catalan": "ca",
35
+ "Cebuano": "ceb",
36
+ "Chichewa": "ny",
37
+ "Chinese": "zh",
38
+ "Corsican": "co",
39
+ "Croatian": "hr",
40
+ "Czech": "cs",
41
+ "Danish": "da",
42
+ "Dutch": "nl",
43
+ "English": "en",
44
+ "Esperanto": "eo",
45
+ "Estonian": "et",
46
+ "Filipino": "tl",
47
+ "Finnish": "fi",
48
+ "French": "fr",
49
+ "Frisian": "fy",
50
+ "Galician": "gl",
51
+ "Georgian": "ka",
52
+ "German": "de",
53
+ "Greek": "el",
54
+ "Gujarati": "gu",
55
+ "Haitian Creole": "ht",
56
+ "Hausa": "ha",
57
+ "Hawaiian": "haw",
58
+ "Hebrew": "he",
59
+ "Hindi": "hi",
60
+ "Hmong": "hmn",
61
+ "Hungarian": "hu",
62
+ "Icelandic": "is",
63
+ "Igbo": "ig",
64
+ "Indonesian": "id",
65
+ "Irish": "ga",
66
+ "Italian": "it",
67
+ "Japanese": "ja",
68
+ "Javanese": "jw",
69
+ "Kannada": "kn",
70
+ "Kazakh": "kk",
71
+ "Khmer": "km",
72
+ "Kinyarwanda": "rw",
73
+ "Korean": "ko",
74
+ "Kurdish": "ku",
75
+ "Kyrgyz": "ky",
76
+ "Lao": "lo",
77
+ "Latin": "la",
78
+ "Latvian": "lv",
79
+ "Lithuanian": "lt",
80
+ "Luxembourgish": "lb",
81
+ "Macedonian": "mk",
82
+ "Malagasy": "mg",
83
+ "Malay": "ms",
84
+ "Malayalam": "ml",
85
+ "Maltese": "mt",
86
+ "Maori": "mi",
87
+ "Marathi": "mr",
88
+ "Mongolian": "mn",
89
+ "Nepali": "ne",
90
+ "Norwegian": "no",
91
+ "Nyanja": "ny",
92
+ "Odia": "or",
93
+ "Pashto": "ps",
94
+ "Persian": "fa",
95
+ "Polish": "pl",
96
+ "Portuguese": "pt",
97
+ "Punjabi": "pa",
98
+ "Romanian": "ro",
99
+ "Russian": "ru",
100
+ "Samoan": "sm",
101
+ "Scots Gaelic": "gd",
102
+ "Serbian": "sr",
103
+ "Sesotho": "st",
104
+ "Shona": "sn",
105
+ "Sindhi": "sd",
106
+ "Sinhala": "si",
107
+ "Slovak": "sk",
108
+ "Slovenian": "sl",
109
+ "Somali": "so",
110
+ "Spanish": "es",
111
+ "Sundanese": "su",
112
+ "Swahili": "sw",
113
+ "Swedish": "sv",
114
+ "Tajik": "tg",
115
+ "Tamil": "ta",
116
+ "Tatar": "tt",
117
+ "Telugu": "te",
118
+ "Thai": "th",
119
+ "Turkish": "tr",
120
+ "Turkmen": "tk",
121
+ "Ukrainian": "uk",
122
+ "Urdu": "ur",
123
+ "Uyghur": "ug",
124
+ "Uzbek": "uz",
125
+ "Vietnamese": "vi",
126
+ "Welsh": "cy",
127
+ "Xhosa": "xh",
128
+ "Yiddish": "yi",
129
+ "Yoruba": "yo",
130
+ "Zulu": "zu"
131
+ }
132
+
133
+ # Create a selectbox for language selection; default is English.
134
+ selected_lang_name = st.selectbox("Select transcription language", sorted(languages.keys()), index=sorted(languages.keys()).index("English"))
135
+ selected_language = languages[selected_lang_name]
136
+
137
  def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=-40, keep_silence=250):
138
  """
139
  Split an audio file into chunks using silence detection.
 
 
 
 
 
 
 
 
 
140
  """
141
  audio = AudioSegment.from_file(audio_file_path)
142
  chunks = split_on_silence(
 
150
  def transcribe(audio_file):
151
  """
152
  Transcribe an audio file using the OpenAI Whisper model.
153
+ This uses the OpenAI API with the forced language set to the selected language.
154
+
155
  Args:
156
  audio_file (str): Path to the audio file.
157
+
158
  Returns:
159
  str: Transcribed text.
160
  """
 
163
  model="whisper-1",
164
  file=audio,
165
  response_format="text",
166
+ language=selected_language # Use the selected language code
167
  )
168
  return response
169
 
170
  def process_audio_chunks(audio_chunks):
171
  """
172
  Process and transcribe each audio chunk.
173
+
174
  Args:
175
  audio_chunks (list): List of AudioSegment chunks.
176
+
177
  Returns:
178
  str: Combined transcription from all chunks.
179
  """
 
200
  def save_transcription_to_docx(transcription, audio_file_path):
201
  """
202
  Save the transcription as a .docx file.
 
 
 
 
 
 
 
203
  """
 
204
  base_name = os.path.splitext(os.path.basename(audio_file_path))[0]
 
 
205
  output_file_name = f"{base_name}_full_transcription.docx"
 
 
206
  doc = Document()
 
 
207
  doc.add_paragraph(transcription)
 
 
208
  doc.save(output_file_name)
 
209
  return output_file_name
210
 
211
  st.title("Audio Transcription with OpenAI's Whisper")
 
221
 
222
  # Save uploaded file temporarily
223
  file_extension = uploaded_file.name.split(".")[-1]
224
+ original_file_name = uploaded_file.name.rsplit('.', 1)[0]
225
  temp_audio_file = f"temp_audio_file.{file_extension}"
226
  with open(temp_audio_file, "wb") as f:
227
  f.write(uploaded_file.getbuffer())
228
+
229
+ processing_start = time.time()
230
  with st.spinner('Transcribing...'):
231
  audio_chunks = split_audio_on_silence(
232
  temp_audio_file,
233
+ min_silence_len=500,
234
+ silence_thresh=-40,
235
+ keep_silence=250
236
  )
237
  transcription = process_audio_chunks(audio_chunks)
238
  if transcription:
239
  st.session_state.transcription = transcription
240
  st.success('Transcription complete!')
 
 
241
  output_docx_file = save_transcription_to_docx(transcription, uploaded_file.name)
242
  st.session_state.output_docx_file = output_docx_file
243
+ processing_duration = time.time() - processing_start
244
+ st.info(f"Total processing time: {processing_duration:.2f} seconds.")
245
  if os.path.exists(temp_audio_file):
246
  os.remove(temp_audio_file)
247
 
248
  if st.session_state.transcription:
249
  st.text_area("Transcription", st.session_state.transcription, key="transcription_area_final")
 
 
250
  with open(st.session_state.output_docx_file, "rb") as docx_file:
251
  st.download_button(
252
  label="Download Transcription (.docx)",