jhj0517 committed on
Commit
8becfaa
·
unverified ·
2 Parent(s): 94c1a40 5f3fe7d

Merge pull request #54 from jhj0517/add-raw-type

Browse files
app.py CHANGED
@@ -50,7 +50,7 @@ class App:
50
  label="Model")
51
  dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs,
52
  value="Automatic Detection", label="Language")
53
- dd_subformat = gr.Dropdown(["SRT", "WebVTT"], value="SRT", label="Subtitle Format")
54
  with gr.Row():
55
  cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
56
  with gr.Row():
@@ -66,7 +66,7 @@ class App:
66
  tb_indicator = gr.Textbox(label="Output", scale=8)
67
  btn_openfolder = gr.Button('📂', scale=2)
68
 
69
- params = [input_file, dd_model, dd_lang, dd_subformat, cb_translate, cb_timestamp]
70
  advanced_params = [nb_beam_size, nb_log_prob_threshold, nb_no_speech_threshold, dd_compute_type]
71
  btn_run.click(fn=self.whisper_inf.transcribe_file,
72
  inputs=params + advanced_params,
@@ -88,7 +88,7 @@ class App:
88
  label="Model")
89
  dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs,
90
  value="Automatic Detection", label="Language")
91
- dd_subformat = gr.Dropdown(choices=["SRT", "WebVTT"], value="SRT", label="Subtitle Format")
92
  with gr.Row():
93
  cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
94
  with gr.Row():
@@ -105,7 +105,7 @@ class App:
105
  tb_indicator = gr.Textbox(label="Output", scale=8)
106
  btn_openfolder = gr.Button('📂', scale=2)
107
 
108
- params = [tb_youtubelink, dd_model, dd_lang, dd_subformat, cb_translate, cb_timestamp]
109
  advanced_params = [nb_beam_size, nb_log_prob_threshold, nb_no_speech_threshold, dd_compute_type]
110
  btn_run.click(fn=self.whisper_inf.transcribe_youtube,
111
  inputs=params + advanced_params,
@@ -123,7 +123,7 @@ class App:
123
  label="Model")
124
  dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs,
125
  value="Automatic Detection", label="Language")
126
- dd_subformat = gr.Dropdown(["SRT", "WebVTT"], value="SRT", label="Subtitle Format")
127
  with gr.Row():
128
  cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
129
  with gr.Accordion("Advanced_Parameters", open=False):
@@ -137,7 +137,7 @@ class App:
137
  tb_indicator = gr.Textbox(label="Output", scale=8)
138
  btn_openfolder = gr.Button('📂', scale=2)
139
 
140
- params = [mic_input, dd_model, dd_lang, dd_subformat, cb_translate]
141
  advanced_params = [nb_beam_size, nb_log_prob_threshold, nb_no_speech_threshold, dd_compute_type]
142
  btn_run.click(fn=self.whisper_inf.transcribe_mic,
143
  inputs=params + advanced_params,
 
50
  label="Model")
51
  dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs,
52
  value="Automatic Detection", label="Language")
53
+ dd_file_format = gr.Dropdown(["SRT", "WebVTT", "txt"], value="SRT", label="File Format")
54
  with gr.Row():
55
  cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
56
  with gr.Row():
 
66
  tb_indicator = gr.Textbox(label="Output", scale=8)
67
  btn_openfolder = gr.Button('📂', scale=2)
68
 
69
+ params = [input_file, dd_model, dd_lang, dd_file_format, cb_translate, cb_timestamp]
70
  advanced_params = [nb_beam_size, nb_log_prob_threshold, nb_no_speech_threshold, dd_compute_type]
71
  btn_run.click(fn=self.whisper_inf.transcribe_file,
72
  inputs=params + advanced_params,
 
88
  label="Model")
89
  dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs,
90
  value="Automatic Detection", label="Language")
91
+ dd_file_format = gr.Dropdown(choices=["SRT", "WebVTT", "txt"], value="SRT", label="File Format")
92
  with gr.Row():
93
  cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
94
  with gr.Row():
 
105
  tb_indicator = gr.Textbox(label="Output", scale=8)
106
  btn_openfolder = gr.Button('📂', scale=2)
107
 
108
+ params = [tb_youtubelink, dd_model, dd_lang, dd_file_format, cb_translate, cb_timestamp]
109
  advanced_params = [nb_beam_size, nb_log_prob_threshold, nb_no_speech_threshold, dd_compute_type]
110
  btn_run.click(fn=self.whisper_inf.transcribe_youtube,
111
  inputs=params + advanced_params,
 
123
  label="Model")
124
  dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs,
125
  value="Automatic Detection", label="Language")
126
+ dd_file_format = gr.Dropdown(["SRT", "WebVTT", "txt"], value="SRT", label="File Format")
127
  with gr.Row():
128
  cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
129
  with gr.Accordion("Advanced_Parameters", open=False):
 
137
  tb_indicator = gr.Textbox(label="Output", scale=8)
138
  btn_openfolder = gr.Button('📂', scale=2)
139
 
140
+ params = [mic_input, dd_model, dd_lang, dd_file_format, cb_translate]
141
  advanced_params = [nb_beam_size, nb_log_prob_threshold, nb_no_speech_threshold, dd_compute_type]
142
  btn_run.click(fn=self.whisper_inf.transcribe_mic,
143
  inputs=params + advanced_params,
modules/faster_whisper_inference.py CHANGED
@@ -13,7 +13,7 @@ import torch
13
  import gradio as gr
14
 
15
  from .base_interface import BaseInterface
16
- from modules.subtitle_manager import get_srt, get_vtt, write_file, safe_filename
17
  from modules.youtube_manager import get_ytdata, get_ytaudio
18
 
19
 
@@ -34,7 +34,7 @@ class FasterWhisperInference(BaseInterface):
34
  fileobjs: list,
35
  model_size: str,
36
  lang: str,
37
- subformat: str,
38
  istranslate: bool,
39
  add_timestamp: bool,
40
  beam_size: int,
@@ -54,8 +54,8 @@ class FasterWhisperInference(BaseInterface):
54
  Whisper model size from gr.Dropdown()
55
  lang: str
56
  Source language of the file to transcribe from gr.Dropdown()
57
- subformat: str
58
- Subtitle format to write from gr.Dropdown(). Supported format: [SRT, WebVTT]
59
  istranslate: bool
60
  Boolean value from gr.Checkbox() that determines whether to translate to English.
61
  It's Whisper's feature to translate speech from another language directly into English end-to-end.
@@ -97,12 +97,13 @@ class FasterWhisperInference(BaseInterface):
97
 
98
  file_name, file_ext = os.path.splitext(os.path.basename(fileobj.orig_name))
99
  file_name = safe_filename(file_name)
100
- subtitle = self.generate_and_write_subtitle(
101
  file_name=file_name,
102
  transcribed_segments=transcribed_segments,
103
  add_timestamp=add_timestamp,
104
- subformat=subformat
105
  )
 
106
  files_info[file_name] = {"subtitle": subtitle, "time_for_task": time_for_task}
107
 
108
  total_result = ''
@@ -125,7 +126,7 @@ class FasterWhisperInference(BaseInterface):
125
  youtubelink: str,
126
  model_size: str,
127
  lang: str,
128
- subformat: str,
129
  istranslate: bool,
130
  add_timestamp: bool,
131
  beam_size: int,
@@ -145,8 +146,8 @@ class FasterWhisperInference(BaseInterface):
145
  Whisper model size from gr.Dropdown()
146
  lang: str
147
  Source language of the file to transcribe from gr.Dropdown()
148
- subformat: str
149
- Subtitle format to write from gr.Dropdown(). Supported format: [SRT, WebVTT]
150
  istranslate: bool
151
  Boolean value from gr.Checkbox() that determines whether to translate to English.
152
  It's Whisper's feature to translate speech from another language directly into English end-to-end.
@@ -191,11 +192,11 @@ class FasterWhisperInference(BaseInterface):
191
  progress(1, desc="Completed!")
192
 
193
  file_name = safe_filename(yt.title)
194
- subtitle = self.generate_and_write_subtitle(
195
  file_name=file_name,
196
  transcribed_segments=transcribed_segments,
197
  add_timestamp=add_timestamp,
198
- subformat=subformat
199
  )
200
  return f"Done in {self.format_time(time_for_task)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
201
  except Exception as e:
@@ -217,7 +218,7 @@ class FasterWhisperInference(BaseInterface):
217
  micaudio: str,
218
  model_size: str,
219
  lang: str,
220
- subformat: str,
221
  istranslate: bool,
222
  beam_size: int,
223
  log_prob_threshold: float,
@@ -236,8 +237,8 @@ class FasterWhisperInference(BaseInterface):
236
  Whisper model size from gr.Dropdown()
237
  lang: str
238
  Source language of the file to transcribe from gr.Dropdown()
239
- subformat: str
240
- Subtitle format to write from gr.Dropdown(). Supported format: [SRT, WebVTT]
241
  istranslate: bool
242
  Boolean value from gr.Checkbox() that determines whether to translate to English.
243
  It's Whisper's feature to translate speech from another language directly into English end-to-end.
@@ -276,11 +277,11 @@ class FasterWhisperInference(BaseInterface):
276
  )
277
  progress(1, desc="Completed!")
278
 
279
- subtitle = self.generate_and_write_subtitle(
280
  file_name="Mic",
281
  transcribed_segments=transcribed_segments,
282
  add_timestamp=True,
283
- subformat=subformat
284
  )
285
  return f"Done in {self.format_time(time_for_task)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
286
  except Exception as e:
@@ -378,11 +379,11 @@ class FasterWhisperInference(BaseInterface):
378
  )
379
 
380
  @staticmethod
381
- def generate_and_write_subtitle(file_name: str,
382
- transcribed_segments: list,
383
- add_timestamp: bool,
384
- subformat: str,
385
- ) -> str:
386
  """
387
  This method writes subtitle file and returns str to gr.Textbox
388
  """
@@ -392,13 +393,18 @@ class FasterWhisperInference(BaseInterface):
392
  else:
393
  output_path = os.path.join("outputs", f"{file_name}")
394
 
395
- if subformat == "SRT":
396
- subtitle = get_srt(transcribed_segments)
397
- write_file(subtitle, f"{output_path}.srt")
398
- elif subformat == "WebVTT":
399
- subtitle = get_vtt(transcribed_segments)
400
- write_file(subtitle, f"{output_path}.vtt")
401
- return subtitle
 
 
 
 
 
402
 
403
  @staticmethod
404
  def format_time(elapsed_time: float) -> str:
 
13
  import gradio as gr
14
 
15
  from .base_interface import BaseInterface
16
+ from modules.subtitle_manager import get_srt, get_vtt, get_txt, write_file, safe_filename
17
  from modules.youtube_manager import get_ytdata, get_ytaudio
18
 
19
 
 
34
  fileobjs: list,
35
  model_size: str,
36
  lang: str,
37
+ file_format: str,
38
  istranslate: bool,
39
  add_timestamp: bool,
40
  beam_size: int,
 
54
  Whisper model size from gr.Dropdown()
55
  lang: str
56
  Source language of the file to transcribe from gr.Dropdown()
57
+ file_format: str
58
+ File format to write from gr.Dropdown(). Supported format: [SRT, WebVTT, txt]
59
  istranslate: bool
60
  Boolean value from gr.Checkbox() that determines whether to translate to English.
61
  It's Whisper's feature to translate speech from another language directly into English end-to-end.
 
97
 
98
  file_name, file_ext = os.path.splitext(os.path.basename(fileobj.orig_name))
99
  file_name = safe_filename(file_name)
100
+ subtitle = self.generate_and_write_file(
101
  file_name=file_name,
102
  transcribed_segments=transcribed_segments,
103
  add_timestamp=add_timestamp,
104
+ file_format=file_format
105
  )
106
+ print(f"{subtitle}")
107
  files_info[file_name] = {"subtitle": subtitle, "time_for_task": time_for_task}
108
 
109
  total_result = ''
 
126
  youtubelink: str,
127
  model_size: str,
128
  lang: str,
129
+ file_format: str,
130
  istranslate: bool,
131
  add_timestamp: bool,
132
  beam_size: int,
 
146
  Whisper model size from gr.Dropdown()
147
  lang: str
148
  Source language of the file to transcribe from gr.Dropdown()
149
+ file_format: str
150
+ File format to write from gr.Dropdown(). Supported format: [SRT, WebVTT, txt]
151
  istranslate: bool
152
  Boolean value from gr.Checkbox() that determines whether to translate to English.
153
  It's Whisper's feature to translate speech from another language directly into English end-to-end.
 
192
  progress(1, desc="Completed!")
193
 
194
  file_name = safe_filename(yt.title)
195
+ subtitle = self.generate_and_write_file(
196
  file_name=file_name,
197
  transcribed_segments=transcribed_segments,
198
  add_timestamp=add_timestamp,
199
+ file_format=file_format
200
  )
201
  return f"Done in {self.format_time(time_for_task)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
202
  except Exception as e:
 
218
  micaudio: str,
219
  model_size: str,
220
  lang: str,
221
+ file_format: str,
222
  istranslate: bool,
223
  beam_size: int,
224
  log_prob_threshold: float,
 
237
  Whisper model size from gr.Dropdown()
238
  lang: str
239
  Source language of the file to transcribe from gr.Dropdown()
240
+ file_format: str
241
+ File format to write from gr.Dropdown(). Supported format: [SRT, WebVTT, txt]
242
  istranslate: bool
243
  Boolean value from gr.Checkbox() that determines whether to translate to English.
244
  It's Whisper's feature to translate speech from another language directly into English end-to-end.
 
277
  )
278
  progress(1, desc="Completed!")
279
 
280
+ subtitle = self.generate_and_write_file(
281
  file_name="Mic",
282
  transcribed_segments=transcribed_segments,
283
  add_timestamp=True,
284
+ file_format=file_format
285
  )
286
  return f"Done in {self.format_time(time_for_task)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
287
  except Exception as e:
 
379
  )
380
 
381
  @staticmethod
382
+ def generate_and_write_file(file_name: str,
383
+ transcribed_segments: list,
384
+ add_timestamp: bool,
385
+ file_format: str,
386
+ ) -> str:
387
  """
388
  This method writes subtitle file and returns str to gr.Textbox
389
  """
 
393
  else:
394
  output_path = os.path.join("outputs", f"{file_name}")
395
 
396
+ if file_format == "SRT":
397
+ content = get_srt(transcribed_segments)
398
+ write_file(content, f"{output_path}.srt")
399
+
400
+ elif file_format == "WebVTT":
401
+ content = get_vtt(transcribed_segments)
402
+ write_file(content, f"{output_path}.vtt")
403
+
404
+ elif file_format == "txt":
405
+ content = get_txt(transcribed_segments)
406
+ write_file(content, f"{output_path}.txt")
407
+ return content
408
 
409
  @staticmethod
410
  def format_time(elapsed_time: float) -> str:
modules/subtitle_manager.py CHANGED
@@ -44,6 +44,15 @@ def get_vtt(segments):
44
  return output
45
 
46
 
 
 
 
 
 
 
 
 
 
47
  def parse_srt(file_path):
48
  """Reads SRT file and returns as dict"""
49
  with open(file_path, 'r', encoding='utf-8') as file:
 
44
  return output
45
 
46
 
47
def get_txt(segments):
    """Render transcribed segments as plain text, one segment per line.

    Parameters
    ----------
    segments
        Iterable of mappings, each with a 'text' key holding that segment's string.

    Returns
    -------
    str
        Every segment's text followed by a newline. At most one leading space
        is removed from each segment. An empty input yields "".
    """
    lines = []
    for segment in segments:
        text = segment['text']
        # Strip on a local copy so the caller's segment dicts are left untouched
        # and remain usable for a later SRT/WebVTT rendering.
        if text.startswith(' '):
            text = text[1:]
        lines.append(f"{text}\n")
    return "".join(lines)
54
+
55
+
56
  def parse_srt(file_path):
57
  """Reads SRT file and returns as dict"""
58
  with open(file_path, 'r', encoding='utf-8') as file:
modules/whisper_Inference.py CHANGED
@@ -8,7 +8,7 @@ from datetime import datetime
8
  import torch
9
 
10
  from .base_interface import BaseInterface
11
- from modules.subtitle_manager import get_srt, get_vtt, write_file, safe_filename
12
  from modules.youtube_manager import get_ytdata, get_ytaudio
13
 
14
  DEFAULT_MODEL_SIZE = "large-v2"
@@ -30,7 +30,7 @@ class WhisperInference(BaseInterface):
30
  fileobjs: list,
31
  model_size: str,
32
  lang: str,
33
- subformat: str,
34
  istranslate: bool,
35
  add_timestamp: bool,
36
  beam_size: int,
@@ -49,8 +49,8 @@ class WhisperInference(BaseInterface):
49
  Whisper model size from gr.Dropdown()
50
  lang: str
51
  Source language of the file to transcribe from gr.Dropdown()
52
- subformat: str
53
- Subtitle format to write from gr.Dropdown(). Supported format: [SRT, WebVTT]
54
  istranslate: bool
55
  Boolean value from gr.Checkbox() that determines whether to translate to English.
56
  It's Whisper's feature to translate speech from another language directly into English end-to-end.
@@ -93,11 +93,11 @@ class WhisperInference(BaseInterface):
93
 
94
  file_name, file_ext = os.path.splitext(os.path.basename(fileobj.orig_name))
95
  file_name = safe_filename(file_name)
96
- subtitle = self.generate_and_write_subtitle(
97
  file_name=file_name,
98
  transcribed_segments=result,
99
  add_timestamp=add_timestamp,
100
- subformat=subformat
101
  )
102
 
103
  files_info[file_name] = {"subtitle": subtitle, "elapsed_time": elapsed_time}
@@ -122,7 +122,7 @@ class WhisperInference(BaseInterface):
122
  youtubelink: str,
123
  model_size: str,
124
  lang: str,
125
- subformat: str,
126
  istranslate: bool,
127
  add_timestamp: bool,
128
  beam_size: int,
@@ -141,8 +141,8 @@ class WhisperInference(BaseInterface):
141
  Whisper model size from gr.Dropdown()
142
  lang: str
143
  Source language of the file to transcribe from gr.Dropdown()
144
- subformat: str
145
- Subtitle format to write from gr.Dropdown(). Supported format: [SRT, WebVTT]
146
  istranslate: bool
147
  Boolean value from gr.Checkbox() that determines whether to translate to English.
148
  It's Whisper's feature to translate speech from another language directly into English end-to-end.
@@ -181,11 +181,11 @@ class WhisperInference(BaseInterface):
181
  progress(1, desc="Completed!")
182
 
183
  file_name = safe_filename(yt.title)
184
- subtitle = self.generate_and_write_subtitle(
185
  file_name=file_name,
186
  transcribed_segments=result,
187
  add_timestamp=add_timestamp,
188
- subformat=subformat
189
  )
190
 
191
  return f"Done in {self.format_time(elapsed_time)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
@@ -209,7 +209,7 @@ class WhisperInference(BaseInterface):
209
  micaudio: str,
210
  model_size: str,
211
  lang: str,
212
- subformat: str,
213
  istranslate: bool,
214
  beam_size: int,
215
  log_prob_threshold: float,
@@ -227,8 +227,8 @@ class WhisperInference(BaseInterface):
227
  Whisper model size from gr.Dropdown()
228
  lang: str
229
  Source language of the file to transcribe from gr.Dropdown()
230
- subformat: str
231
- Subtitle format to write from gr.Dropdown(). Supported format: [SRT, WebVTT]
232
  istranslate: bool
233
  Boolean value from gr.Checkbox() that determines whether to translate to English.
234
  It's Whisper's feature to translate speech from another language directly into English end-to-end.
@@ -261,11 +261,11 @@ class WhisperInference(BaseInterface):
261
  progress=progress)
262
  progress(1, desc="Completed!")
263
 
264
- subtitle = self.generate_and_write_subtitle(
265
  file_name="Mic",
266
  transcribed_segments=result,
267
  add_timestamp=True,
268
- subformat=subformat
269
  )
270
 
271
  return f"Done in {self.format_time(elapsed_time)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
@@ -361,11 +361,11 @@ class WhisperInference(BaseInterface):
361
  )
362
 
363
  @staticmethod
364
- def generate_and_write_subtitle(file_name: str,
365
- transcribed_segments: list,
366
- add_timestamp: bool,
367
- subformat: str,
368
- ) -> str:
369
  """
370
  This method writes subtitle file and returns str to gr.Textbox
371
  """
@@ -375,13 +375,18 @@ class WhisperInference(BaseInterface):
375
  else:
376
  output_path = os.path.join("outputs", f"{file_name}")
377
 
378
- if subformat == "SRT":
379
- subtitle = get_srt(transcribed_segments)
380
- write_file(subtitle, f"{output_path}.srt")
381
- elif subformat == "WebVTT":
382
- subtitle = get_vtt(transcribed_segments)
383
- write_file(subtitle, f"{output_path}.vtt")
384
- return subtitle
 
 
 
 
 
385
 
386
  @staticmethod
387
  def format_time(elapsed_time: float) -> str:
 
8
  import torch
9
 
10
  from .base_interface import BaseInterface
11
+ from modules.subtitle_manager import get_srt, get_vtt, get_txt, write_file, safe_filename
12
  from modules.youtube_manager import get_ytdata, get_ytaudio
13
 
14
  DEFAULT_MODEL_SIZE = "large-v2"
 
30
  fileobjs: list,
31
  model_size: str,
32
  lang: str,
33
+ file_format: str,
34
  istranslate: bool,
35
  add_timestamp: bool,
36
  beam_size: int,
 
49
  Whisper model size from gr.Dropdown()
50
  lang: str
51
  Source language of the file to transcribe from gr.Dropdown()
52
+ file_format: str
53
+ File format to write from gr.Dropdown(). Supported format: [SRT, WebVTT, txt]
54
  istranslate: bool
55
  Boolean value from gr.Checkbox() that determines whether to translate to English.
56
  It's Whisper's feature to translate speech from another language directly into English end-to-end.
 
93
 
94
  file_name, file_ext = os.path.splitext(os.path.basename(fileobj.orig_name))
95
  file_name = safe_filename(file_name)
96
+ subtitle = self.generate_and_write_file(
97
  file_name=file_name,
98
  transcribed_segments=result,
99
  add_timestamp=add_timestamp,
100
+ file_format=file_format
101
  )
102
 
103
  files_info[file_name] = {"subtitle": subtitle, "elapsed_time": elapsed_time}
 
122
  youtubelink: str,
123
  model_size: str,
124
  lang: str,
125
+ file_format: str,
126
  istranslate: bool,
127
  add_timestamp: bool,
128
  beam_size: int,
 
141
  Whisper model size from gr.Dropdown()
142
  lang: str
143
  Source language of the file to transcribe from gr.Dropdown()
144
+ file_format: str
145
+ File format to write from gr.Dropdown(). Supported format: [SRT, WebVTT, txt]
146
  istranslate: bool
147
  Boolean value from gr.Checkbox() that determines whether to translate to English.
148
  It's Whisper's feature to translate speech from another language directly into English end-to-end.
 
181
  progress(1, desc="Completed!")
182
 
183
  file_name = safe_filename(yt.title)
184
+ subtitle = self.generate_and_write_file(
185
  file_name=file_name,
186
  transcribed_segments=result,
187
  add_timestamp=add_timestamp,
188
+ file_format=file_format
189
  )
190
 
191
  return f"Done in {self.format_time(elapsed_time)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
 
209
  micaudio: str,
210
  model_size: str,
211
  lang: str,
212
+ file_format: str,
213
  istranslate: bool,
214
  beam_size: int,
215
  log_prob_threshold: float,
 
227
  Whisper model size from gr.Dropdown()
228
  lang: str
229
  Source language of the file to transcribe from gr.Dropdown()
230
+ file_format: str
231
+ File format to write from gr.Dropdown(). Supported format: [SRT, WebVTT, txt]
232
  istranslate: bool
233
  Boolean value from gr.Checkbox() that determines whether to translate to English.
234
  It's Whisper's feature to translate speech from another language directly into English end-to-end.
 
261
  progress=progress)
262
  progress(1, desc="Completed!")
263
 
264
+ subtitle = self.generate_and_write_file(
265
  file_name="Mic",
266
  transcribed_segments=result,
267
  add_timestamp=True,
268
+ file_format=file_format
269
  )
270
 
271
  return f"Done in {self.format_time(elapsed_time)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
 
361
  )
362
 
363
  @staticmethod
364
+ def generate_and_write_file(file_name: str,
365
+ transcribed_segments: list,
366
+ add_timestamp: bool,
367
+ file_format: str,
368
+ ) -> str:
369
  """
370
  This method writes subtitle file and returns str to gr.Textbox
371
  """
 
375
  else:
376
  output_path = os.path.join("outputs", f"{file_name}")
377
 
378
+ if file_format == "SRT":
379
+ content = get_srt(transcribed_segments)
380
+ write_file(content, f"{output_path}.srt")
381
+
382
+ elif file_format == "WebVTT":
383
+ content = get_vtt(transcribed_segments)
384
+ write_file(content, f"{output_path}.vtt")
385
+
386
+ elif file_format == "txt":
387
+ content = get_txt(transcribed_segments)
388
+ write_file(content, f"{output_path}.vtt")
389
+ return content
390
 
391
  @staticmethod
392
  def format_time(elapsed_time: float) -> str: