AI-DHD committed on
Commit
ef99bde
·
1 Parent(s): 2e979b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -6
app.py CHANGED
@@ -1,25 +1,49 @@
1
  import gradio as gr
2
  import whisper
3
  from pytube import YouTube
4
- import os
5
 
6
  class GradioInference():
7
  def __init__(self):
8
  self.sizes = list(whisper._MODELS.keys())
 
9
  self.current_size = "base"
10
  self.loaded_model = whisper.load_model(self.current_size)
11
  self.yt = None
12
 
13
- def __call__(self, link, file, size, subs):
14
  if self.yt is None:
15
  self.yt = YouTube(link)
16
-
17
  path = self.yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
18
 
 
 
 
19
  if size != self.current_size:
20
  self.loaded_model = whisper.load_model(size)
21
  self.current_size = size
22
- results = self.loaded_model.transcribe(path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  def format_time(self, time):
25
  hours = time//3600
@@ -37,7 +61,6 @@ title="Youtube Whisperer"
37
  description="Speech to text transcription of Youtube videos using OpenAI's Whisper"
38
 
39
  block = gr.Blocks()
40
-
41
  with block:
42
  gr.HTML(
43
  """
@@ -55,6 +78,9 @@ with block:
55
  with gr.Box():
56
  with gr.Row().style(equal_height=True):
57
  sz = gr.Dropdown(label="Model Size", choices=gio.sizes, value='base')
 
 
 
58
  link = gr.Textbox(label="YouTube Link")
59
  title = gr.Label(label="Video Title")
60
  with gr.Row().style(equal_height=True):
@@ -62,6 +88,6 @@ with block:
62
  text = gr.Textbox(label="Transcription", placeholder="Transcription Output", lines=10)
63
  with gr.Row().style(equal_height=True):
64
  btn = gr.Button("Transcribe")
65
- btn.click(gio, inputs=[link, sz], outputs=[text])
66
  link.change(gio.populate_metadata, inputs=[link], outputs=[img, title])
67
  block.launch()
 
1
  import gradio as gr
2
  import whisper
3
  from pytube import YouTube
4
+
5
 
6
  class GradioInference():
7
  def __init__(self):
8
  self.sizes = list(whisper._MODELS.keys())
9
+ self.langs = ["none"] + sorted(list(whisper.tokenizer.LANGUAGES.values()))
10
  self.current_size = "base"
11
  self.loaded_model = whisper.load_model(self.current_size)
12
  self.yt = None
13
 
14
def __call__(self, link, lang, size, subs):
    """Download a YouTube video's audio and transcribe it with Whisper.

    Args:
        link: URL of the video. Only used when no ``YouTube`` object is
            cached on ``self.yt`` yet.
        lang: Language name forwarded to ``transcribe``; the sentinel
            ``"none"`` is converted to ``None`` before the call.
        size: Whisper model size. The model is reloaded only when this
            differs from ``self.current_size``.
        subs: Output format selector: ``".srt"`` or ``".csv"`` return the
            timestamped segments in that format; any other value
            (``"None"``, or ``None`` when nothing was selected) returns
            the plain transcription text.

    Returns:
        The transcription as a string in the requested format.
    """
    if self.yt is None:
        self.yt = YouTube(link)
    # NOTE(review): once set, self.yt is reused regardless of ``link``;
    # presumably it is refreshed elsewhere when the link changes - confirm.
    path = self.yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")

    if lang == "none":
        lang = None

    # Swap models only when a different size was requested.
    if size != self.current_size:
        self.loaded_model = whisper.load_model(size)
        self.current_size = size
    results = self.loaded_model.transcribe(path, language=lang)

    # Compare against ``subs`` itself: the previous final branch tested
    # ".csv" == ".csv", which is always true, so every value other than
    # "None"/".srt" (including an unselected option) produced CSV.
    if subs == ".srt":
        return self.srt(results["segments"])
    if subs == ".csv":
        return self.csv(results["segments"])
    return results["text"]
33
+
34
def srt(self, segments):
    """Render transcription segments as numbered subtitle cues.

    Each segment becomes three lines followed by a blank line: a 1-based
    cue number, ``<start> --> <end>`` with both times passed through
    ``self.format_time``, and the segment text.

    Args:
        segments: Iterable of mappings with ``'start'``, ``'end'`` and
            ``'text'`` keys.

    Returns:
        All cues concatenated into one string (empty if no segments).
    """
    cues = []
    for number, seg in enumerate(segments, start=1):
        begin = self.format_time(seg['start'])
        finish = self.format_time(seg['end'])
        cues.append(f"{number}\n{begin} --> {finish}\n{seg['text']}\n\n")
    return "".join(cues)
41
+
42
def csv(self, segments):
    """Render transcription segments as CSV rows of ``start,end,text``.

    Rows are written with the standard ``csv`` writer so that text
    containing commas, quotes or newlines is quoted instead of silently
    producing extra columns. Text without such characters is emitted
    unquoted.

    Args:
        segments: Iterable of mappings with ``'start'``, ``'end'`` and
            ``'text'`` keys.

    Returns:
        One ``\\n``-terminated line per segment, as a single string
        (empty if there are no segments).
    """
    # Local imports: inside the class this method's own name is ``csv``,
    # so the module is bound under a distinct local alias.
    import csv as _csv
    import io

    buffer = io.StringIO()
    # lineterminator="\n" keeps the line endings the plain-string version used.
    writer = _csv.writer(buffer, lineterminator="\n")
    for segment in segments:
        writer.writerow([str(segment['start']), str(segment['end']), segment['text']])
    return buffer.getvalue()
47
 
48
  def format_time(self, time):
49
  hours = time//3600
 
61
  description="Speech to text transcription of Youtube videos using OpenAI's Whisper"
62
 
63
  block = gr.Blocks()
 
64
  with block:
65
  gr.HTML(
66
  """
 
78
  with gr.Box():
79
  with gr.Row().style(equal_height=True):
80
  sz = gr.Dropdown(label="Model Size", choices=gio.sizes, value='base')
81
+ lang = gr.Dropdown(label="Language (Optional)", choices=gio.langs, value="none")
82
+ with gr.Row().style(equal_height=True):
83
+ wt = gr.Radio(["None", ".srt", ".csv"], label="With Timestamps?")
84
  link = gr.Textbox(label="YouTube Link")
85
  title = gr.Label(label="Video Title")
86
  with gr.Row().style(equal_height=True):
 
88
  text = gr.Textbox(label="Transcription", placeholder="Transcription Output", lines=10)
89
  with gr.Row().style(equal_height=True):
90
  btn = gr.Button("Transcribe")
91
+ btn.click(gio, inputs=[link, lang, sz, wt], outputs=[text])
92
  link.change(gio.populate_metadata, inputs=[link], outputs=[img, title])
93
  block.launch()