Youtube-Whisperer

Runtime error

App Files Files Community

AI-DHD committed on Dec 11, 2022

Commit

ef99bde

1 Parent(s): 2e979b3

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -6

app.py CHANGED Viewed

@@ -1,25 +1,49 @@
 import gradio as gr
 import whisper
 from pytube import YouTube
-import os
 class GradioInference():
   def __init__(self):
     self.sizes = list(whisper._MODELS.keys())
     self.current_size = "base"
     self.loaded_model = whisper.load_model(self.current_size)
     self.yt = None
-  def __call__(self, link, file, size, subs):
     if self.yt is None:
       self.yt = YouTube(link)
     path = self.yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
     if size != self.current_size:
       self.loaded_model = whisper.load_model(size)
       self.current_size = size
-    results = self.loaded_model.transcribe(path)
   def format_time(self, time):
     hours = time//3600
@@ -37,7 +61,6 @@ title="Youtube Whisperer"
 description="Speech to text transcription of Youtube videos using OpenAI's Whisper"
 block = gr.Blocks()
 with block:
     gr.HTML(
         """
@@ -55,6 +78,9 @@ with block:
         with gr.Box():
           with gr.Row().style(equal_height=True):
             sz = gr.Dropdown(label="Model Size", choices=gio.sizes, value='base')
           link = gr.Textbox(label="YouTube Link")
           title = gr.Label(label="Video Title")
           with gr.Row().style(equal_height=True):
@@ -62,6 +88,6 @@ with block:
             text = gr.Textbox(label="Transcription", placeholder="Transcription Output", lines=10)
           with gr.Row().style(equal_height=True):
               btn = gr.Button("Transcribe")
-          btn.click(gio, inputs=[link, sz], outputs=[text])
           link.change(gio.populate_metadata, inputs=[link], outputs=[img, title])
 block.launch()

 import gradio as gr
 import whisper
 from pytube import YouTube
 class GradioInference():
   def __init__(self):
     self.sizes = list(whisper._MODELS.keys())
+    self.langs = ["none"] + sorted(list(whisper.tokenizer.LANGUAGES.values()))
     self.current_size = "base"
     self.loaded_model = whisper.load_model(self.current_size)
     self.yt = None
+  def __call__(self, link, lang, size, subs):
+    """Download a YouTube video's audio track and transcribe it with Whisper.
+
+    Args:
+      link: URL of the video; only used when no YouTube object is cached in self.yt.
+      lang: Language name handed to transcribe(); the "none" sentinel becomes None.
+      size: Whisper model size name; the model is reloaded when it differs from
+        self.current_size.
+      subs: Output format. ".srt" and ".csv" select timestamped output; any
+        other value (including "None") yields the plain transcript.
+
+    Returns:
+      The transcription as plain text, SRT text, or CSV text.
+    """
+    # NOTE(review): an existing self.yt is reused and `link` is ignored here;
+    # presumably populate_metadata refreshes self.yt on link change -- confirm.
     if self.yt is None:
       self.yt = YouTube(link)
     path = self.yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
+    if lang == "none":
+      lang = None
     if size != self.current_size:
       self.loaded_model = whisper.load_model(size)
       self.current_size = size
+    results = self.loaded_model.transcribe(path, language=lang)
+    # Dispatch on the requested format. The previous final branch compared two
+    # literals (always true), so every unrecognised value silently produced CSV.
+    if subs == ".srt":
+      return self.srt(results["segments"])
+    if subs == ".csv":
+      return self.csv(results["segments"])
+    # Plain text is the fallback: covers "None" and an unselected radio
+    # (presumably delivered as None -- confirm against the Gradio version in use).
+    return results["text"]
+  def srt(self, segments):
+    """Render segments as SubRip-style text.
+
+    Each segment becomes a 1-based index line, a "start --> end" line built
+    with self.format_time, the segment text, and a blank separator line.
+    """
+    entries = []
+    for number, seg in enumerate(segments, start=1):
+      span = f"{self.format_time(seg['start'])} --> {self.format_time(seg['end'])}"
+      entries.append(f"{number}\n{span}\n{seg['text']}\n\n")
+    return "".join(entries)
+  def csv(self, segments):
+    """Render segments as CSV rows of start, end, text (no header row).
+
+    Fields are quoted by the csv module only when needed, so commas, quotes
+    or newlines inside a segment's text no longer break the column layout.
+    Text without such characters is emitted exactly as before.
+    """
+    # Local, aliased imports: keeps the module distinct from this method's name.
+    import csv as csv_module
+    import io
+    buffer = io.StringIO()
+    # "\n" matches the line ending of the hand-built rows (writer default is "\r\n").
+    writer = csv_module.writer(buffer, lineterminator="\n")
+    for segment in segments:
+      writer.writerow([segment["start"], segment["end"], segment["text"]])
+    return buffer.getvalue()
   def format_time(self, time):
     hours = time//3600
 description="Speech to text transcription of Youtube videos using OpenAI's Whisper"
 block = gr.Blocks()
 with block:
     gr.HTML(
         """
         with gr.Box():
           with gr.Row().style(equal_height=True):
             sz = gr.Dropdown(label="Model Size", choices=gio.sizes, value='base')
+            lang = gr.Dropdown(label="Language (Optional)", choices=gio.langs, value="none")
+          with gr.Row().style(equal_height=True):
+            wt = gr.Radio(["None", ".srt", ".csv"], label="With Timestamps?")
           link = gr.Textbox(label="YouTube Link")
           title = gr.Label(label="Video Title")
           with gr.Row().style(equal_height=True):
             text = gr.Textbox(label="Transcription", placeholder="Transcription Output", lines=10)
           with gr.Row().style(equal_height=True):
               btn = gr.Button("Transcribe")
+          btn.click(gio, inputs=[link, lang, sz, wt], outputs=[text])
           link.change(gio.populate_metadata, inputs=[link], outputs=[img, title])
 block.launch()