Spaces:

marquesafonso
/

multilang-asr-transcriber

Running

marquesafonso committed 13 days ago

Commit

03195d9

1 Parent(s): db9006e

moved device to auto; removed unused bool vars for audio/video interface change

Files changed (2) hide show

app.py CHANGED Viewed

@@ -7,7 +7,6 @@ def main():
         gr.Markdown('An automatic speech recognition tool using [faster-whisper](https://github.com/SYSTRAN/faster-whisper). Supports multilingual video transcription and translation to english. Users may set the max words per line.')
         with gr.Tabs(selected="video") as tabs:
             with gr.Tab("Video", id="video"):
-                video = True
                 file = gr.File(file_types=["video"],type="filepath", label="Upload a video")
                 file_type = gr.Radio(choices=["video"], value="video", label="File Type", visible=False)
                 max_words_per_line = gr.Number(value=6, label="Max words per line")
@@ -26,7 +25,6 @@ def main():
                     allow_flagging="never"
                 )
             with gr.Tab("Audio", id = "audio"):
-                video = False
                 file = gr.File(file_types=["audio"],type="filepath", label="Upload an audio file")
                 file_type = gr.Radio(choices=["audio"], value="audio", label="File Type", visible=False)
                 max_words_per_line = gr.Number(value=6, label="Max words per line")

         gr.Markdown('An automatic speech recognition tool using [faster-whisper](https://github.com/SYSTRAN/faster-whisper). Supports multilingual video transcription and translation to english. Users may set the max words per line.')
         with gr.Tabs(selected="video") as tabs:
             with gr.Tab("Video", id="video"):
                 file = gr.File(file_types=["video"],type="filepath", label="Upload a video")
                 file_type = gr.Radio(choices=["video"], value="video", label="File Type", visible=False)
                 max_words_per_line = gr.Number(value=6, label="Max words per line")
                     allow_flagging="never"
                 )
             with gr.Tab("Audio", id = "audio"):
                 file = gr.File(file_types=["audio"],type="filepath", label="Upload an audio file")
                 file_type = gr.Radio(choices=["audio"], value="audio", label="File Type", visible=False)
                 max_words_per_line = gr.Number(value=6, label="Max words per line")

src/transcriber.py CHANGED Viewed

@@ -77,7 +77,7 @@ def transcriber(file_input:gr.File,
         audio_input = convert_video_to_audio(file_input)
     else:
         audio_input = file_input
-    model = WhisperModel(model_version, device="cpu", compute_type="int8")
     segments, _ = model.transcribe(
         audio_input,
         beam_size=5,

         audio_input = convert_video_to_audio(file_input)
     else:
         audio_input = file_input
+    model = WhisperModel(model_version, device="auto", compute_type="int8")
     segments, _ = model.transcribe(
         audio_input,
         beam_size=5,