Chenzhou committed on
Commit
67ee310
·
1 Parent(s): 17343fc

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -0
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ import gradio as gr
3
+ import os
4
+ import subprocess
5
+
6
# Hugging Face Hub id of the fine-tuned Whisper checkpoint; replace it with
# "your-username/the-name-you-picked" to serve your own model.
MODEL_ID = "tilos/whisper-small-zh-HK"
pipe = pipeline(model=MODEL_ID)
7
+
8
def video2mp3(video_file, output_ext="mp3"):
    """Convert a media file to an audio file next to it using ffmpeg.

    The output path is the input path with its extension replaced by
    ``output_ext``. An existing file at that path is overwritten (``-y``).

    Args:
        video_file: Path to the input media file.
        output_ext: Extension (without the leading dot) of the file to
            produce.

    Returns:
        The path of the audio file. When ``video_file`` already ends in the
        requested extension it is returned unchanged and ffmpeg is not run.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status, so a failed conversion is not mistaken for a success.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    filename, _ = os.path.splitext(video_file)
    audio_file = f"{filename}.{output_ext}"

    if audio_file == video_file:
        # Input and output would be the same file; converting a file onto
        # itself cannot work, so hand the input back as-is.
        return audio_file

    # ffmpeg's console output is discarded; failures are reported through
    # the exit status instead (check=True).
    subprocess.run(
        ["ffmpeg", "-y", "-i", video_file, audio_file],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
        check=True,
    )
    return audio_file
14
+
15
def transcribe(audio):
    """Transcribe an audio file with the module-level speech pipeline.

    Args:
        audio: Path to the audio file to transcribe, or ``None`` when there
            is nothing to transcribe.

    Returns:
        The recognised text, or an empty string when ``audio`` is ``None``.
    """
    if audio is None:
        # Nothing was provided (e.g. the form was submitted without a
        # recording); skip the model instead of failing inside the pipeline.
        return ""
    return pipe(audio)["text"]
18
+
19
def video_identity(video):
    """Transcribe the speech contained in an uploaded video.

    The audio is first extracted with ``video2mp3`` and the resulting file
    is then passed to ``transcribe``.

    Args:
        video: Path of the uploaded video file.

    Returns:
        The recognised text as a plain string.
    """
    audio_file = video2mp3(video)
    return transcribe(audio_file)


# The handler returns a transcription, so the output component must be a
# text box; a video output would try to treat the result as a video file.
video = gr.Interface(
    fn=video_identity,
    inputs=gr.Video(),
    outputs="text",
)
32
+
33
# Microphone tab: the recording is handed to `transcribe` as a file path
# and the returned transcription is shown as text.
microphone_input = gr.Audio(source="microphone", type="filepath")
voice = gr.Interface(
    transcribe,
    microphone_input,
    "text",
    title="Whisper Small Cantonese",
    description="Realtime demo for Cantonese speech recognition using a fine-tuned Whisper small model.",
)
40
+
41
+
42
+
43
# Combine both interfaces into one tabbed app (video tab first, microphone
# tab second) and start serving it.
tabs = [video, voice]
demo = gr.TabbedInterface(tabs)
demo.launch()