Chenzhou committed on
Commit
2965889
·
1 Parent(s): 67ee310

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -30
app.py CHANGED
@@ -1,44 +1,18 @@
1
  from transformers import pipeline
2
  import gradio as gr
3
- import os
4
- import subprocess
5
 
6
  pipe = pipeline(model="tilos/whisper-small-zh-HK") # change to "your-username/the-name-you-picked"
7
 
8
- def video2mp3(video_file, output_ext="mp3"):
9
- filename, ext = os.path.splitext(video_file)
10
- subprocess.call(["ffmpeg", "-y", "-i", video_file, f"{filename}.{output_ext}"],
11
- stdout=subprocess.DEVNULL,
12
- stderr=subprocess.STDOUT)
13
- return f"{filename}.{output_ext}"
14
-
15
  def transcribe(audio):
16
  text = pipe(audio)["text"]
17
  return text
18
 
19
- def video_identity(video):
20
- audio_file = video2mp3(video)
21
- text = pipe(audio_file)#["text"]
22
- print(text)
23
- return text
24
-
25
- video = gr.Interface(video_identity,
26
- gr.Video(),
27
- "playable_video",
28
- #examples=[
29
- # os.path.join(os.path.dirname(__file__),
30
- # "video/video_sample.mp4")],
31
- cache_examples=True)
32
-
33
- voice = gr.Interface(
34
- fn=transcribe,
35
- inputs=gr.Audio(source="microphone", type="filepath"),
36
  outputs="text",
37
  title="Whisper Small Cantonese",
38
  description="Realtime demo for Cantonese speech recognition using a fine-tuned Whisper small model.",
39
  )
40
 
41
-
42
-
43
- demo = gr.TabbedInterface([video, voice])
44
- demo.launch()
 
1
  from transformers import pipeline
2
  import gradio as gr
 
 
3
 
4
  pipe = pipeline(model="tilos/whisper-small-zh-HK") # change to "your-username/the-name-you-picked"
5
 
 
 
 
 
 
 
 
6
  def transcribe(audio):
7
  text = pipe(audio)["text"]
8
  return text
9
 
10
+ iface = gr.Interface(
11
+ fn=transcribe,
12
+ inputs=gr.Audio(source="microphone", type="filepath"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  outputs="text",
14
  title="Whisper Small Cantonese",
15
  description="Realtime demo for Cantonese speech recognition using a fine-tuned Whisper small model.",
16
  )
17
 
18
+ iface.launch()