Dani
committed on
Commit
·
93fc4c5
1
Parent(s):
bdb647b
new inference
Browse files- app.py +42 -12
- requirements.txt +2 -0
app.py
CHANGED
@@ -1,24 +1,54 @@
|
|
1 |
-
from transformers import pipeline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import gradio as gr
|
3 |
-
from
|
4 |
-
|
5 |
-
|
|
|
|
|
|
|
6 |
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
|
|
|
|
9 |
|
10 |
-
|
|
|
|
|
11 |
|
12 |
-
def transcribe(audio):
|
13 |
-
text = pipe(audio)["text"]
|
14 |
return text
|
15 |
|
16 |
iface = gr.Interface(
|
17 |
-
fn=
|
18 |
-
inputs=
|
19 |
outputs="text",
|
20 |
-
|
21 |
-
|
|
|
22 |
)
|
23 |
|
24 |
iface.launch()
|
|
|
1 |
+
# from transformers import pipeline
|
2 |
+
# import gradio as gr
|
3 |
+
#
|
4 |
+
# pipe = pipeline(model="dacavi/whisper-small-hi") # change to "your-username/the-name-you-picked"
|
5 |
+
# def transcribe(audio):
|
6 |
+
# text = pipe(audio)["text"]
|
7 |
+
# return text
|
8 |
+
#
|
9 |
+
# iface = gr.Interface(
|
10 |
+
# fn=transcribe,
|
11 |
+
# inputs=gr.Audio(sources="microphone", type="filepath"),
|
12 |
+
# outputs="text",
|
13 |
+
# title="Whisper Small Hindi",
|
14 |
+
# description="Realtime demo for Hindi speech recognition using a fine-tuned Whisper small model.",
|
15 |
+
# )
|
16 |
+
#
|
17 |
+
# iface.launch()
|
18 |
+
|
19 |
import gradio as gr
|
20 |
+
from transformers import pipeline
|
21 |
+
from moviepy.editor import VideoFileClip
|
22 |
+
import tempfile
|
23 |
+
import os
|
24 |
+
|
25 |
+
# Hugging Face pipeline for the "dacavi/whisper-small-hi" model, created once
# at module import so the model is not reloaded on every call.
pipe = pipeline(model="dacavi/whisper-small-hi")
|
26 |
|
27 |
+
def transcribe_video(video_url):
    """Download a video, extract its audio track and return the transcription.

    Args:
        video_url: URL of the video. It is handed to the ``youtube-dl``
            command-line tool as a single argument.

    Returns:
        The ``"text"`` field of the result returned by the module-level
        ``pipe`` for the extracted audio, or an empty string when
        ``video_url`` is empty or only whitespace.

    Raises:
        FileNotFoundError: if the ``youtube-dl`` executable is not installed.
        subprocess.CalledProcessError: if ``youtube-dl`` exits with a
            non-zero status.
        ValueError: if the downloaded video has no audio track.
    """
    # Imported locally so the function is self-contained with respect to the
    # file's import block.
    import subprocess

    video_url = (video_url or "").strip()
    if not video_url:
        # Nothing to download; avoid spawning the downloader for blank input.
        return ""

    # Everything lives in a private directory that is removed on exit, even
    # when a step fails. The video path does not exist before the download
    # starts, so the downloader cannot mistake a pre-created empty file for
    # an already-finished download.
    with tempfile.TemporaryDirectory() as work_dir:
        video_path = os.path.join(work_dir, "video.mp4")
        audio_path = os.path.join(work_dir, "audio.wav")

        # Argument list (no shell) so the URL cannot inject shell commands;
        # "--" stops a URL starting with "-" from being parsed as an option.
        subprocess.run(
            ["youtube-dl", "-o", video_path, "--", video_url],
            check=True,
        )

        video_clip = VideoFileClip(video_path)
        try:
            audio_clip = video_clip.audio
            if audio_clip is None:
                raise ValueError("The video has no audio track to transcribe.")
            # "pcm_s16le" is the PCM encoder used for .wav output; "wav" is a
            # container name, not an encoder name.
            audio_clip.write_audiofile(audio_path, codec="pcm_s16le")
        finally:
            # Release the reader processes/file handles held by the clip.
            video_clip.close()

        # Transcribe while the audio file still exists inside work_dir.
        return pipe(audio_path)["text"]
|
44 |
|
45 |
# Web UI: one text box for the video URL, one text box for the transcript.
# `live` is intentionally left at its default (off): with live updates a text
# input reruns the function whenever the textbox changes, which here would
# start a download and transcription for partially typed URLs. The run is
# triggered by the submit button instead.
iface = gr.Interface(
    fn=transcribe_video,
    inputs="text",
    outputs="text",
    title="Video Transcription",
    description="Paste the URL of a video to transcribe the spoken content.",
)

iface.launch()
|
requirements.txt
CHANGED
@@ -1,3 +1,5 @@
|
|
1 |
transformers
|
2 |
torch
|
3 |
tensorflow
|
|
|
|
|
|
1 |
transformers
|
2 |
torch
|
3 |
tensorflow
|
4 |
+
moviepy==1.0.3
|
5 |
+
ffmpeg
|