File size: 1,571 Bytes
93fc4c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6acc853
93fc4c5
 
 
 
 
 
6acc853
93fc4c5
 
 
 
 
 
 
 
bd86a75
93fc4c5
 
bd86a75
93fc4c5
 
 
bd86a75
 
 
 
93fc4c5
 
bd86a75
93fc4c5
 
 
bd86a75
6acc853
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# Disabled microphone-based variant of the demo (commented out, not executed):
# from transformers import pipeline
# import gradio as gr
#
# pipe = pipeline(model="dacavi/whisper-small-hi")  # change to "your-username/the-name-you-picked"
# def transcribe(audio):
#     text = pipe(audio)["text"]
#     return text
#
# iface = gr.Interface(
#     fn=transcribe,
#     inputs=gr.Audio(sources="microphone", type="filepath"),
#     outputs="text",
#     title="Whisper Small Hindi",
#     description="Realtime demo for Hindi speech recognition using a fine-tuned Whisper small model.",
# )
#
# iface.launch()

import os
import subprocess
import tempfile

import gradio as gr
from moviepy.editor import VideoFileClip
from transformers import pipeline

# Model pipeline for the "dacavi/whisper-small-hi" checkpoint, created once at
# import time; transcribe_video calls it with an audio file path and reads the
# "text" field of the result.
# NOTE(review): no task is passed, so the task is presumably inferred from the
# checkpoint's metadata (expected: speech recognition) — confirm.
pipe = pipeline(model="dacavi/whisper-small-hi")

def transcribe_video(video_url):
    """Download a video, extract its audio track and return the transcription.

    Args:
        video_url: URL of the video to fetch with ``youtube-dl``. ``None``,
            empty or whitespace-only values are treated as "nothing to do".

    Returns:
        The ``"text"`` field produced by the module-level ``pipe`` for the
        extracted audio, or ``""`` when no URL was supplied.

    Raises:
        FileNotFoundError: if the ``youtube-dl`` executable is not on PATH.
        subprocess.CalledProcessError: if ``youtube-dl`` exits with a
            non-zero status (bad URL, network failure, ...).
        ValueError: if the downloaded video has no audio track.
    """
    video_url = (video_url or "").strip()
    if not video_url:
        # Nothing was entered; avoid spawning a download for an empty field.
        return ""

    # A private scratch directory guarantees cleanup of every intermediate
    # file (including extra files the downloader may leave behind), even when
    # a later step raises.
    with tempfile.TemporaryDirectory() as work_dir:
        video_path = os.path.join(work_dir, "video.mp4")
        audio_path = os.path.join(work_dir, "audio.wav")

        # Argument list + no shell: the URL is untrusted user input and must
        # never be interpreted by a shell. "--" stops option parsing so a
        # value starting with "-" cannot be taken as a youtube-dl flag.
        subprocess.run(
            ["youtube-dl", "-o", video_path, "--", video_url],
            check=True,
        )

        video_clip = VideoFileClip(video_path)
        try:
            audio_clip = video_clip.audio
            if audio_clip is None:
                raise ValueError("The downloaded video has no audio track.")
            # "wav" is a container, not an encoder name; 16-bit PCM is the
            # codec moviepy documents for .wav output.
            audio_clip.write_audiofile(audio_path, codec="pcm_s16le")
        finally:
            # Release the ffmpeg reader processes held by the clip.
            video_clip.close()

        # Transcribe while the scratch directory (and the wav file) still exist.
        return pipe(audio_path)["text"]

# Web UI: a single text box for the video URL, wired to transcribe_video, with
# the returned transcription shown as text.
iface = gr.Interface(
    fn=transcribe_video,
    inputs="text",
    outputs="text",
    # live mode is intentionally not enabled: with a free-text input it would
    # rerun the function on every input change, starting a download and a
    # transcription for each partially typed URL. The function now runs only
    # when the user submits.
    title="Video Transcription",
    description="Paste the URL of a video to transcribe the spoken content.",
)

# Start the Gradio server for the interface.
iface.launch()