# from transformers import pipeline
# import gradio as gr
#
# pipe = pipeline(model="dacavi/whisper-small-hi") # change to "your-username/the-name-you-picked"
# def transcribe(audio):
#     text = pipe(audio)["text"]
#     return text
#
# iface = gr.Interface(
#     fn=transcribe,
#     inputs=gr.Audio(sources="microphone", type="filepath"),
#     outputs="text",
#     title="Whisper Small Hindi",
#     description="Realtime demo for Hindi speech recognition using a fine-tuned Whisper small model.",
# )
#
# iface.launch()
import os
import shutil
import subprocess
import tempfile

import gradio as gr
from huggingface_hub import login
from moviepy.editor import VideoFileClip
from pydub import AudioSegment
from transformers import pipeline
with open("../../token.txt", "r") as file: | |
token = file.readline().strip() | |
login(token=token, add_to_git_credential=True) | |
pipe = pipeline(model="dacavi/whisper-small-hi") | |
def transcribe_video(video_url): | |
# Download video and extract audio | |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio: | |
# os.system(f"yt-dlp -o {temp_audio.name} -x --audio-format wav {video_url}") | |
os.system(f"yt-dlp -o audioSample.wav -x --audio-format wav {video_url}") | |
print("Downloaded audio:", temp_audio.name) | |
# Transcribe audio | |
text = pipe("audioSample.wav")["text"] | |
# Clean up temporary files | |
os.remove("audioSample.wav") | |
return text | |
print(transcribe_video("https://www.youtube.com/watch?v=8FkLRUJj-o0")) | |
# iface = gr.Interface(
#     fn=transcribe_video,
#     inputs="text",
#     outputs="text",
#     live=True,
#     title="Video Transcription",
#     description="Paste the URL of a video to transcribe the spoken content.",
# )
#
# iface.launch()