"""Gradio demo: transcribe the audio track of a video URL with Whisper.

Downloads the audio with ``yt-dlp``, runs it through a fine-tuned
Whisper-small pipeline, and displays the transcript in a Gradio UI.
"""

import os
import subprocess
import tempfile

import gradio as gr
from transformers import pipeline

# Fine-tuned Whisper-small checkpoint (Hindi) served by the demo.
pipe = pipeline(model="dacavi/whisper-small-hi")


def transcribe_video(video_url):
    """Download the audio of *video_url* and return its transcript.

    Args:
        video_url: Any URL understood by yt-dlp (e.g. a YouTube link).

    Returns:
        The transcribed text produced by the Whisper pipeline.

    Raises:
        subprocess.CalledProcessError: if the yt-dlp download fails.
    """
    # Per-request temporary directory: concurrent requests no longer
    # clobber a shared ./audioSample.wav, and cleanup is automatic even
    # when transcription raises.
    with tempfile.TemporaryDirectory() as tmpdir:
        audio_path = os.path.join(tmpdir, "audio.wav")
        # Argument list with shell=False (the default) prevents shell
        # injection through a crafted URL; check=True surfaces download
        # failures instead of transcribing a missing file.
        # "%(ext)s" lets yt-dlp name the intermediate download, while the
        # ffmpeg post-processor emits the final audio.wav.
        subprocess.run(
            [
                "yt-dlp",
                "-o", os.path.join(tmpdir, "audio.%(ext)s"),
                "-x",
                "--audio-format", "wav",
                video_url,
            ],
            check=True,
        )
        return pipe(audio_path)["text"]


iface = gr.Interface(
    fn=transcribe_video,
    inputs="text",
    outputs="text",
    live=True,
    title="Video Transcription",
    description="Paste the URL of a video to transcribe the spoken content.",
)

iface.launch()