"""Transcribe the speech in an online video with a fine-tuned Whisper model.

Running this module logs in to the Hugging Face Hub with a token read from
``../../token.txt``, loads the ``dacavi/whisper-small-hi`` pipeline, and
prints the transcription of one sample video URL.
"""

# --- Earlier microphone-based demo, kept commented out for reference ---------
# from transformers import pipeline
# import gradio as gr
#
# pipe = pipeline(model="dacavi/whisper-small-hi")  # change to "your-username/the-name-you-picked"
# def transcribe(audio):
#     text = pipe(audio)["text"]
#     return text
#
# iface = gr.Interface(
#     fn=transcribe,
#     inputs=gr.Audio(sources="microphone", type="filepath"),
#     outputs="text",
#     title="Whisper Small Hindi",
#     description="Realtime demo for Hindi speech recognition using a fine-tuned Whisper small model.",
# )
#
# iface.launch()

import os
import subprocess
import tempfile

import gradio as gr
from huggingface_hub import login
from moviepy.editor import VideoFileClip
from pydub import AudioSegment
from transformers import pipeline

# The Hub token is the first line of a file kept outside this directory.
with open("../../token.txt", "r") as file:
    token = file.readline().strip()

login(token=token, add_to_git_credential=True)

# Speech-recognition pipeline shared by every call to transcribe_video().
pipe = pipeline(model="dacavi/whisper-small-hi")


def transcribe_video(video_url):
    """Download the audio track of *video_url* and return its transcription.

    The audio is fetched with the ``yt-dlp`` command-line tool (it must be on
    ``PATH``), converted to WAV, and passed to the module-level ``pipe``.

    Args:
        video_url: URL of the video to transcribe, as a string.

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.

    Raises:
        ValueError: If ``video_url`` is empty or only whitespace.
        FileNotFoundError: If the ``yt-dlp`` executable cannot be found.
        subprocess.CalledProcessError: If ``yt-dlp`` exits with a non-zero
            status.
    """
    if not video_url or not video_url.strip():
        raise ValueError("video_url must be a non-empty URL")

    # A private scratch directory gives every call its own output file (no
    # collisions between concurrent calls) and is deleted on exit even when
    # the download or the transcription raises.
    with tempfile.TemporaryDirectory() as work_dir:
        audio_path = os.path.join(work_dir, "audioSample.wav")

        # Arguments are passed as a list, so no shell ever parses the URL;
        # "--" stops a URL that starts with "-" from being read as an option.
        subprocess.run(
            [
                "yt-dlp",
                "-o", audio_path,
                "-x",
                "--audio-format", "wav",
                "--",
                video_url,
            ],
            check=True,
        )
        print("Downloaded audio:", audio_path)

        # Transcribe while the scratch directory still exists.
        return pipe(audio_path)["text"]


# Sample run: executes whenever the module is run or imported.
print(transcribe_video("https://www.youtube.com/watch?v=8FkLRUJj-o0"))

# --- Gradio front end for transcribe_video, currently disabled ---------------
# iface = gr.Interface(
#     fn=transcribe_video,
#     inputs="text",
#     outputs="text",
#     live=True,
#     title="Video Transcription",
#     description="Paste the URL of a video to transcribe the spoken content.",
# )
#
# iface.launch()