"""Streaming microphone transcription demo built on Gradio and faster-whisper.

The script clones the ``nadsoft/faster-hamsa`` model repository from the
Hugging Face Hub, loads it with faster-whisper on CUDA, and serves a live
Gradio interface that transcribes the accumulated microphone audio.
"""

import os

from huggingface_hub.hf_api import HfFolder

# The Hub token is read from the ``auth_token`` environment variable and stored
# before the repository is cloned below.
HfFolder.save_token(os.environ.get("auth_token"))

from huggingface_hub import Repository
import gradio as gr
from faster_whisper import WhisperModel
import numpy as np
from scipy.io.wavfile import write

repo = Repository(
    local_dir="huggingface-hub",
    clone_from="https://huggingface.co/nadsoft/faster-hamsa",
)

# Scratch WAV file that holds the audio accumulated so far.
file_name = "recording0.wav"

# Remove a leftover recording from a previous run, if any.
if os.path.exists(file_name):
    os.remove(file_name)

transcriber = WhisperModel(repo.local_dir, device="cuda", compute_type="float16")
model = transcriber


def transcribe(stream, new_chunk):
    """Append a microphone chunk to the running stream and transcribe it.

    Args:
        stream: Audio accumulated by previous calls as a float32 array, or
            ``None`` on the first call.
        new_chunk: ``(sample_rate, samples)`` tuple as delivered by the
            streaming ``gr.Audio`` input.

    Returns:
        A ``(stream, text)`` tuple: the updated audio buffer (fed back in as
        Gradio state) and the transcription of the whole buffer. ``text`` is
        an empty string when no speech segment is detected.
    """
    sr, y = new_chunk
    y = y.astype(np.float32)

    # Peak-normalise the chunk. A silent (all-zero) or empty chunk has no peak
    # to scale by; dividing anyway would inject NaNs into the stream.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    stream = y if stream is None else np.concatenate([stream, y])

    # The model is fed through a file, so persist the whole buffer first.
    write(file_name, sr, stream)
    segments, _ = model.transcribe(file_name, language="ar")

    # ``segments`` is lazy: the transcription actually runs while iterating.
    # Join every segment (not only the first) and read ``.text`` by name so the
    # result does not depend on the positional layout of ``Segment``.
    text = "".join(segment.text for segment in segments)
    return stream, text


demo = gr.Interface(
    transcribe,
    ["state", gr.Audio(sources=["microphone"], streaming=True)],
    ["state", "text"],
    live=True,
)

demo.launch()