|
import gradio as gr |
|
|
|
from google.cloud import speech |
|
from microphone import MicrophoneStream |
|
from utils import listen_print_loop |
|
|
|
|
|
|
|
# Audio capture parameters shared by the microphone stream and the recognizer.
RATE = 16000  # sample rate in Hz; also sent to the recognizer as sample_rate_hertz
CHUNK = RATE // 10  # samples per buffer, i.e. one tenth of a second of audio
LANGUAGE = "id-ID"  # language code sent to the recognizer as language_code
|
|
|
# Speech client reused for every streaming request; created once at import time.
transcribe_client = speech.SpeechClient()

# Recognition settings: LINEAR16-encoded audio at RATE Hz, transcribed as LANGUAGE.
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=RATE,
    language_code=LANGUAGE,
)

# Streaming wrapper around `config`; interim_results=True requests partial
# (non-final) results in addition to the final ones.
streaming_config = speech.StreamingRecognitionConfig(
    config=config,
    interim_results=True,
)
|
|
|
async def main(audio):
    """Stream microphone audio to the speech client and return the result.

    Opens a ``MicrophoneStream`` at ``RATE``/``CHUNK``, wraps each chunk it
    produces in a ``StreamingRecognizeRequest``, sends the requests through
    ``transcribe_client.streaming_recognize`` and hands the responses to
    ``listen_print_loop``.

    Args:
        audio: Value supplied by the Gradio audio input.
            NOTE(review): this argument is never read; the audio that is
            transcribed comes from ``MicrophoneStream`` instead -- confirm
            that this is intended.

    Returns:
        Whatever ``await listen_print_loop(responses)`` produces. The previous
        ``-> None`` annotation contradicted this ``return`` and was dropped.
    """
    print("Streaming started ...")

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()

        # Lazy generator: requests are built only as the client pulls them,
        # so the microphone stream has to stay open while responses are read.
        requests = (
            speech.StreamingRecognizeRequest(audio_content=content)
            for content in audio_generator
        )

        responses = transcribe_client.streaming_recognize(streaming_config, requests)

        # NOTE(review): if the client call and the microphone generator are
        # synchronous, they block the event loop while running -- confirm.
        return await listen_print_loop(responses)
|
|
|
# Gradio interface: a streaming microphone input wired to `main`, re-run
# live as input arrives (live=True).
# NOTE(review): two output components are declared, so `main` must return
# two values (text, audio) -- confirm what `listen_print_loop` returns.
demo = gr.Interface(
    fn=main,
    inputs=[
        gr.Audio(sources="microphone", streaming=True, label="Input Speech"),
    ],
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Audio(label="Audio"),
    ],
    live=True,
)
|
|
|
if __name__ == "__main__":
    # Start the Gradio server only when this file is executed as a script,
    # not when it is imported.
    demo.launch()