"""Gradio demo that streams microphone audio to Google Cloud Speech-to-Text."""

from typing import Any

import gradio as gr
from google.cloud import speech

from microphone import MicrophoneStream
from utils import listen_print_loop

# Audio recording parameters
RATE = 16000
CHUNK = int(RATE / 10)  # 100ms
LANGUAGE = "id-ID"

# The client and both config objects are built once at import time and shared
# by every call to ``main``.
transcribe_client = speech.SpeechClient()
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=RATE,
    language_code=LANGUAGE,
)
streaming_config = speech.StreamingRecognitionConfig(
    config=config, interim_results=True
)


async def main(audio) -> Any:
    """Open a microphone stream and transcribe it with Cloud Speech.

    Args:
        audio: Value delivered by the Gradio audio input. It is not read by
            this function; the audio sent to the recognizer comes from
            ``MicrophoneStream`` instead.

    Returns:
        Whatever ``listen_print_loop`` produces for the response stream.
    """
    # NOTE(review): ``demo`` declares two outputs (Textbox + Audio), so Gradio
    # presumably expects two values back -- confirm that ``listen_print_loop``
    # returns a (text, audio) pair.
    print("Streaming started ...")

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()

        # Lazily wrap each captured chunk in a streaming request so that
        # audio is forwarded as it becomes available.
        requests = (
            speech.StreamingRecognizeRequest(audio_content=content)
            for content in audio_generator
        )

        # NOTE(review): the microphone generator and this call look
        # synchronous; if so, they block the event loop while running inside
        # this coroutine -- confirm whether that is acceptable here.
        responses = transcribe_client.streaming_recognize(streaming_config, requests)

        # Returning inside the ``with`` keeps the stream open while the
        # responses are consumed, and closes it afterwards.
        return await listen_print_loop(responses)


demo = gr.Interface(
    fn=main,
    inputs=[
        gr.Audio(sources="microphone", streaming=True, label="Input Speech"),
    ],
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Audio(label="Audio"),
    ],
    live=True,
)

if __name__ == "__main__":
    demo.launch()