voice-ai / app.py
Adipta's picture
init
27c3220 verified
raw
history blame
1.26 kB
import gradio as gr
from google.cloud import speech
from microphone import MicrophoneStream
from utils import listen_print_loop
# --- Audio capture settings -------------------------------------------------
RATE = 16000  # sample rate in Hz; also sent to the recognizer below
CHUNK = RATE // 10  # samples per buffer, i.e. 100 ms of audio
LANGUAGE = "id-ID"  # language code handed to the recognizer

# --- Google Cloud Speech setup ----------------------------------------------
# NOTE(review): the client is created at import time, so importing this module
# presumably requires Google Cloud credentials to be available -- confirm.
transcribe_client = speech.SpeechClient()

# Recognition settings must agree with what the microphone stream produces:
# 16-bit linear PCM at RATE Hz.
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=RATE,
    language_code=LANGUAGE,
)

# interim_results=True requests interim results in addition to final ones.
streaming_config = speech.StreamingRecognitionConfig(
    config=config,
    interim_results=True,
)
async def main(audio) -> None:
    """Stream microphone audio to Google Cloud Speech and relay the results.

    Opens a ``MicrophoneStream`` configured with ``RATE`` and ``CHUNK``, wraps
    every chunk it yields in a ``StreamingRecognizeRequest``, passes that lazy
    request stream together with ``streaming_config`` to
    ``transcribe_client.streaming_recognize``, and awaits
    ``listen_print_loop`` on the resulting responses.

    Args:
        audio: Value supplied by the Gradio input component. It is not read
            by this function; the audio sent for recognition comes from
            ``MicrophoneStream`` instead.
            NOTE(review): confirm that ignoring the browser-provided audio is
            intentional.

    Returns:
        Whatever ``listen_print_loop`` resolves to.
        NOTE(review): the signature is annotated ``-> None`` while the
        Interface wired to this function declares two outputs -- verify what
        ``listen_print_loop`` actually returns and adjust the annotation.
    """
    print("Streaming started ...")
    with MicrophoneStream(RATE, CHUNK) as mic:
        # Generator expression: requests are built one by one as chunks are
        # pulled from the microphone generator, not all up front.
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in mic.generator()
        )
        recognition_responses = transcribe_client.streaming_recognize(
            streaming_config, request_stream
        )
        # Returning from inside the ``with`` keeps the microphone stream open
        # until the response loop has finished.
        return await listen_print_loop(recognition_responses)
# --- Gradio UI --------------------------------------------------------------
# One streaming microphone input; a text box and an audio player as outputs.
_mic_input = gr.Audio(sources="microphone", streaming=True, label="Input Speech")
_transcription_output = gr.Textbox(label="Transcription")
_audio_output = gr.Audio(label="Audio")

# live=True makes the interface call ``main`` as input changes, without a
# submit button.
demo = gr.Interface(
    fn=main,
    inputs=[_mic_input],
    outputs=[_transcription_output, _audio_output],
    live=True,
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()