import os
import tempfile
import time
from typing import Optional

import gradio as gr
import numpy as np
import soundfile as sf
from groq import Groq
from openai import OpenAI

# API clients are created once at import time; the keys are read from the
# GROQ_API_KEY and OPENAI_API_KEY environment variables.
groq_client = Groq(api_key=os.getenv('GROQ_API_KEY'))
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))


def transcribe(audio_path: str) -> str:
    """Transcribe an audio file with the ``whisper-1`` model.

    Args:
        audio_path: Path of the audio file to upload.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    with open(audio_path, 'rb') as audio_file:
        transcription = openai_client.audio.transcriptions.create(
            file=audio_file,
            language="en",
            model="whisper-1",
        )
    return transcription.text


def autocomplete(text: str) -> Optional[str]:
    """Ask the ``gemma-7b-it`` model (via Groq) to respond to *text*.

    Args:
        text: The user message to send.

    Returns:
        The message content of the first choice, or ``None`` when *text* is
        the empty string (no request is made in that case).
    """
    if text == "":
        return None

    response = groq_client.chat.completions.create(
        model='gemma-7b-it',
        messages=[
            {"role": "system", "content": "You are a friendly assistant."},
            {"role": "user", "content": text},
        ],
    )
    return response.choices[0].message.content


def process_audio(input_audio: Optional[str]) -> str:
    """Transcribe a recording and return it together with the model's answer.

    The audio is down-mixed to mono, cut into one-second segments, and each
    segment is transcribed separately. The non-empty segment transcriptions
    are joined into a single question, which is sent for completion once.

    Args:
        input_audio: Path of the recorded audio file. ``None`` or an empty
            path is treated as "nothing recorded".

    Returns:
        A string of the form ``"Qn: <transcription> \\n \\n Ans: <answer>"``,
        or ``""`` when there is no input or nothing was transcribed.
    """
    if not input_audio:
        return ""

    audio_data, sample_rate = sf.read(input_audio)

    # Ensure mono audio by averaging the channels.
    if audio_data.ndim > 1:
        audio_data = np.mean(audio_data, axis=1)

    transcription_list = []
    # transcribe() takes a file path, so each segment is written to disk
    # first. A per-call temporary directory keeps concurrent calls from
    # overwriting each other's files and removes them when the block exits.
    with tempfile.TemporaryDirectory() as segment_dir:
        # Stepping by sample_rate samples yields one-second segments.
        for start in range(0, len(audio_data), sample_rate):
            segment = audio_data[start:start + sample_rate]
            segment_filename = os.path.join(
                segment_dir, f"audio_segment_{start}.wav"
            )
            sf.write(segment_filename, segment, sample_rate)

            piece = transcribe(segment_filename).strip()
            if piece:
                transcription_list.append(piece)

    transcription = " ".join(transcription_list)
    if not transcription:
        # Nothing was recognised: skip the completion request instead of
        # rendering "Ans: None".
        return ""

    # Send the full transcription for completion.
    completion_result = autocomplete(transcription)
    return f"Qn: {transcription} \n \n Ans: {completion_result}"


# Define the Gradio interface
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(sources="microphone", streaming=True, type="filepath"),
    outputs=gr.Markdown(),
    title="Dear Gemma",
    description="Talk to the AI assistant. It completes your sentences in real time.",
    live=True,
    allow_flagging="never",
)

if __name__ == "__main__":
    interface.launch()