import io

import groq
import soundfile as sf


def transcribe_audio(audio, api_key):
    """Transcribe a (sample_rate, data) audio tuple using Groq's transcription API."""
    if audio is None:
        return ""

    client = groq.Client(api_key=api_key)

    # audio is a (sample_rate, data) tuple; audio[1] is the NumPy array of samples
    sample_rate, audio_data = audio[0], audio[1]

    # Encode the samples as an in-memory WAV file for upload
    buffer = io.BytesIO()
    sf.write(buffer, audio_data, sample_rate, format="WAV")
    buffer.seek(0)

    try:
        # Use Distil-Whisper English powered by Groq for transcription
        completion = client.audio.transcriptions.create(
            model="distil-whisper-large-v3-en",
            file=("audio.wav", buffer),
            response_format="text",
        )
        return completion
    except Exception as e:
        return f"Error in transcription: {e}"


def generate_response(transcription, api_key):
    """Send the transcription to a Groq-hosted Llama 3 model and return its reply."""
    if not transcription:
        return "No transcription available. Please try speaking again."

    client = groq.Client(api_key=api_key)

    try:
        completion = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": transcription},
            ],
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error in response generation: {e}"


def process_audio(audio, api_key):
    """Run transcription then response generation; returns (transcription, response)."""
    if not api_key:
        return "Please enter your Groq API key.", "API key is required."

    transcription = transcribe_audio(audio, api_key)
    response = generate_response(transcription, api_key)
    return transcription, response
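

# --- Usage sketch (not part of the original snippet) ---
# A minimal example of how these functions might be wired into a Gradio UI,
# since transcribe_audio expects the (sample_rate, numpy_array) tuple that a
# numpy-format gr.Audio component produces. The component names and the
# Gradio 4.x `sources` argument below are assumptions, not taken from the
# original code.
if __name__ == "__main__":
    import gradio as gr

    with gr.Blocks() as demo:
        api_key_box = gr.Textbox(label="Groq API Key", type="password")
        audio_input = gr.Audio(sources=["microphone"], type="numpy", label="Speak")
        transcription_box = gr.Textbox(label="Transcription")
        response_box = gr.Textbox(label="Assistant Response")
        run_button = gr.Button("Process")

        # process_audio returns (transcription, response), matching the two outputs
        run_button.click(
            fn=process_audio,
            inputs=[audio_input, api_key_box],
            outputs=[transcription_box, response_box],
        )

    demo.launch()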