simple-voice-ai-assistant / transcription.py
cdnieto's picture
Add app, requirements and transcription script
bd5f256
import io
import groq
import numpy as np
import soundfile as sf
def transcribe_audio(audio, api_key):
    """Transcribe a recorded audio clip with Groq's Distil-Whisper model.

    Args:
        audio: ``None``, or an indexable pair whose first item is used as the
            sample rate and whose second item is the sample data handed to
            ``soundfile.write``. Presumably the ``(sample_rate, ndarray)``
            tuple produced by a Gradio audio input -- confirm with the caller.
        api_key: Groq API key used to construct the client.

    Returns:
        ``""`` when ``audio`` is ``None``. Otherwise the value returned by the
        transcription endpoint for ``response_format="text"``, or a string
        beginning with ``"Error in transcription: "`` if the API call raises.

    Note:
        Only the API call is guarded. Exceptions raised while constructing
        the client or while encoding the samples as WAV propagate to the
        caller.
    """
    if audio is None:
        return ""

    client = groq.Client(api_key=api_key)

    sample_rate = audio[0]
    audio_data = audio[1]  # The NumPy array from the tuple

    # Encode the samples as an in-memory WAV file; this is the only payload
    # the API needs, so no other serialised copy of the audio is built.
    buffer = io.BytesIO()
    sf.write(buffer, audio_data, sample_rate, format='wav')
    buffer.seek(0)  # Rewind so the upload reads from the start of the file.

    try:
        # Use Distil-Whisper English powered by Groq for transcription
        completion = client.audio.transcriptions.create(
            model="distil-whisper-large-v3-en",
            file=("audio.wav", buffer),
            response_format="text"
        )
        return completion
    except Exception as e:
        return f"Error in transcription: {e}"
def generate_response(transcription, api_key):
    """Ask the Groq-hosted Llama 3 70B chat model to answer a transcription.

    Args:
        transcription: Text sent as the user message. Any falsy value
            (``None``, ``""``) short-circuits without contacting the API.
        api_key: Groq API key used to construct the client.

    Returns:
        The content of the first choice's message, a fixed "no transcription"
        notice when ``transcription`` is falsy, or a string beginning with
        ``"Error in response generation: "`` if the API call raises.
    """
    if not transcription:
        return "No transcription available. Please try speaking again."

    # Fixed system prompt followed by the user's transcribed speech.
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": transcription},
    ]
    groq_client = groq.Client(api_key=api_key)

    try:
        chat = groq_client.chat.completions.create(
            model="llama3-70b-8192",
            messages=chat_messages,
        )
        first_choice = chat.choices[0]
        return first_choice.message.content
    except Exception as err:
        return f"Error in response generation: {err}"
def process_audio(audio, api_key):
    """Run the full pipeline: transcribe the audio, then generate a reply.

    Args:
        audio: Recorded clip, forwarded unchanged to ``transcribe_audio``.
        api_key: Groq API key; a falsy value aborts before any work is done.

    Returns:
        A ``(transcription, response)`` pair. When ``api_key`` is falsy both
        slots carry fixed messages asking for the key instead.
    """
    if api_key:
        text = transcribe_audio(audio, api_key)
        # NOTE(review): an "Error in transcription: ..." string is truthy and
        # is therefore forwarded to the chat model as if it were user speech
        # -- confirm whether that is intended.
        reply = generate_response(text, api_key)
        return text, reply

    return "Please enter your Groq API key.", "API key is required."