# Real-time AI voice assistant: AssemblyAI (speech-to-text) -> OpenAI (chat) -> ElevenLabs (text-to-speech).
import os

import assemblyai as aai
from elevenlabs import generate, stream
from openai import OpenAI
class AI_Assistant:
    """Voice assistant for a restaurant: transcribes the caller with AssemblyAI,
    answers with an OpenAI chat model, and speaks the reply via ElevenLabs.
    """

    def __init__(self):
        # Read secrets from the environment when available; fall back to the
        # original placeholder strings so behavior is unchanged for existing setups.
        # NOTE(review): never commit real keys as literals.
        aai.settings.api_key = os.environ.get("ASSEMBLYAI_API_KEY", "ASSEMBLYAI-API-KEY")
        self.openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "OPENAI-API-KEY"))
        self.elevenlabs_api_key = os.environ.get("ELEVENLABS_API_KEY", "ELEVENLABS-API-KEY")
        self.transcriber = None
        # Conversation history, seeded with the restaurant-assistant system prompt.
        self.full_transcript = [
            {"role": "system", "content": "You are a virtual assistant for a restaurant. Help customers with food ordering, menu inquiries, and table reservations."},
        ]

    def start_transcription(self):
        """Open a realtime transcription session and stream microphone audio into it."""
        self.transcriber = aai.RealtimeTranscriber(
            sample_rate=16000,
            on_data=self.on_data,
            on_error=self.on_error,
            on_open=self.on_open,
            on_close=self.on_close,
            end_utterance_silence_threshold=1000,  # ms of silence that ends an utterance
        )
        self.transcriber.connect()
        microphone_stream = aai.extras.MicrophoneStream(sample_rate=16000)
        self.transcriber.stream(microphone_stream)

    def stop_transcription(self):
        """Close the realtime session, if one is active."""
        if self.transcriber:
            self.transcriber.close()
            self.transcriber = None

    def on_open(self, session_opened: aai.RealtimeSessionOpened):
        """Callback: the realtime session is established."""
        print("Session ID:", session_opened.session_id)

    def on_data(self, transcript: aai.RealtimeTranscript):
        """Callback: a partial or final transcript arrived.

        Final transcripts trigger an AI response; partials are shown as a
        live caption on one terminal line.
        """
        if not transcript.text:
            return
        if isinstance(transcript, aai.RealtimeFinalTranscript):
            self.generate_ai_response(transcript)
        else:
            # flush=True so the live caption appears even on line-buffered stdout
            # (carriage return keeps rewriting the same line).
            print(transcript.text, end="\r", flush=True)

    def on_error(self, error: aai.RealtimeError):
        """Callback: the realtime session reported an error."""
        print("An error occurred:", error)

    def on_close(self):
        """Callback: the realtime session was closed."""
        print("Session closed.")

    def generate_ai_response(self, transcript):
        """Send the customer's final utterance to the chat model and speak the reply."""
        # Pause listening so the microphone doesn't pick up the assistant's own speech.
        self.stop_transcription()
        self.full_transcript.append({"role": "user", "content": transcript.text})
        print(f"\nCustomer: {transcript.text}\n")
        response = self.openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=self.full_transcript,
        )
        ai_response = response.choices[0].message.content
        self.generate_audio(ai_response)
        # Resume listening for the customer's next turn.
        self.start_transcription()
        print("\nListening for the next input...\n")

    def generate_audio(self, text):
        """Record *text* as the assistant's turn and play it through ElevenLabs TTS."""
        self.full_transcript.append({"role": "assistant", "content": text})
        print(f"\nAI Assistant: {text}")
        audio_stream = generate(
            api_key=self.elevenlabs_api_key,
            text=text,
            voice="Rachel",
            stream=True,  # stream chunks so playback starts before synthesis finishes
        )
        stream(audio_stream)
if __name__ == "__main__":
    greeting = "Welcome to Gourmet Bistro! My name is Sandy. How may I assist you today?"
    ai_assistant = AI_Assistant()
    # Speak the greeting first, then start listening for the customer.
    ai_assistant.generate_audio(greeting)
    try:
        ai_assistant.start_transcription()
    except KeyboardInterrupt:
        # Ctrl-C: close the realtime session cleanly instead of leaving it open.
        ai_assistant.stop_transcription()