dschandra committed on
Commit
3b0cc06
·
verified ·
1 Parent(s): 3560f6a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -83
app.py CHANGED
@@ -1,86 +1,68 @@
1
- import assemblyai as aai
2
- from elevenlabs import generate, stream
3
- from openai import OpenAI
4
-
5
- class AI_Assistant:
6
- def __init__(self):
7
- aai.settings.api_key = "ASSEMBLYAI-API-KEY"
8
- self.openai_client = OpenAI(api_key="OPENAI-API-KEY")
9
- self.elevenlabs_api_key = "ELEVENLABS-API-KEY"
10
-
11
- self.transcriber = None
12
-
13
- # Context for food ordering in a restaurant
14
- self.full_transcript = [
15
- {"role": "system", "content": "You are a virtual assistant for a restaurant. Help customers with food ordering, menu inquiries, and table reservations."},
16
- ]
17
-
18
- def start_transcription(self):
19
- self.transcriber = aai.RealtimeTranscriber(
20
- sample_rate=16000,
21
- on_data=self.on_data,
22
- on_error=self.on_error,
23
- on_open=self.on_open,
24
- on_close=self.on_close,
25
- end_utterance_silence_threshold=1000,
26
- )
27
- self.transcriber.connect()
28
- microphone_stream = aai.extras.MicrophoneStream(sample_rate=16000)
29
- self.transcriber.stream(microphone_stream)
30
-
31
- def stop_transcription(self):
32
- if self.transcriber:
33
- self.transcriber.close()
34
- self.transcriber = None
35
-
36
- def on_open(self, session_opened: aai.RealtimeSessionOpened):
37
- print("Session ID:", session_opened.session_id)
38
-
39
- def on_data(self, transcript: aai.RealtimeTranscript):
40
- if not transcript.text:
41
- return
42
-
43
- if isinstance(transcript, aai.RealtimeFinalTranscript):
44
- self.generate_ai_response(transcript)
45
- else:
46
- print(transcript.text, end="\r")
47
-
48
- def on_error(self, error: aai.RealtimeError):
49
- print("An error occurred:", error)
50
-
51
- def on_close(self):
52
- print("Session closed.")
53
-
54
- def generate_ai_response(self, transcript):
55
- self.stop_transcription()
56
- self.full_transcript.append({"role": "user", "content": transcript.text})
57
- print(f"\nCustomer: {transcript.text}\n")
58
-
59
- response = self.openai_client.chat.completions.create(
60
- model="gpt-3.5-turbo",
61
- messages=self.full_transcript
62
  )
63
-
64
- ai_response = response.choices[0].message.content
65
- self.generate_audio(ai_response)
66
- self.start_transcription()
67
- print("\nListening for the next input...\n")
68
-
69
- def generate_audio(self, text):
70
- self.full_transcript.append({"role": "assistant", "content": text})
71
- print(f"\nAI Assistant: {text}")
72
-
73
- audio_stream = generate(
74
- api_key=self.elevenlabs_api_key,
75
- text=text,
76
- voice="Rachel",
77
- stream=True
78
- )
79
- stream(audio_stream)
80
-
 
 
 
 
 
 
 
 
 
 
81
 
82
  if __name__ == "__main__":
83
- greeting = "Welcome to Gourmet Bistro! My name is Sandy. How may I assist you today?"
84
- ai_assistant = AI_Assistant()
85
- ai_assistant.generate_audio(greeting)
86
- ai_assistant.start_transcription()
 
1
import os
import tempfile

import gradio as gr
import openai
import speech_recognition as sr
from gtts import gTTS
6
+
7
# Set OpenAI API Key.
# Prefer the OPENAI_API_KEY environment variable over a value baked into
# source control; fall back to the original placeholder so behavior is
# unchanged when the variable is unset.
openai.api_key = os.getenv("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")
9
+
10
# Text-to-Speech Function
def text_to_speech(response_text):
    """Synthesize *response_text* to an MP3 with gTTS and return its path.

    Uses a unique temporary file rather than the fixed name
    "response.mp3": with ``live=True`` the Gradio handler can run for
    several users at once, and a shared filename lets one request
    overwrite another's audio before it is served.

    Args:
        response_text: Text to speak (English).

    Returns:
        Filesystem path of the generated MP3.
    """
    tts = gTTS(response_text, lang="en")
    # delete=False: the file must outlive this function so Gradio can
    # stream it back to the browser afterwards.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_file = tmp.name
    tts.save(audio_file)
    return audio_file
16
+
17
# Speech Recognition Function
def speech_to_text(audio_file):
    """Transcribe the WAV/AIFF/FLAC file at *audio_file*.

    Uses Google's free web recognizer.  On failure, returns one of two
    fixed fallback messages instead of raising.
    """
    rec = sr.Recognizer()
    with sr.AudioFile(audio_file) as src:
        captured = rec.record(src)
    try:
        transcript = rec.recognize_google(captured)
    except sr.UnknownValueError:
        # Audio was readable but unintelligible.
        return "I'm sorry, I couldn't understand that. Could you repeat?"
    except sr.RequestError:
        # The recognition web service was unreachable or errored.
        return "There was an error with the speech recognition service."
    return transcript
28
+
29
# Chatbot Logic using OpenAI GPT
def chatbot_response(user_input):
    """Generate a single-turn chatbot reply for *user_input*.

    Returns the model's text on success, or a human-readable
    "Error generating response: ..." string on any failure --
    callers receive a plain string either way.
    """
    try:
        response = openai.Completion.create(
            # NOTE(review): this is the legacy Completions API and the
            # retired "text-davinci-003" model -- it only works on old
            # pinned `openai` releases (< 1.0).  Confirm the pinned
            # version, or migrate to the Chat Completions API.
            engine="text-davinci-003",
            prompt=f"User: {user_input}\nChatbot:",
            max_tokens=150,  # cap on reply length
            temperature=0.7,  # moderately creative sampling
        )
        return response.choices[0].text.strip()
    except Exception as e:
        # Best-effort: surface the failure as the chat reply rather
        # than crashing the Gradio handler.
        return f"Error generating response: {e}"
41
+
42
# Gradio Interface Logic
def process_interaction(audio_file):
    """End-to-end handler for one voice turn.

    Transcribes the user's audio, asks the chatbot for a reply, and
    synthesizes that reply to speech.

    Args:
        audio_file: Path to the recorded user audio (from gr.Audio).

    Returns:
        Tuple of (reply_text, reply_audio_path); reply_audio_path is
        None when transcription failed.
    """
    # Convert user speech to text
    user_text = speech_to_text(audio_file)

    # speech_to_text() signals failure by returning one of these exact
    # sentinel messages.  Comparing against them -- instead of the old
    # substring test `"Error" in user_text or "sorry" in user_text` --
    # fixes two bugs: the service-error message says "error" (lowercase),
    # so it previously slipped through to the chatbot, and a genuine
    # utterance containing "sorry" was previously swallowed.
    stt_failures = (
        "I'm sorry, I couldn't understand that. Could you repeat?",
        "There was an error with the speech recognition service.",
    )
    if user_text in stt_failures:
        return user_text, None

    # Get chatbot response
    chatbot_reply = chatbot_response(user_text)

    # Convert chatbot reply to speech
    chatbot_audio = text_to_speech(chatbot_reply)

    return chatbot_reply, chatbot_audio
56
+
57
# Gradio Interface
# Wires the microphone input through process_interaction to a text box
# and an audio player (the spoken reply).
interface = gr.Interface(
    fn=process_interaction,
    # NOTE(review): `source=` was renamed to `sources=[...]` in Gradio
    # 4.x -- confirm the pinned gradio version accepts this keyword.
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=[gr.Textbox(label="Chatbot Reply"), gr.Audio(label="Chatbot Voice Reply")],
    title="Face-to-Face Chatbot",
    description="Talk to this chatbot like you're having a real conversation! Speak into your microphone to start.",
    # live=True re-runs the handler automatically as input changes.
    live=True,
)
66
 
67
if __name__ == "__main__":
    # Start the Gradio web app (blocks until the server is stopped).
    interface.launch()