JulsdL committed
Commit 5a57820 · 1 Parent(s): cd7515d

Add OpenAI API setup and enhance audio handling in smartquery app; remove deprecated text generation function; improve error handling and logging for audio processing.

Files changed (1)
  1. smartquery/app.py +31 -19
smartquery/app.py CHANGED

@@ -18,6 +18,7 @@ ELEVENLABS_VOICE_ID = os.environ.get("ELEVENLABS_VOICE_ID")
 if not ELEVENLABS_API_KEY or not ELEVENLABS_VOICE_ID:
     raise ValueError("ELEVENLABS_API_KEY and ELEVENLABS_VOICE_ID must be set")
 
+# Set up the OpenAI API
 client = AsyncOpenAI()
 
 @cl.step(type="tool")
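Note for reviewers: `AsyncOpenAI()` resolves its key from the `OPENAI_API_KEY` environment variable and errors if none is available, so the new setup line could be paired with an explicit guard like the ElevenLabs one above. A minimal sketch of that pattern (the guard is a suggestion, not part of this commit):

```python
import os

from openai import AsyncOpenAI

# AsyncOpenAI() falls back to the OPENAI_API_KEY environment variable;
# checking it up front mirrors the ELEVENLABS_API_KEY guard above and
# fails fast with a clearer message.
if not os.environ.get("OPENAI_API_KEY"):
    raise ValueError("OPENAI_API_KEY must be set")

client = AsyncOpenAI()
```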
@@ -27,38 +28,46 @@ async def speech_to_text(audio_file):
     )
     return response.text
 
-@cl.step(type="tool")
-async def generate_text_answer(transcription, images):
-    model = "gpt-4o"
-    messages = [{"role": "user", "content": transcription}]
-    response = await client.chat.completions.create(
-        messages=messages, model=model, temperature=0.3
-    )
-    return response.choices[0].message.content
 
 @cl.on_chat_start
 async def on_chat_start():
     cl.user_session.set("agent", SQLAgent)
 
+    # Configure Chainlit features for audio capture
+    cl.user_session.set("audio_settings", {
+        "min_decibels": -80,
+        "initial_silence_timeout": 500,
+        "silence_timeout": 2500,
+        "max_duration": 15000,
+        "chunk_duration": 1000,
+        "sample_rate": 44100
+    })
+    print("Chat session started and audio settings configured")
+
 @cl.on_message
 async def on_message(message: cl.Message):
     await process_message(message.content)
 
 @cl.on_audio_chunk
 async def on_audio_chunk(chunk: cl.AudioChunk):
-    if chunk.isStart:
-        buffer = BytesIO()
-        # This is required for whisper to recognize the file type
-        buffer.name = f"input_audio.{chunk.mimeType.split('/')[1]}"
-        # Initialize the session for a new audio stream
-        cl.user_session.set("audio_buffer", buffer)
-        cl.user_session.set("audio_mime_type", chunk.mimeType)
+    print("Received audio chunk")
+    try:
+        if chunk.isStart:
+            buffer = BytesIO()
+            buffer.name = f"input_audio.{chunk.mimeType.split('/')[1]}"
+            # Initialize the session for a new audio stream
+            cl.user_session.set("audio_buffer", buffer)
+            cl.user_session.set("audio_mime_type", chunk.mimeType)
 
-    cl.user_session.get("audio_buffer").write(chunk.data)
+        cl.user_session.get("audio_buffer").write(chunk.data)
+
+    except Exception as e:
+        print(f"Error handling audio chunk: {e}")
 
 @cl.on_audio_end
 async def on_audio_end(elements: list[Audio]):
     try:
+        print("Audio recording ended")
         audio_buffer: BytesIO = cl.user_session.get("audio_buffer")
         audio_buffer.seek(0)
         audio_file = audio_buffer.read()
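Two notes on this hunk. The `audio_settings` keys stored in the user session mirror the `[features.audio]` options Chainlit normally reads from `config.toml` (silence timeouts, 15 s max duration, 44.1 kHz sample rate), so the session copy appears to document the intended capture behavior rather than configure it. And while the hunk shows only the tail of `speech_to_text`, the `(filename, bytes, mime_type)` tuple assembled in `on_audio_end` matches the file-tuple form accepted by OpenAI's transcription endpoint, where the filename extension set via `buffer.name` is how Whisper infers the container format. A plausible shape for the full tool step, assuming the `whisper-1` model (the model name is not visible in the diff; `client` is the module-level `AsyncOpenAI()` instance):

```python
@cl.step(type="tool")
async def speech_to_text(audio_file):
    # audio_file is a (filename, bytes, mime_type) tuple; the filename
    # extension set in on_audio_chunk lets Whisper detect the container
    # format of the raw bytes.
    response = await client.audio.transcriptions.create(
        model="whisper-1", file=audio_file
    )
    return response.text
```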
@@ -76,21 +85,24 @@ async def on_audio_end(elements: list[Audio]):
 
         whisper_input = (audio_buffer.name, audio_file, audio_mime_type)
         transcription = await speech_to_text(whisper_input)
+        print("Transcription received:", transcription)
 
         await process_message(transcription)
+
     except Exception as e:
         print(f"Error processing audio: {e}")
         await cl.Message(content="Error processing audio. Please try again.").send()
+
     finally:
         # Reset audio buffer and mime type
         cl.user_session.set("audio_buffer", None)
         cl.user_session.set("audio_mime_type", None)
         print("Audio buffer reset")
 
-async def process_message(content: str, answer_message=None, mime_type=None):
+async def process_message(content: str, answer_message=None):
     agent = cl.user_session.get("agent")
-    cb = cl.AsyncLangchainCallbackHandler(stream_final_answer=True)
-    config = RunnableConfig(callbacks=[cb])
+    cb = cl.AsyncLangchainCallbackHandler(stream_final_answer=True)  # Create a callback handler
+    config = RunnableConfig(callbacks=[cb])  # Add the callback handler to the config
 
     async with cl.Step(name="SmartQuery Agent", root=True) as step:
         step.input = content
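The diff ends inside `process_message`, so the rest of the agent invocation is not shown, but the new callback wiring follows the usual Chainlit-LangChain pattern: `AsyncLangchainCallbackHandler(stream_final_answer=True)` streams the agent's final answer into the UI, and `RunnableConfig` carries the handler into the run. A hedged sketch of how the body likely continues (the `ainvoke` call and the `input`/`output` keys are assumptions about `SQLAgent`'s interface, not shown in the diff):

```python
    async with cl.Step(name="SmartQuery Agent", root=True) as step:
        step.input = content
        # Assumed continuation: invoke the session's agent with the
        # callback-carrying config so intermediate tool calls and the
        # final answer stream into the Chainlit UI.
        result = await agent.ainvoke({"input": content}, config=config)
        step.output = result["output"] if isinstance(result, dict) else result
```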
 