Spaces:
Paused
Paused
Add OpenAI API setup and enhance audio handling in smartquery app; remove deprecated text generation function; improve error handling and logging for audio processing.
Browse files: smartquery/app.py (+31 -19)
smartquery/app.py
CHANGED
@@ -18,6 +18,7 @@ ELEVENLABS_VOICE_ID = os.environ.get("ELEVENLABS_VOICE_ID")
|
|
18 |
if not ELEVENLABS_API_KEY or not ELEVENLABS_VOICE_ID:
|
19 |
raise ValueError("ELEVENLABS_API_KEY and ELEVENLABS_VOICE_ID must be set")
|
20 |
|
|
|
21 |
client = AsyncOpenAI()
|
22 |
|
23 |
@cl.step(type="tool")
|
@@ -27,38 +28,46 @@ async def speech_to_text(audio_file):
|
|
27 |
)
|
28 |
return response.text
|
29 |
|
30 |
-
@cl.step(type="tool")
|
31 |
-
async def generate_text_answer(transcription, images):
|
32 |
-
model = "gpt-4o"
|
33 |
-
messages = [{"role": "user", "content": transcription}]
|
34 |
-
response = await client.chat.completions.create(
|
35 |
-
messages=messages, model=model, temperature=0.3
|
36 |
-
)
|
37 |
-
return response.choices[0].message.content
|
38 |
|
39 |
@cl.on_chat_start
|
40 |
async def on_chat_start():
|
41 |
cl.user_session.set("agent", SQLAgent)
|
42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
@cl.on_message
|
44 |
async def on_message(message: cl.Message):
|
45 |
await process_message(message.content)
|
46 |
|
47 |
@cl.on_audio_chunk
|
48 |
async def on_audio_chunk(chunk: cl.AudioChunk):
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
56 |
|
57 |
-
|
|
|
|
|
|
|
58 |
|
59 |
@cl.on_audio_end
|
60 |
async def on_audio_end(elements: list[Audio]):
|
61 |
try:
|
|
|
62 |
audio_buffer: BytesIO = cl.user_session.get("audio_buffer")
|
63 |
audio_buffer.seek(0)
|
64 |
audio_file = audio_buffer.read()
|
@@ -76,21 +85,24 @@ async def on_audio_end(elements: list[Audio]):
|
|
76 |
|
77 |
whisper_input = (audio_buffer.name, audio_file, audio_mime_type)
|
78 |
transcription = await speech_to_text(whisper_input)
|
|
|
79 |
|
80 |
await process_message(transcription)
|
|
|
81 |
except Exception as e:
|
82 |
print(f"Error processing audio: {e}")
|
83 |
await cl.Message(content="Error processing audio. Please try again.").send()
|
|
|
84 |
finally:
|
85 |
# Reset audio buffer and mime type
|
86 |
cl.user_session.set("audio_buffer", None)
|
87 |
cl.user_session.set("audio_mime_type", None)
|
88 |
print("Audio buffer reset")
|
89 |
|
90 |
-
async def process_message(content: str, answer_message=None
|
91 |
agent = cl.user_session.get("agent")
|
92 |
-
cb = cl.AsyncLangchainCallbackHandler(stream_final_answer=True)
|
93 |
-
config = RunnableConfig(callbacks=[cb])
|
94 |
|
95 |
async with cl.Step(name="SmartQuery Agent", root=True) as step:
|
96 |
step.input = content
|
|
|
18 |
if not ELEVENLABS_API_KEY or not ELEVENLABS_VOICE_ID:
|
19 |
raise ValueError("ELEVENLABS_API_KEY and ELEVENLABS_VOICE_ID must be set")
|
20 |
|
21 |
+
# Set up the OpenAI API
|
22 |
client = AsyncOpenAI()
|
23 |
|
24 |
@cl.step(type="tool")
|
|
|
28 |
)
|
29 |
return response.text
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
@cl.on_chat_start
|
33 |
async def on_chat_start():
|
34 |
cl.user_session.set("agent", SQLAgent)
|
35 |
|
36 |
+
# Configure Chainlit features for audio capture
|
37 |
+
cl.user_session.set("audio_settings", {
|
38 |
+
"min_decibels": -80,
|
39 |
+
"initial_silence_timeout": 500,
|
40 |
+
"silence_timeout": 2500,
|
41 |
+
"max_duration": 15000,
|
42 |
+
"chunk_duration": 1000,
|
43 |
+
"sample_rate": 44100
|
44 |
+
})
|
45 |
+
print("Chat session started and audio settings configured")
|
46 |
+
|
47 |
@cl.on_message
|
48 |
async def on_message(message: cl.Message):
|
49 |
await process_message(message.content)
|
50 |
|
51 |
@cl.on_audio_chunk
|
52 |
async def on_audio_chunk(chunk: cl.AudioChunk):
|
53 |
+
print("Received audio chunk")
|
54 |
+
try:
|
55 |
+
if chunk.isStart:
|
56 |
+
buffer = BytesIO()
|
57 |
+
buffer.name = f"input_audio.{chunk.mimeType.split('/')[1]}"
|
58 |
+
# Initialize the session for a new audio stream
|
59 |
+
cl.user_session.set("audio_buffer", buffer)
|
60 |
+
cl.user_session.set("audio_mime_type", chunk.mimeType)
|
61 |
|
62 |
+
cl.user_session.get("audio_buffer").write(chunk.data)
|
63 |
+
|
64 |
+
except Exception as e:
|
65 |
+
print(f"Error handling audio chunk: {e}")
|
66 |
|
67 |
@cl.on_audio_end
|
68 |
async def on_audio_end(elements: list[Audio]):
|
69 |
try:
|
70 |
+
print("Audio recording ended")
|
71 |
audio_buffer: BytesIO = cl.user_session.get("audio_buffer")
|
72 |
audio_buffer.seek(0)
|
73 |
audio_file = audio_buffer.read()
|
|
|
85 |
|
86 |
whisper_input = (audio_buffer.name, audio_file, audio_mime_type)
|
87 |
transcription = await speech_to_text(whisper_input)
|
88 |
+
print("Transcription received:", transcription)
|
89 |
|
90 |
await process_message(transcription)
|
91 |
+
|
92 |
except Exception as e:
|
93 |
print(f"Error processing audio: {e}")
|
94 |
await cl.Message(content="Error processing audio. Please try again.").send()
|
95 |
+
|
96 |
finally:
|
97 |
# Reset audio buffer and mime type
|
98 |
cl.user_session.set("audio_buffer", None)
|
99 |
cl.user_session.set("audio_mime_type", None)
|
100 |
print("Audio buffer reset")
|
101 |
|
102 |
+
async def process_message(content: str, answer_message=None):
|
103 |
agent = cl.user_session.get("agent")
|
104 |
+
cb = cl.AsyncLangchainCallbackHandler(stream_final_answer=True) # Create a callback handler
|
105 |
+
config = RunnableConfig(callbacks=[cb]) # Add the callback handler to the config
|
106 |
|
107 |
async with cl.Step(name="SmartQuery Agent", root=True) as step:
|
108 |
step.input = content
|