from io import BytesIO
import os
import chainlit as cl
import httpx
from dotenv import load_dotenv
from langchain.schema.runnable.config import RunnableConfig
from smartquery.sql_agent import SQLAgent
from openai import AsyncOpenAI
from chainlit.element import Audio
# Load the .env file
load_dotenv()
# ElevenLabs credentials, required at startup (transcription itself is handled by OpenAI Whisper below)
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
ELEVENLABS_VOICE_ID = os.environ.get("ELEVENLABS_VOICE_ID")
if not ELEVENLABS_API_KEY or not ELEVENLABS_VOICE_ID:
    raise ValueError("ELEVENLABS_API_KEY and ELEVENLABS_VOICE_ID must be set")
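# AsyncOpenAI picks up OPENAI_API_KEY from the environment loaded above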
client = AsyncOpenAI()
@cl.step(type="tool")
async def speech_to_text(audio_file):
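    """Transcribe audio with OpenAI Whisper; accepts a file-like object or a (filename, bytes, mime_type) tuple."""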
    response = await client.audio.transcriptions.create(
        model="whisper-1", file=audio_file
    )
    return response.text
@cl.step(type="tool")
async def generate_text_answer(transcription, images):
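    """Draft a text answer with gpt-4o. Note: images is accepted but not yet forwarded to the model."""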
model = "gpt-4o"
messages = [{"role": "user", "content": transcription}]
response = await client.chat.completions.create(
messages=messages, model=model, temperature=0.3
)
return response.choices[0].message.content
@cl.on_chat_start
async def on_chat_start():
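    # Keep the SQLAgent object from smartquery.sql_agent in the session so process_message can reuse it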
cl.user_session.set("agent", SQLAgent)
@cl.on_message
async def on_message(message: cl.Message):
    await process_message(message.content)
@cl.on_audio_chunk
async def on_audio_chunk(chunk: cl.AudioChunk):
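    """Accumulate streamed audio: open a fresh buffer on the first chunk, then append every chunk."""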
    if chunk.isStart:
        buffer = BytesIO()
        # This is required for whisper to recognize the file type
        buffer.name = f"input_audio.{chunk.mimeType.split('/')[1]}"
        # Initialize the session for a new audio stream
        cl.user_session.set("audio_buffer", buffer)
        cl.user_session.set("audio_mime_type", chunk.mimeType)
    cl.user_session.get("audio_buffer").write(chunk.data)
@cl.on_audio_end
async def on_audio_end(elements: list[Audio]):
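    """Replay the recorded audio back to the user, transcribe it, and route the text to the agent."""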
    audio_buffer: BytesIO = cl.user_session.get("audio_buffer")
    audio_buffer.seek(0)
    audio_file = audio_buffer.read()
    audio_mime_type: str = cl.user_session.get("audio_mime_type")

    input_audio_el = Audio(
        mime=audio_mime_type, content=audio_file, name=audio_buffer.name
    )
    await cl.Message(
        author="You",
        type="user_message",
        content="",
        elements=[input_audio_el, *elements],
    ).send()

    answer_message = await cl.Message(content="").send()
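    # The OpenAI SDK accepts file uploads as (filename, bytes, mime_type) tuples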
    whisper_input = (audio_buffer.name, audio_file, audio_mime_type)
    transcription = await speech_to_text(whisper_input)
    await process_message(transcription, answer_message, audio_mime_type)

    # Reset audio buffer and mime type
    cl.user_session.set("audio_buffer", None)
    cl.user_session.set("audio_mime_type", None)
    print("Audio buffer reset")
async def process_message(content: str, answer_message=None, mime_type=None):
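    """Run content through the SQLAgent; when answer_message is given (audio flow), copy the final answer into it. mime_type is currently unused."""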
    agent = cl.user_session.get("agent")
    cb = cl.AsyncLangchainCallbackHandler(stream_final_answer=True)
    config = RunnableConfig(callbacks=[cb])

    async with cl.Step(name="SmartQuery Agent", root=True) as step:
        step.input = content
        result = await agent.ainvoke(content, config=config)
        final_answer = result.get("output", "No answer returned")
        await step.stream_token(final_answer)

    if answer_message:
        answer_message.content = final_answer
        await answer_message.update()