Spaces:

JulsdL
/

SmartQuery

Paused

App Files Files Community

JulsdL commited on May 23, 2024

Commit

b77b218

1 Parent(s): 3b5f5c4

Enhance app.py with environmental loading, transcription, and text-to-speech capabilities using ElevenLabs API and AsyncOpenAI; refactor message processing and integrate audio handling steps.

Browse files

Files changed (2) hide show

app.py +123 -6
old_app.py +24 -0

app.py CHANGED Viewed

@@ -1,24 +1,141 @@
 import chainlit as cl
 from langchain.schema.runnable.config import RunnableConfig
 from sql_agent import SQLAgent
-# ChainLit Integration
 @cl.on_chat_start
 async def on_chat_start():
     cl.user_session.set("agent", SQLAgent)
 @cl.on_message
 async def on_message(message: cl.Message):
-    agent = cl.user_session.get("agent")  # Get the agent from the session
     cb = cl.AsyncLangchainCallbackHandler(stream_final_answer=True)
     config = RunnableConfig(callbacks=[cb])
     async with cl.Step(name="SmartQuery Agent", root=True) as step:
-        step.input = message.content
-        result = await agent.ainvoke(message.content, config=config)
-        # Assuming the result is a dictionary with a key 'output' containing the final answer
         final_answer = result.get('output', 'No answer returned')
-        # Stream the final answer as a token to the step
         await step.stream_token(final_answer)

+from io import BytesIO
+import os
 import chainlit as cl
+import httpx
+from dotenv import load_dotenv
 from langchain.schema.runnable.config import RunnableConfig
 from sql_agent import SQLAgent
+from openai import AsyncOpenAI
+from chainlit.element import Audio
+# Load the .env file
+load_dotenv()
+# Set up the transcription API (e.g., Eleven Labs)
+ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
+ELEVENLABS_VOICE_ID = os.environ.get("ELEVENLABS_VOICE_ID")
+if not ELEVENLABS_API_KEY or not ELEVENLABS_VOICE_ID:
+    raise ValueError("ELEVENLABS_API_KEY and ELEVENLABS_VOICE_ID must be set")
+client = AsyncOpenAI()
+@cl.step(type="tool")
+async def speech_to_text(audio_file):
+    response = await client.audio.transcriptions.create(
+        model="whisper-1", file=audio_file
+    )
+    return response.text
+@cl.step(type="tool")
+async def generate_text_answer(transcription, images):
+    model = "gpt-4-turbo"
+    messages = [{"role": "user", "content": transcription}]
+    response = await client.chat.completions.create(
+        messages=messages, model=model, temperature=0.3
+    )
+    return response.choices[0].message.content
+@cl.step(type="tool")
+async def text_to_speech(text: str, mime_type: str):
+    CHUNK_SIZE = 1024
+    url = f"https://api.elevenlabs.io/v1/text-to-speech/{ELEVENLABS_VOICE_ID}"
+    headers = {
+        "Accept": mime_type,
+        "Content-Type": "application/json",
+        "xi-api-key": ELEVENLABS_API_KEY
+    }
+    data = {
+        "text": text,
+        "model_id": "eleven_monolingual_v1",
+        "voice_settings": {
+            "stability": 0.5,
+            "similarity_boost": 0.5
+        }
+    }
+    async with httpx.AsyncClient(timeout=25.0) as client:
+        response = await client.post(url, json=data, headers=headers)
+        response.raise_for_status()  # Ensure we notice bad responses
+        buffer = BytesIO()
+        buffer.name = f"output_audio.{mime_type.split('/')[1]}"
+        async for chunk in response.aiter_bytes(chunk_size=CHUNK_SIZE):
+            if chunk:
+                buffer.write(chunk)
+        buffer.seek(0)
+        return buffer.name, buffer.read()
 @cl.on_chat_start
 async def on_chat_start():
     cl.user_session.set("agent", SQLAgent)
 @cl.on_message
 async def on_message(message: cl.Message):
+    await process_message(message.content)
+@cl.on_audio_chunk
+async def on_audio_chunk(chunk: cl.AudioChunk):
+    if chunk.isStart:
+        buffer = BytesIO()
+        # This is required for whisper to recognize the file type
+        buffer.name = f"input_audio.{chunk.mimeType.split('/')[1]}"
+        # Initialize the session for a new audio stream
+        cl.user_session.set("audio_buffer", buffer)
+        cl.user_session.set("audio_mime_type", chunk.mimeType)
+    cl.user_session.get("audio_buffer").write(chunk.data)
+@cl.on_audio_end
+async def on_audio_end(elements: list[Audio]):
+    audio_buffer: BytesIO = cl.user_session.get("audio_buffer")
+    audio_buffer.seek(0)
+    audio_file = audio_buffer.read()
+    audio_mime_type: str = cl.user_session.get("audio_mime_type")
+    input_audio_el = Audio(
+        mime=audio_mime_type, content=audio_file, name=audio_buffer.name
+    )
+    await cl.Message(
+        author="You",
+        type="user_message",
+        content="",
+        elements=[input_audio_el, *elements]
+    ).send()
+    answer_message = await cl.Message(content="").send()
+    whisper_input = (audio_buffer.name, audio_file, audio_mime_type)
+    transcription = await speech_to_text(whisper_input)
+    await process_message(transcription, answer_message, audio_mime_type)
+async def process_message(content: str, answer_message=None, mime_type=None):
+    agent = cl.user_session.get("agent")
     cb = cl.AsyncLangchainCallbackHandler(stream_final_answer=True)
     config = RunnableConfig(callbacks=[cb])
     async with cl.Step(name="SmartQuery Agent", root=True) as step:
+        step.input = content
+        result = await agent.ainvoke(content, config=config)
         final_answer = result.get('output', 'No answer returned')
         await step.stream_token(final_answer)
+        if mime_type:
+            output_name, output_audio = await text_to_speech(final_answer, mime_type)
+            output_audio_el = Audio(
+                name=output_name,
+                auto_play=True,
+                mime=mime_type,
+                content=output_audio,
+            )
+            answer_message.elements = [output_audio_el]
+            await answer_message.update()
+        else:
+            await cl.Message(content=final_answer).send()

old_app.py ADDED Viewed

	@@ -0,0 +1,24 @@

+import chainlit as cl
+from langchain.schema.runnable.config import RunnableConfig
+from sql_agent import SQLAgent
+# ChainLit Integration
+@cl.on_chat_start
+async def on_chat_start():
+    cl.user_session.set("agent", SQLAgent)
+@cl.on_message
+async def on_message(message: cl.Message):
+    agent = cl.user_session.get("agent")  # Get the agent from the session
+    cb = cl.AsyncLangchainCallbackHandler(stream_final_answer=True)
+    config = RunnableConfig(callbacks=[cb])
+    async with cl.Step(name="SmartQuery Agent", root=True) as step:
+        step.input = message.content
+        result = await agent.ainvoke(message.content, config=config)
+        # Assuming the result is a dictionary with a key 'output' containing the final answer
+        final_answer = result.get('output', 'No answer returned')
+        # Stream the final answer as a token to the step
+        await step.stream_token(final_answer)