JulsdL committed on
Commit b77b218 · 1 Parent(s): 3b5f5c4

Enhance app.py with environment variable loading, speech transcription, and text-to-speech capabilities using the ElevenLabs API and AsyncOpenAI; refactor message processing and integrate audio handling steps.

Files changed (2)
  1. app.py +123 -6
  2. old_app.py +24 -0
app.py CHANGED
@@ -1,24 +1,141 @@
+from io import BytesIO
+import os
 import chainlit as cl
+import httpx
+from dotenv import load_dotenv
 from langchain.schema.runnable.config import RunnableConfig
 from sql_agent import SQLAgent
+from openai import AsyncOpenAI
+from chainlit.element import Audio
+
+# Load the .env file
+load_dotenv()
+
+# Set up the transcription API (e.g., Eleven Labs)
+ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
+ELEVENLABS_VOICE_ID = os.environ.get("ELEVENLABS_VOICE_ID")
+
+if not ELEVENLABS_API_KEY or not ELEVENLABS_VOICE_ID:
+    raise ValueError("ELEVENLABS_API_KEY and ELEVENLABS_VOICE_ID must be set")
+
+client = AsyncOpenAI()
+
+@cl.step(type="tool")
+async def speech_to_text(audio_file):
+    response = await client.audio.transcriptions.create(
+        model="whisper-1", file=audio_file
+    )
+    return response.text
+
+@cl.step(type="tool")
+async def generate_text_answer(transcription, images):
+    model = "gpt-4-turbo"
+    messages = [{"role": "user", "content": transcription}]
+    response = await client.chat.completions.create(
+        messages=messages, model=model, temperature=0.3
+    )
+    return response.choices[0].message.content
+
+@cl.step(type="tool")
+async def text_to_speech(text: str, mime_type: str):
+    CHUNK_SIZE = 1024
+    url = f"https://api.elevenlabs.io/v1/text-to-speech/{ELEVENLABS_VOICE_ID}"
+
+    headers = {
+        "Accept": mime_type,
+        "Content-Type": "application/json",
+        "xi-api-key": ELEVENLABS_API_KEY
+    }
+
+    data = {
+        "text": text,
+        "model_id": "eleven_monolingual_v1",
+        "voice_settings": {
+            "stability": 0.5,
+            "similarity_boost": 0.5
+        }
+    }
+
+    async with httpx.AsyncClient(timeout=25.0) as client:
+        response = await client.post(url, json=data, headers=headers)
+        response.raise_for_status()  # Ensure we notice bad responses
+
+        buffer = BytesIO()
+        buffer.name = f"output_audio.{mime_type.split('/')[1]}"
+
+        async for chunk in response.aiter_bytes(chunk_size=CHUNK_SIZE):
+            if chunk:
+                buffer.write(chunk)
+
+        buffer.seek(0)
+        return buffer.name, buffer.read()
 
-# ChainLit Integration
 @cl.on_chat_start
 async def on_chat_start():
     cl.user_session.set("agent", SQLAgent)
 
 @cl.on_message
 async def on_message(message: cl.Message):
-    agent = cl.user_session.get("agent")  # Get the agent from the session
+    await process_message(message.content)
+
+@cl.on_audio_chunk
+async def on_audio_chunk(chunk: cl.AudioChunk):
+    if chunk.isStart:
+        buffer = BytesIO()
+        # This is required for whisper to recognize the file type
+        buffer.name = f"input_audio.{chunk.mimeType.split('/')[1]}"
+        # Initialize the session for a new audio stream
+        cl.user_session.set("audio_buffer", buffer)
+        cl.user_session.set("audio_mime_type", chunk.mimeType)
+
+    cl.user_session.get("audio_buffer").write(chunk.data)
+
+@cl.on_audio_end
+async def on_audio_end(elements: list[Audio]):
+    audio_buffer: BytesIO = cl.user_session.get("audio_buffer")
+    audio_buffer.seek(0)
+    audio_file = audio_buffer.read()
+    audio_mime_type: str = cl.user_session.get("audio_mime_type")
+
+    input_audio_el = Audio(
+        mime=audio_mime_type, content=audio_file, name=audio_buffer.name
+    )
+    await cl.Message(
+        author="You",
+        type="user_message",
+        content="",
+        elements=[input_audio_el, *elements]
+    ).send()
+
+    answer_message = await cl.Message(content="").send()
+
+    whisper_input = (audio_buffer.name, audio_file, audio_mime_type)
+    transcription = await speech_to_text(whisper_input)
+
+    await process_message(transcription, answer_message, audio_mime_type)
+
+async def process_message(content: str, answer_message=None, mime_type=None):
+    agent = cl.user_session.get("agent")
     cb = cl.AsyncLangchainCallbackHandler(stream_final_answer=True)
     config = RunnableConfig(callbacks=[cb])
 
     async with cl.Step(name="SmartQuery Agent", root=True) as step:
-        step.input = message.content
-        result = await agent.ainvoke(message.content, config=config)
+        step.input = content
+        result = await agent.ainvoke(content, config=config)
 
-        # Assuming the result is a dictionary with a key 'output' containing the final answer
         final_answer = result.get('output', 'No answer returned')
 
-        # Stream the final answer as a token to the step
        await step.stream_token(final_answer)
+
+        if mime_type:
+            output_name, output_audio = await text_to_speech(final_answer, mime_type)
+            output_audio_el = Audio(
+                name=output_name,
+                auto_play=True,
+                mime=mime_type,
+                content=output_audio,
+            )
+            answer_message.elements = [output_audio_el]
+            await answer_message.update()
+        else:
+            await cl.Message(content=final_answer).send()
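A note on the whisper_input tuple built in on_audio_end: the OpenAI Python SDK accepts the file argument as a (filename, bytes, content_type) tuple, which is why no temporary file is needed. Below is a minimal standalone sketch of the same call, assuming a hypothetical input_audio.webm recording on disk and OPENAI_API_KEY in the environment; the committed speech_to_text helper additionally needs a running Chainlit session because of its @cl.step decorator.

# transcribe_check.py -- hypothetical standalone sketch, not part of this commit
import asyncio

from openai import AsyncOpenAI

async def main():
    client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment

    with open("input_audio.webm", "rb") as f:
        audio_bytes = f.read()

    # Same (filename, bytes, content_type) tuple shape that on_audio_end
    # passes to speech_to_text as whisper_input.
    response = await client.audio.transcriptions.create(
        model="whisper-1",
        file=("input_audio.webm", audio_bytes, "audio/webm"),
    )
    print(response.text)

asyncio.run(main())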
old_app.py ADDED
@@ -0,0 +1,24 @@
+import chainlit as cl
+from langchain.schema.runnable.config import RunnableConfig
+from sql_agent import SQLAgent
+
+# ChainLit Integration
+@cl.on_chat_start
+async def on_chat_start():
+    cl.user_session.set("agent", SQLAgent)
+
+@cl.on_message
+async def on_message(message: cl.Message):
+    agent = cl.user_session.get("agent")  # Get the agent from the session
+    cb = cl.AsyncLangchainCallbackHandler(stream_final_answer=True)
+    config = RunnableConfig(callbacks=[cb])
+
+    async with cl.Step(name="SmartQuery Agent", root=True) as step:
+        step.input = message.content
+        result = await agent.ainvoke(message.content, config=config)
+
+        # Assuming the result is a dictionary with a key 'output' containing the final answer
+        final_answer = result.get('output', 'No answer returned')
+
+        # Stream the final answer as a token to the step
+        await step.stream_token(final_answer)
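Because the new load_dotenv() call runs at import time, the updated app expects its keys in a .env file (or the process environment) before launch with chainlit run app.py. A minimal preflight sketch, assuming the variable names used in this commit; OPENAI_API_KEY is the conventional variable the AsyncOpenAI() client reads implicitly:

# env_check.py -- hypothetical preflight sketch, not part of this commit
import os

from dotenv import load_dotenv

load_dotenv()  # same loading step app.py now performs at import time

# app.py raises ValueError for the ElevenLabs pair; a missing OPENAI_API_KEY
# only fails later inside the OpenAI client, so check it up front too.
for var in ("OPENAI_API_KEY", "ELEVENLABS_API_KEY", "ELEVENLABS_VOICE_ID"):
    print(f"{var}: {'set' if os.environ.get(var) else 'MISSING'}")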