Spaces:
Runtime error
feature: agent response to help with debugging
Browse files- agent_response.py +21 -0
- chat_service.py +14 -7
- respond_to_prompt_actor.py +14 -9
- text_to_speech_service.py +6 -2
agent_response.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json


class AgentResponse(dict):
    """Mutable bag of pipeline state passed between the LLM and TTS stages.

    Subclasses ``dict`` so instances can be queued between actors and
    serialized directly. Keys track the originating prompt, the streaming
    LLM output, and the raw TTS audio chunks.
    """

    def __init__(self, prompt=None, **kwargs):
        super().__init__(**kwargs)
        # Only initialize the pipeline keys when a prompt is given; calling
        # with prompt=None yields a plain (possibly empty) dict, matching the
        # original behavior relied on by make_copy().
        if prompt is not None:
            self['prompt'] = prompt
            self['llm_preview'] = ''        # partial sentence streamed so far
            self['llm_sentence'] = ''       # most recent complete sentence
            self['llm_sentence_id'] = 0
            self['llm_sentences'] = []      # all sentences produced so far
            self['tts_raw_chunk'] = None    # latest raw audio chunk
            self['tts_raw_chunk_id'] = 0

    def make_copy(self):
        """Return a snapshot of this response safe to hand to another actor.

        Fix: the previous implementation (``new_instance.update(self.copy())``)
        was a shallow copy, so the snapshot shared the ``llm_sentences`` list
        with the original — snapshots already queued kept mutating as the
        producer appended new sentences. The list is now copied as well, and
        the redundant intermediate ``self.copy()`` dict is dropped.
        """
        new_instance = self.__class__.__new__(self.__class__)
        new_instance.update(self)
        if isinstance(new_instance.get('llm_sentences'), list):
            new_instance['llm_sentences'] = list(new_instance['llm_sentences'])
        return new_instance

    def to_json(self):
        """Serialize all keys to a JSON string.

        NOTE(review): will raise TypeError if 'tts_raw_chunk' holds raw audio
        bytes at call time — confirm callers only serialize before TTS runs.
        """
        return json.dumps(self)
|
chat_service.py
CHANGED
|
@@ -4,6 +4,7 @@ import json
|
|
| 4 |
import os
|
| 5 |
import torch
|
| 6 |
import openai
|
|
|
|
| 7 |
|
| 8 |
class ChatService:
|
| 9 |
def __init__(self, api="openai", model_id = "gpt-3.5-turbo"):
|
|
@@ -101,9 +102,9 @@ I fell off the pink step, and I had an accident.
|
|
| 101 |
return True
|
| 102 |
return False
|
| 103 |
|
| 104 |
-
async def get_responses_as_sentances_async(self,
|
| 105 |
-
self._messages.append({"role": "user", "content": prompt})
|
| 106 |
-
|
| 107 |
current_sentence = ""
|
| 108 |
delay = 0.1
|
| 109 |
|
|
@@ -123,17 +124,23 @@ I fell off the pink step, and I had an accident.
|
|
| 123 |
if 'content' in chunk_message:
|
| 124 |
chunk_text = chunk_message['content']
|
| 125 |
current_sentence += chunk_text
|
| 126 |
-
|
|
|
|
| 127 |
text_to_speak = self._should_we_send_to_voice(current_sentence)
|
| 128 |
if text_to_speak:
|
| 129 |
-
yield text_to_speak
|
| 130 |
current_sentence = current_sentence[len(text_to_speak):]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
if cancel_event.is_set():
|
| 133 |
return
|
| 134 |
if len(current_sentence) > 0:
|
| 135 |
-
|
| 136 |
-
|
|
|
|
| 137 |
return
|
| 138 |
|
| 139 |
except openai.error.APIError as e:
|
|
|
|
| 4 |
import os
|
| 5 |
import torch
|
| 6 |
import openai
|
| 7 |
+
from agent_response import AgentResponse
|
| 8 |
|
| 9 |
class ChatService:
|
| 10 |
def __init__(self, api="openai", model_id = "gpt-3.5-turbo"):
|
|
|
|
| 102 |
return True
|
| 103 |
return False
|
| 104 |
|
| 105 |
+
async def get_responses_as_sentances_async(self, agent_response:AgentResponse, cancel_event):
|
| 106 |
+
self._messages.append({"role": "user", "content": agent_response['prompt']})
|
| 107 |
+
llm_response = ""
|
| 108 |
current_sentence = ""
|
| 109 |
delay = 0.1
|
| 110 |
|
|
|
|
| 124 |
if 'content' in chunk_message:
|
| 125 |
chunk_text = chunk_message['content']
|
| 126 |
current_sentence += chunk_text
|
| 127 |
+
llm_response += chunk_text
|
| 128 |
+
agent_response['llm_preview'] = current_sentence
|
| 129 |
text_to_speak = self._should_we_send_to_voice(current_sentence)
|
| 130 |
if text_to_speak:
|
|
|
|
| 131 |
current_sentence = current_sentence[len(text_to_speak):]
|
| 132 |
+
agent_response['llm_preview'] = ''
|
| 133 |
+
agent_response['llm_sentence'] = text_to_speak
|
| 134 |
+
agent_response['llm_sentences'].append(text_to_speak)
|
| 135 |
+
yield agent_response
|
| 136 |
+
agent_response['llm_sentence_id'] += 1
|
| 137 |
|
| 138 |
if cancel_event.is_set():
|
| 139 |
return
|
| 140 |
if len(current_sentence) > 0:
|
| 141 |
+
agent_response['llm_sentence'] = current_sentence
|
| 142 |
+
yield agent_response
|
| 143 |
+
self._messages.append({"role": "assistant", "content": llm_response})
|
| 144 |
return
|
| 145 |
|
| 146 |
except openai.error.APIError as e:
|
respond_to_prompt_actor.py
CHANGED
|
@@ -7,6 +7,8 @@ from chat_service import ChatService
|
|
| 7 |
import asyncio
|
| 8 |
# from ray.actor import ActorHandle
|
| 9 |
from ffmpeg_converter_actor import FFMpegConverterActor
|
|
|
|
|
|
|
| 10 |
|
| 11 |
@ray.remote
|
| 12 |
class PromptToLLMActor:
|
|
@@ -21,11 +23,13 @@ class PromptToLLMActor:
|
|
| 21 |
while True:
|
| 22 |
prompt = await self.input_queue.get_async()
|
| 23 |
self.cancel_event = asyncio.Event()
|
| 24 |
-
|
| 25 |
-
|
|
|
|
| 26 |
continue
|
| 27 |
-
print(f"{
|
| 28 |
-
|
|
|
|
| 29 |
|
| 30 |
async def cancel(self):
|
| 31 |
if self.cancel_event:
|
|
@@ -46,10 +50,11 @@ class LLMSentanceToSpeechActor:
|
|
| 46 |
|
| 47 |
async def run(self):
|
| 48 |
while True:
|
| 49 |
-
|
| 50 |
self.cancel_event = asyncio.Event()
|
| 51 |
-
async for
|
| 52 |
-
|
|
|
|
| 53 |
|
| 54 |
async def cancel(self):
|
| 55 |
if self.cancel_event:
|
|
@@ -89,8 +94,8 @@ class SpeechToConverterActor:
|
|
| 89 |
await self.ffmpeg_converter_actor.start_process.remote()
|
| 90 |
self.ffmpeg_converter_actor.run.remote()
|
| 91 |
while True:
|
| 92 |
-
|
| 93 |
-
|
| 94 |
await self.ffmpeg_converter_actor.push_chunk.remote(audio_chunk)
|
| 95 |
|
| 96 |
async def cancel(self):
|
|
|
|
| 7 |
import asyncio
|
| 8 |
# from ray.actor import ActorHandle
|
| 9 |
from ffmpeg_converter_actor import FFMpegConverterActor
|
| 10 |
+
from agent_response import AgentResponse
|
| 11 |
+
|
| 12 |
|
| 13 |
@ray.remote
|
| 14 |
class PromptToLLMActor:
|
|
|
|
| 23 |
while True:
|
| 24 |
prompt = await self.input_queue.get_async()
|
| 25 |
self.cancel_event = asyncio.Event()
|
| 26 |
+
agent_response = AgentResponse(prompt)
|
| 27 |
+
async for sentence_response in self.chat_service.get_responses_as_sentances_async(agent_response, self.cancel_event):
|
| 28 |
+
if self.chat_service.ignore_sentence(sentence_response['llm_sentence']):
|
| 29 |
continue
|
| 30 |
+
print(f"{sentence_response['llm_sentence']} id: {agent_response['llm_sentence_id']} from prompt: {agent_response['prompt']}")
|
| 31 |
+
sentence_response = sentence_response.make_copy()
|
| 32 |
+
await self.output_queue.put_async(sentence_response)
|
| 33 |
|
| 34 |
async def cancel(self):
|
| 35 |
if self.cancel_event:
|
|
|
|
| 50 |
|
| 51 |
async def run(self):
|
| 52 |
while True:
|
| 53 |
+
sentence_response = await self.input_queue.get_async()
|
| 54 |
self.cancel_event = asyncio.Event()
|
| 55 |
+
async for chunk_response in self.tts_service.get_speech_chunks_async(sentence_response, self.cancel_event):
|
| 56 |
+
chunk_response = chunk_response.make_copy()
|
| 57 |
+
await self.output_queue.put_async(chunk_response)
|
| 58 |
|
| 59 |
async def cancel(self):
|
| 60 |
if self.cancel_event:
|
|
|
|
| 94 |
await self.ffmpeg_converter_actor.start_process.remote()
|
| 95 |
self.ffmpeg_converter_actor.run.remote()
|
| 96 |
while True:
|
| 97 |
+
chunk_response = await self.input_queue.get_async()
|
| 98 |
+
audio_chunk = chunk_response['tts_raw_chunk']
|
| 99 |
await self.ffmpeg_converter_actor.push_chunk.remote(audio_chunk)
|
| 100 |
|
| 101 |
async def cancel(self):
|
text_to_speech_service.py
CHANGED
|
@@ -4,6 +4,7 @@ import os
|
|
| 4 |
from elevenlabs import generate, play
|
| 5 |
from elevenlabs import set_api_key
|
| 6 |
from elevenlabs import generate, stream
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
class TextToSpeechService:
|
|
@@ -46,7 +47,8 @@ class TextToSpeechService:
|
|
| 46 |
)
|
| 47 |
return audio_stream
|
| 48 |
|
| 49 |
-
async def get_speech_chunks_async(self,
|
|
|
|
| 50 |
stream = self.stream(text_to_speak)
|
| 51 |
stream, stream_backup = itertools.tee(stream)
|
| 52 |
while True:
|
|
@@ -58,6 +60,8 @@ class TextToSpeechService:
|
|
| 58 |
|
| 59 |
# Run next(stream) in a separate thread to avoid blocking the event loop
|
| 60 |
chunk = await asyncio.to_thread(next, stream)
|
|
|
|
| 61 |
if cancel_event.is_set():
|
| 62 |
return
|
| 63 |
-
yield
|
|
|
|
|
|
| 4 |
from elevenlabs import generate, play
|
| 5 |
from elevenlabs import set_api_key
|
| 6 |
from elevenlabs import generate, stream
|
| 7 |
+
from agent_response import AgentResponse
|
| 8 |
|
| 9 |
|
| 10 |
class TextToSpeechService:
|
|
|
|
| 47 |
)
|
| 48 |
return audio_stream
|
| 49 |
|
| 50 |
+
async def get_speech_chunks_async(self, sentence_response:AgentResponse, cancel_event):
|
| 51 |
+
text_to_speak = sentence_response['llm_sentence']
|
| 52 |
stream = self.stream(text_to_speak)
|
| 53 |
stream, stream_backup = itertools.tee(stream)
|
| 54 |
while True:
|
|
|
|
| 60 |
|
| 61 |
# Run next(stream) in a separate thread to avoid blocking the event loop
|
| 62 |
chunk = await asyncio.to_thread(next, stream)
|
| 63 |
+
sentence_response['tts_raw_chunk'] = chunk
|
| 64 |
if cancel_event.is_set():
|
| 65 |
return
|
| 66 |
+
yield sentence_response
|
| 67 |
+
sentence_response['tts_raw_chunk_id'] += 1
|