Spaces:
Runtime error
feature: agent response to help with debugging
Browse files- agent_response.py +21 -0
- chat_service.py +14 -7
- respond_to_prompt_actor.py +14 -9
- text_to_speech_service.py +6 -2
agent_response.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json


class AgentResponse(dict):
    """Mutable bag of pipeline state passed between the LLM and TTS stages.

    Subclasses ``dict`` so instances can be queued between actors and
    serialized directly. Keys track the originating prompt, the streaming
    LLM output, and the raw TTS audio chunks.
    """

    def __init__(self, prompt=None, **kwargs):
        super().__init__(**kwargs)
        # Only initialize the pipeline keys when a prompt is given; calling
        # with prompt=None yields a plain (possibly empty) dict, matching the
        # original behavior relied on by make_copy().
        if prompt is not None:
            self['prompt'] = prompt
            self['llm_preview'] = ''        # partial sentence streamed so far
            self['llm_sentence'] = ''       # most recent complete sentence
            self['llm_sentence_id'] = 0
            self['llm_sentences'] = []      # all sentences produced so far
            self['tts_raw_chunk'] = None    # latest raw audio chunk
            self['tts_raw_chunk_id'] = 0

    def make_copy(self):
        """Return a snapshot of this response safe to hand to another actor.

        Fix: the previous implementation (``new_instance.update(self.copy())``)
        was a shallow copy, so the snapshot shared the ``llm_sentences`` list
        with the original — snapshots already queued kept mutating as the
        producer appended new sentences. The list is now copied as well, and
        the redundant intermediate ``self.copy()`` dict is dropped.
        """
        new_instance = self.__class__.__new__(self.__class__)
        new_instance.update(self)
        if isinstance(new_instance.get('llm_sentences'), list):
            new_instance['llm_sentences'] = list(new_instance['llm_sentences'])
        return new_instance

    def to_json(self):
        """Serialize all keys to a JSON string.

        NOTE(review): will raise TypeError if 'tts_raw_chunk' holds raw audio
        bytes at call time — confirm callers only serialize before TTS runs.
        """
        return json.dumps(self)
|
chat_service.py
CHANGED
|
@@ -4,6 +4,7 @@ import json
|
|
| 4 |
import os
|
| 5 |
import torch
|
| 6 |
import openai
|
|
|
|
| 7 |
|
| 8 |
class ChatService:
|
| 9 |
def __init__(self, api="openai", model_id = "gpt-3.5-turbo"):
|
|
@@ -101,9 +102,9 @@ I fell off the pink step, and I had an accident.
|
|
| 101 |
return True
|
| 102 |
return False
|
| 103 |
|
| 104 |
-
async def get_responses_as_sentances_async(self,
|
| 105 |
-
self._messages.append({"role": "user", "content": prompt})
|
| 106 |
-
|
| 107 |
current_sentence = ""
|
| 108 |
delay = 0.1
|
| 109 |
|
|
@@ -123,17 +124,23 @@ I fell off the pink step, and I had an accident.
|
|
| 123 |
if 'content' in chunk_message:
|
| 124 |
chunk_text = chunk_message['content']
|
| 125 |
current_sentence += chunk_text
|
| 126 |
-
|
|
|
|
| 127 |
text_to_speak = self._should_we_send_to_voice(current_sentence)
|
| 128 |
if text_to_speak:
|
| 129 |
-
yield text_to_speak
|
| 130 |
current_sentence = current_sentence[len(text_to_speak):]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
if cancel_event.is_set():
|
| 133 |
return
|
| 134 |
if len(current_sentence) > 0:
|
| 135 |
-
|
| 136 |
-
|
|
|
|
| 137 |
return
|
| 138 |
|
| 139 |
except openai.error.APIError as e:
|
|
|
|
| 4 |
import os
|
| 5 |
import torch
|
| 6 |
import openai
|
| 7 |
+
from agent_response import AgentResponse
|
| 8 |
|
| 9 |
class ChatService:
|
| 10 |
def __init__(self, api="openai", model_id = "gpt-3.5-turbo"):
|
|
|
|
| 102 |
return True
|
| 103 |
return False
|
| 104 |
|
| 105 |
+
async def get_responses_as_sentances_async(self, agent_response:AgentResponse, cancel_event):
|
| 106 |
+
self._messages.append({"role": "user", "content": agent_response['prompt']})
|
| 107 |
+
llm_response = ""
|
| 108 |
current_sentence = ""
|
| 109 |
delay = 0.1
|
| 110 |
|
|
|
|
| 124 |
if 'content' in chunk_message:
|
| 125 |
chunk_text = chunk_message['content']
|
| 126 |
current_sentence += chunk_text
|
| 127 |
+
llm_response += chunk_text
|
| 128 |
+
agent_response['llm_preview'] = current_sentence
|
| 129 |
text_to_speak = self._should_we_send_to_voice(current_sentence)
|
| 130 |
if text_to_speak:
|
|
|
|
| 131 |
current_sentence = current_sentence[len(text_to_speak):]
|
| 132 |
+
agent_response['llm_preview'] = ''
|
| 133 |
+
agent_response['llm_sentence'] = text_to_speak
|
| 134 |
+
agent_response['llm_sentences'].append(text_to_speak)
|
| 135 |
+
yield agent_response
|
| 136 |
+
agent_response['llm_sentence_id'] += 1
|
| 137 |
|
| 138 |
if cancel_event.is_set():
|
| 139 |
return
|
| 140 |
if len(current_sentence) > 0:
|
| 141 |
+
agent_response['llm_sentence'] = current_sentence
|
| 142 |
+
yield agent_response
|
| 143 |
+
self._messages.append({"role": "assistant", "content": llm_response})
|
| 144 |
return
|
| 145 |
|
| 146 |
except openai.error.APIError as e:
|
respond_to_prompt_actor.py
CHANGED
|
@@ -7,6 +7,8 @@ from chat_service import ChatService
|
|
| 7 |
import asyncio
|
| 8 |
# from ray.actor import ActorHandle
|
| 9 |
from ffmpeg_converter_actor import FFMpegConverterActor
|
|
|
|
|
|
|
| 10 |
|
| 11 |
@ray.remote
|
| 12 |
class PromptToLLMActor:
|
|
@@ -21,11 +23,13 @@ class PromptToLLMActor:
|
|
| 21 |
while True:
|
| 22 |
prompt = await self.input_queue.get_async()
|
| 23 |
self.cancel_event = asyncio.Event()
|
| 24 |
-
|
| 25 |
-
|
|
|
|
| 26 |
continue
|
| 27 |
-
print(f"{
|
| 28 |
-
|
|
|
|
| 29 |
|
| 30 |
async def cancel(self):
|
| 31 |
if self.cancel_event:
|
|
@@ -46,10 +50,11 @@ class LLMSentanceToSpeechActor:
|
|
| 46 |
|
| 47 |
async def run(self):
|
| 48 |
while True:
|
| 49 |
-
|
| 50 |
self.cancel_event = asyncio.Event()
|
| 51 |
-
async for
|
| 52 |
-
|
|
|
|
| 53 |
|
| 54 |
async def cancel(self):
|
| 55 |
if self.cancel_event:
|
|
@@ -89,8 +94,8 @@ class SpeechToConverterActor:
|
|
| 89 |
await self.ffmpeg_converter_actor.start_process.remote()
|
| 90 |
self.ffmpeg_converter_actor.run.remote()
|
| 91 |
while True:
|
| 92 |
-
|
| 93 |
-
|
| 94 |
await self.ffmpeg_converter_actor.push_chunk.remote(audio_chunk)
|
| 95 |
|
| 96 |
async def cancel(self):
|
|
|
|
| 7 |
import asyncio
|
| 8 |
# from ray.actor import ActorHandle
|
| 9 |
from ffmpeg_converter_actor import FFMpegConverterActor
|
| 10 |
+
from agent_response import AgentResponse
|
| 11 |
+
|
| 12 |
|
| 13 |
@ray.remote
|
| 14 |
class PromptToLLMActor:
|
|
|
|
| 23 |
while True:
|
| 24 |
prompt = await self.input_queue.get_async()
|
| 25 |
self.cancel_event = asyncio.Event()
|
| 26 |
+
agent_response = AgentResponse(prompt)
|
| 27 |
+
async for sentence_response in self.chat_service.get_responses_as_sentances_async(agent_response, self.cancel_event):
|
| 28 |
+
if self.chat_service.ignore_sentence(sentence_response['llm_sentence']):
|
| 29 |
continue
|
| 30 |
+
print(f"{sentence_response['llm_sentence']} id: {agent_response['llm_sentence_id']} from prompt: {agent_response['prompt']}")
|
| 31 |
+
sentence_response = sentence_response.make_copy()
|
| 32 |
+
await self.output_queue.put_async(sentence_response)
|
| 33 |
|
| 34 |
async def cancel(self):
|
| 35 |
if self.cancel_event:
|
|
|
|
| 50 |
|
| 51 |
async def run(self):
|
| 52 |
while True:
|
| 53 |
+
sentence_response = await self.input_queue.get_async()
|
| 54 |
self.cancel_event = asyncio.Event()
|
| 55 |
+
async for chunk_response in self.tts_service.get_speech_chunks_async(sentence_response, self.cancel_event):
|
| 56 |
+
chunk_response = chunk_response.make_copy()
|
| 57 |
+
await self.output_queue.put_async(chunk_response)
|
| 58 |
|
| 59 |
async def cancel(self):
|
| 60 |
if self.cancel_event:
|
|
|
|
| 94 |
await self.ffmpeg_converter_actor.start_process.remote()
|
| 95 |
self.ffmpeg_converter_actor.run.remote()
|
| 96 |
while True:
|
| 97 |
+
chunk_response = await self.input_queue.get_async()
|
| 98 |
+
audio_chunk = chunk_response['tts_raw_chunk']
|
| 99 |
await self.ffmpeg_converter_actor.push_chunk.remote(audio_chunk)
|
| 100 |
|
| 101 |
async def cancel(self):
|
text_to_speech_service.py
CHANGED
|
@@ -4,6 +4,7 @@ import os
|
|
| 4 |
from elevenlabs import generate, play
|
| 5 |
from elevenlabs import set_api_key
|
| 6 |
from elevenlabs import generate, stream
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
class TextToSpeechService:
|
|
@@ -46,7 +47,8 @@ class TextToSpeechService:
|
|
| 46 |
)
|
| 47 |
return audio_stream
|
| 48 |
|
| 49 |
-
async def get_speech_chunks_async(self,
|
|
|
|
| 50 |
stream = self.stream(text_to_speak)
|
| 51 |
stream, stream_backup = itertools.tee(stream)
|
| 52 |
while True:
|
|
@@ -58,6 +60,8 @@ class TextToSpeechService:
|
|
| 58 |
|
| 59 |
# Run next(stream) in a separate thread to avoid blocking the event loop
|
| 60 |
chunk = await asyncio.to_thread(next, stream)
|
|
|
|
| 61 |
if cancel_event.is_set():
|
| 62 |
return
|
| 63 |
-
yield
|
|
|
|
|
|
| 4 |
from elevenlabs import generate, play
|
| 5 |
from elevenlabs import set_api_key
|
| 6 |
from elevenlabs import generate, stream
|
| 7 |
+
from agent_response import AgentResponse
|
| 8 |
|
| 9 |
|
| 10 |
class TextToSpeechService:
|
|
|
|
| 47 |
)
|
| 48 |
return audio_stream
|
| 49 |
|
| 50 |
+
async def get_speech_chunks_async(self, sentence_response:AgentResponse, cancel_event):
|
| 51 |
+
text_to_speak = sentence_response['llm_sentence']
|
| 52 |
stream = self.stream(text_to_speak)
|
| 53 |
stream, stream_backup = itertools.tee(stream)
|
| 54 |
while True:
|
|
|
|
| 60 |
|
| 61 |
# Run next(stream) in a separate thread to avoid blocking the event loop
|
| 62 |
chunk = await asyncio.to_thread(next, stream)
|
| 63 |
+
sentence_response['tts_raw_chunk'] = chunk
|
| 64 |
if cancel_event.is_set():
|
| 65 |
return
|
| 66 |
+
yield sentence_response
|
| 67 |
+
sentence_response['tts_raw_chunk_id'] += 1
|