# ChatBot module: voice-driven food-ordering assistant.
import asyncio
import base64
import os
import tempfile

import numpy as np
import pandas as pd

from project.config import settings
class ChatBot:
    """Voice-driven food-ordering assistant.

    Pipeline per request (see :meth:`ask`): decode the base64 audio payload
    to a temp file -> transcribe with Whisper -> classify the user's intent
    -> optionally retrieve product context from the FAISS index -> answer
    via a chat completion -> synthesize the reply to speech and return it
    base64-encoded.
    """

    def __init__(self, memory=None):
        # Per-instance history. (Previously this was a class-level list,
        # which silently shared conversation state across all ChatBot
        # instances — every new bot saw every other bot's messages.)
        self.chat_history: list[dict] = [{
            'role': 'assistant',
            'content': "Hi! What would you like to order from the food?"
        }]

    @staticmethod
    def _transform_bytes_to_file(data_bytes) -> str:
        """Decode a base64 audio payload into a temporary ``.mp3`` file.

        Returns the path of the created file. ``delete=False`` keeps the
        file on disk after closing; the caller is responsible for removing
        it (see :meth:`ask`).

        NOTE: declared ``@staticmethod`` — the original definition had no
        ``self`` parameter yet was invoked as a bound method, which raised
        ``TypeError`` at call time.
        """
        audio_bytes = base64.b64decode(data_bytes)
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
        try:
            temp_file.write(audio_bytes)
            filepath = temp_file.name
        finally:
            temp_file.close()
        return filepath

    @staticmethod
    async def _transcript_audio(temp_filepath: str) -> str:
        """Transcribe the audio file at *temp_filepath* with Whisper-1."""
        with open(temp_filepath, 'rb') as file:
            transcript = await settings.OPENAI_CLIENT.audio.transcriptions.create(
                model='whisper-1',
                file=file,
            )
        return transcript.text

    @staticmethod
    async def _convert_to_embeddings(query: str) -> list[float]:
        """Embed *query* with ``text-embedding-3-large``; return the vector."""
        response = await settings.OPENAI_CLIENT.embeddings.create(
            input=query,
            model='text-embedding-3-large'
        )
        return response.data[0].embedding

    @staticmethod
    async def _convert_response_to_voice(ai_response: str) -> str:
        """Synthesize *ai_response* to speech; return base64-encoded audio."""
        audio = await settings.OPENAI_CLIENT.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=ai_response
        )
        return base64.b64encode(audio.content).decode('utf-8')

    @staticmethod
    async def _get_context_data(query: list[float]) -> str:
        """Look up products near the *query* embedding in the FAISS index.

        Performs a range search within ``settings.SEARCH_RADIUS``, keeps the
        single closest product row, and returns its ``Search`` text followed
        by a blank line (empty string when nothing is within radius).
        """
        query_vector = np.array([query], dtype='float32')
        # faiss range_search returns (lims, distances, indices); lims unused
        # because we query a single vector.
        _, distances, indices = settings.FAISS_INDEX.range_search(
            query_vector, settings.SEARCH_RADIUS
        )
        # .copy() so the distance column is written to a private frame, not
        # a view of the shared products dataset (SettingWithCopyWarning).
        matches_df = settings.products_dataset.iloc[indices].copy()
        matches_df['distance'] = distances
        matches_df = (
            matches_df.sort_values(by='distance')
            .reset_index(drop=True)
            .drop('distance', axis=1)
        )
        rows = matches_df.head(1).to_dict(orient='records')
        return ''.join(f'{row["Search"]}\n\n' for row in rows)

    async def _rag(self, query: str, query_type: str, context: str = None):
        """Answer *query* with a chat completion, optionally grounded in *context*.

        Appends the context (if any), the user query, and the model's reply
        to ``self.chat_history``; the system prompt is chosen from
        *query_type* and is NOT stored in the history.
        """
        if context:
            self.chat_history.append({'role': 'assistant', 'content': context})
            prompt = settings.PRODUCT_PROMPT
        else:
            query_type_lower = query_type.lower()
            if 'search' in query_type_lower:
                # A search that produced no context -> "nothing found" prompt.
                prompt = settings.EMPTY_PRODUCT_PROMPT
            elif 'purchase' in query_type_lower:
                prompt = settings.ADD_TO_CART_PROMPT
            elif 'product_list' in query_type_lower:
                prompt = settings.PRODUCT_LIST_PROMPT
            else:
                prompt = settings.EMPTY_PRODUCT_PROMPT
        self.chat_history.append({'role': 'user', 'content': query})
        messages = [{'role': 'system', 'content': f"{prompt}"}]
        messages += self.chat_history
        completion = await settings.OPENAI_CLIENT.chat.completions.create(
            messages=messages,
            temperature=0,
            n=1,
            model="gpt-3.5-turbo",
        )
        response = completion.choices[0].message.content
        self.chat_history.append({'role': 'assistant', 'content': response})
        return response

    async def _get_query_type(self, query: str) -> str:
        """Classify the user's intent given the last assistant message.

        Returns the classifier model's raw text label (matched with
        substring checks by callers, e.g. ``'search' in result.lower()``).
        """
        assistant_message = self.chat_history[-1]['content']
        messages = [
            {
                "role": 'system',
                'content': settings.ANALYZER_PROMPT
            },
            {
                "role": 'user',
                "content": f"Assistant message: {assistant_message}\n"
                           f"User response: {query}"
            }
        ]
        completion = await settings.OPENAI_CLIENT.chat.completions.create(
            messages=messages,
            temperature=0,
            n=1,
            model="gpt-3.5-turbo",
        )
        return completion.choices[0].message.content

    async def ask(self, data: dict) -> dict:
        """Process one voice request.

        *data* must contain ``'audio'``: a base64-encoded audio payload.
        Returns a dict with the transcript (``user_query``), the text reply
        (``ai_response``) and the base64 TTS audio (``voice_response``).
        """
        temp_filepath = self._transform_bytes_to_file(data['audio'])
        try:
            transcript = await self._transcript_audio(temp_filepath)
        finally:
            # Remove the temp file even if transcription fails; previously
            # any exception before the end of ask() leaked the file.
            try:
                os.remove(temp_filepath)
            except FileNotFoundError:
                pass
        query_type = await self._get_query_type(transcript)
        context = None
        # Substring match for consistency with _rag (the classifier may
        # return the label embedded in extra text).
        if 'search' in query_type.lower():
            embedding = await self._convert_to_embeddings(transcript)
            context = await self._get_context_data(embedding)
        ai_response = await self._rag(transcript, query_type, context)
        voice_ai_response = await self._convert_response_to_voice(ai_response)
        return {
            'user_query': transcript,
            'ai_response': ai_response,
            'voice_response': voice_ai_response,
        }