import asyncio
import base64
import os
import tempfile

import numpy as np
import pandas as pd

from project.config import settings


class ChatBot:

    def __init__(self, memory=None):
        # Keep the history per instance; a class-level list would be shared
        # across every ChatBot instance and mix up concurrent conversations.
        self.chat_history = [{
            'role': 'assistant',
            'content': 'Hi! What would you like to order from the menu?'
        }]

    @staticmethod
    def _transform_bytes_to_file(data_bytes) -> str:
        # Decode the base64-encoded audio and write it to a temporary .mp3 file;
        # the caller is responsible for removing the file afterwards.
        audio_bytes = base64.b64decode(data_bytes)
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
            temp_file.write(audio_bytes)
            return temp_file.name

    @staticmethod
    async def _transcript_audio(temp_filepath: str) -> str:
        with open(temp_filepath, 'rb') as file:
            transcript = await settings.OPENAI_CLIENT.audio.transcriptions.create(
                model='whisper-1',
                file=file,
                # language='nl'
            )
        text = transcript.text
        return text

    @staticmethod
    async def _convert_to_embeddings(query: str) -> list[float]:
        response = await settings.OPENAI_CLIENT.embeddings.create(
            input=query,
            model='text-embedding-3-large'
        )
        embeddings = response.data[0].embedding
        return embeddings

    @staticmethod
    async def _convert_response_to_voice(ai_response: str) -> str:
        audio = await settings.OPENAI_CLIENT.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=ai_response
        )
        encoded_audio = base64.b64encode(audio.content).decode('utf-8')
        return encoded_audio

    @staticmethod
    async def _get_context_data(query: list[float]) -> str:
        # Search the FAISS index for products within SEARCH_RADIUS of the query
        # embedding; range_search returns (lims, distances, indices).
        query_vector = np.array([query], dtype='float32')
        _, distances, indices = settings.FAISS_INDEX.range_search(query_vector, settings.SEARCH_RADIUS)
        # Copy the matched rows so the distance column is not written onto a slice
        # of the original dataset.
        filtered_data_df = settings.products_dataset.iloc[indices].copy()
        filtered_data_df['distance'] = distances
        # Keep only the closest match and use its 'Search' text as context.
        sorted_data_df: pd.DataFrame = filtered_data_df.sort_values(by='distance').reset_index(drop=True)
        data = sorted_data_df.drop('distance', axis=1).head(1).to_dict(orient='records')
        context_str = ''
        for row in data:
            context_str += f'{row["Search"]}\n\n'
        return context_str

    async def _rag(self, query: str, query_type: str, context: str = None) -> str:
        # Choose the system prompt: retrieved product context takes priority,
        # otherwise route by the classified query type.
        if context:
            self.chat_history.append({'role': 'assistant', 'content': context})
            prompt = settings.PRODUCT_PROMPT
        elif 'search' in query_type.lower():
            prompt = settings.EMPTY_PRODUCT_PROMPT
        elif 'purchase' in query_type.lower():
            prompt = settings.ADD_TO_CART_PROMPT
        elif 'product_list' in query_type.lower():
            prompt = settings.PRODUCT_LIST_PROMPT
        else:
            prompt = settings.EMPTY_PRODUCT_PROMPT
        self.chat_history.append({'role': 'user', 'content': query})
        messages = [{'role': 'system', 'content': prompt}] + self.chat_history
        completion = await settings.OPENAI_CLIENT.chat.completions.create(
            messages=messages,
            temperature=0,
            n=1,
            model="gpt-3.5-turbo",
        )
        response = completion.choices[0].message.content
        self.chat_history.append({'role': 'assistant', 'content': response})
        return response

    async def _get_query_type(self, query: str) -> str:
        # Classify the user's reply (e.g. search / purchase / product_list)
        # in light of the assistant's most recent message.
        assistant_message = self.chat_history[-1]['content']
        messages = [
            {
                "role": 'system',
                'content': settings.ANALYZER_PROMPT
            },
            {
                "role": 'user',
                "content": f"Assistant message: {assistant_message}\n"
                           f"User response: {query}"
            }
        ]
        completion = await settings.OPENAI_CLIENT.chat.completions.create(
            messages=messages,
            temperature=0,
            n=1,
            model="gpt-3.5-turbo",
        )
        response = completion.choices[0].message.content
        return response

    async def ask(self, data: dict) -> dict:
        temp_filepath = self._transform_bytes_to_file(data['audio'])
        try:
            transcript = await self._transcript_audio(temp_filepath)
            query_type = await self._get_query_type(transcript)

            context = None
            # Match the query type the same way _rag does, so a response such as
            # "Search" or "search query" still triggers retrieval.
            if 'search' in query_type.lower():
                transformed_query = await self._convert_to_embeddings(transcript)
                context = await self._get_context_data(transformed_query)

            ai_response = await self._rag(transcript, query_type, context)
            voice_ai_response = await self._convert_response_to_voice(ai_response)
        finally:
            # Always clean up the temporary audio file, even if a call above fails.
            try:
                os.remove(temp_filepath)
            except FileNotFoundError:
                pass

        return {
            'user_query': transcript,
            'ai_response': ai_response,
            'voice_response': voice_ai_response
        }
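

# A minimal usage sketch (illustrative only): it assumes `project.config.settings`
# already exposes a configured async OpenAI client, FAISS index, prompts, and
# product dataset, and that the incoming payload carries base64-encoded MP3 audio
# under the 'audio' key. The sample file path below is hypothetical.
if __name__ == '__main__':

    async def _demo():
        # Encode a local recording the same way the client would send it.
        with open('sample_order.mp3', 'rb') as f:  # hypothetical sample recording
            payload = {'audio': base64.b64encode(f.read()).decode('utf-8')}
        bot = ChatBot()
        result = await bot.ask(payload)
        print(result['user_query'])
        print(result['ai_response'])

    asyncio.run(_demo())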