File size: 2,675 Bytes
9ff01ff
 
 
 
 
 
 
 
6aa774c
9ff01ff
 
 
 
 
7352d7a
 
9ff01ff
 
 
 
d24ee64
 
9ff01ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import pandas as pd

from src.tools.retriever import Retriever
from src.tools.llm import LlmAgent
from src.model.block import Block


class Chatbot:
    def __init__(self, llm_agent : LlmAgent, retriever : Retriever):
        self.retriever = retriever
        self.llm = llm_agent

    def get_response(self, query, histo):
        histo_conversation, histo_queries = self._get_histo(histo)
        langage_of_query = self.llm.detect_language(query).lower()
        queries = self.llm.translate(text=histo_queries)
        block_sources = self.retriever.similarity_search(query=queries)
        block_sources = self._select_best_sources(block_sources)
        sources_contents = [s.content for s in block_sources]
        context = '\n'.join(sources_contents)
        answer = self.llm.generate_paragraph(query=queries, histo=histo_conversation, context=context, language=langage_of_query)
        answer = self.llm.generate_answer(answer=answer, query=query, histo=histo_conversation, context=context,language=langage_of_query)
        answer = self._clean_answer(answer)
        return answer, block_sources

    

    @staticmethod
    def  _select_best_sources(sources: [Block], delta_1_2=0.15, delta_1_n=0.3, absolute=1.2, alpha=0.9) -> [Block]:
        """
        Select the best sources: not far from the very best, not far from the last selected, and not too bad per se
        """
        best_sources = []
        for idx, s in enumerate(sources):
            if idx == 0 \
                    or (s.distance - sources[idx - 1].distance < delta_1_2
                        and s.distance - sources[0].distance < delta_1_n) \
                    or s.distance < absolute:
                best_sources.append(s)
                delta_1_2 *= alpha
                delta_1_n *= alpha
                absolute *= alpha
            else:
                break
        return best_sources
    

    @staticmethod
    def _get_histo(histo: [(str, str)]) -> (str, str):
        histo_conversation = ""
        histo_queries = ""

        for (query, answer) in histo[-5:]:
            histo_conversation += f'user: {query} \n bot: {answer}\n'
            histo_queries += query + '\n'
        return histo_conversation[:-1], histo_queries
    

    @staticmethod
    def _clean_answer(answer: str) -> str:
        answer = answer.strip('bot:')
        while answer and answer[-1] in {"'", '"', " ", "`"}:
            answer = answer[:-1]
        while answer and answer[0] in {"'", '"', " ", "`"}:
            answer = answer[1:]
        answer = answer.strip('bot:')
        if answer:
            if answer[-1] != ".":
                answer += "."
        return answer