Spaces:
Runtime error
Runtime error
adrien.aribaut-gaudin
committed on
Commit
·
d24ee64
1
Parent(s):
6aa774c
Added the possibility to answer in French
Browse files- src/control/control.py +4 -3
- src/model/container.py +15 -1
- src/tools/llm.py +16 -7
src/control/control.py
CHANGED
@@ -12,13 +12,14 @@ class Chatbot:
|
|
12 |
|
13 |
def get_response(self, query, histo):
|
14 |
histo_conversation, histo_queries = self._get_histo(histo)
|
15 |
-
|
|
|
16 |
block_sources = self.retriever.similarity_search(query=queries)
|
17 |
block_sources = self._select_best_sources(block_sources)
|
18 |
sources_contents = [s.content for s in block_sources]
|
19 |
context = '\n'.join(sources_contents)
|
20 |
-
answer = self.llm.generate_paragraph(query=queries, histo=histo_conversation, context=context, language=
|
21 |
-
answer = self.llm.generate_answer(
|
22 |
answer = self._clean_answer(answer)
|
23 |
return answer, block_sources
|
24 |
|
|
|
12 |
|
13 |
def get_response(self, query, histo):
|
14 |
histo_conversation, histo_queries = self._get_histo(histo)
|
15 |
+
langage_of_query = self.llm.detect_language(query)
|
16 |
+
queries = self.llm.translate(text=histo_queries) if langage_of_query.lower() == 'fr' else histo_queries
|
17 |
block_sources = self.retriever.similarity_search(query=queries)
|
18 |
block_sources = self._select_best_sources(block_sources)
|
19 |
sources_contents = [s.content for s in block_sources]
|
20 |
context = '\n'.join(sources_contents)
|
21 |
+
answer = self.llm.generate_paragraph(query=queries, histo=histo_conversation, context=context, language=langage_of_query)
|
22 |
+
answer = self.llm.generate_answer(answer=answer, query=query, histo=histo_conversation, context=context,language=langage_of_query)
|
23 |
answer = self._clean_answer(answer)
|
24 |
return answer, block_sources
|
25 |
|
src/model/container.py
CHANGED
@@ -77,7 +77,21 @@ class Container:
|
|
77 |
|
78 |
return attached_paragraphs, children
|
79 |
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
# def create_children(self, paragraphs: [Paragraph], level: int, index: [int]):
|
83 |
# """
|
|
|
77 |
|
78 |
return attached_paragraphs, children
|
79 |
|
80 |
+
@property
|
81 |
+
def text_chunks(self, chunk=500):
|
82 |
+
text_chunks = []
|
83 |
+
text_chunk = ""
|
84 |
+
for p in self.paragraphs:
|
85 |
+
if chunk < len(text_chunk) + len(p.text):
|
86 |
+
text_chunks.append(text_chunk)
|
87 |
+
text_chunk = ""
|
88 |
+
else:
|
89 |
+
text_chunk += " " + p.text
|
90 |
+
if text_chunk and not text_chunk.isspace():
|
91 |
+
text_chunks.append(text_chunk)
|
92 |
+
for child in self.children:
|
93 |
+
text_chunks += child.text_chunks
|
94 |
+
return text_chunks
|
95 |
|
96 |
# def create_children(self, paragraphs: [Paragraph], level: int, index: [int]):
|
97 |
# """
|
src/tools/llm.py
CHANGED
@@ -34,23 +34,32 @@ class LlmAgent:
|
|
34 |
p = self.llm(template)
|
35 |
return p
|
36 |
|
37 |
-
def generate_answer(self, query: str,
|
38 |
-
"""provides the final answer
|
39 |
|
40 |
def _cut_unfinished_sentence(s: str):
|
41 |
return '.'.join(s.split('.')[:-1])
|
42 |
|
43 |
-
template = (f"Your task consists in
|
44 |
f"delimited by triple backticks: ```{query}``` \\n"
|
45 |
-
f"You are given the answer in
|
46 |
-
f"\\n You don't add new content to the answer
|
47 |
-
f"\\n 1 You can use some vocabulary from the context
|
48 |
f"```{context}```"
|
49 |
f"\\n 2 You are consistent and avoid redundancies with the rest of the initial"
|
50 |
-
f" conversation
|
51 |
)
|
52 |
|
53 |
p = self.llm(template)
|
54 |
# p = _cut_unfinished_sentence(p)
|
55 |
return p
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
|
|
34 |
p = self.llm(template)
|
35 |
return p
|
36 |
|
37 |
+
def generate_answer(self, query: str, answer: str, histo: str, context: str, language: str) -> str:
|
38 |
+
"""provides the final answer based on the initial query and the answer"""
|
39 |
|
40 |
def _cut_unfinished_sentence(s: str):
|
41 |
return '.'.join(s.split('.')[:-1])
|
42 |
|
43 |
+
template = (f"Your task consists in answering to the query from users in {language} "
|
44 |
f"delimited by triple backticks: ```{query}``` \\n"
|
45 |
+
f"You are given the answer in {language} delimited by triple backticks: ```{answer}```"
|
46 |
+
f"\\n You don't add new content to the answer but: "
|
47 |
+
f"\\n 1 You can use some vocabulary from the context delimited by triple backticks: "
|
48 |
f"```{context}```"
|
49 |
f"\\n 2 You are consistent and avoid redundancies with the rest of the initial"
|
50 |
+
f" conversation delimited by triple backticks: ```{histo}```"
|
51 |
)
|
52 |
|
53 |
p = self.llm(template)
|
54 |
# p = _cut_unfinished_sentence(p)
|
55 |
return p
|
56 |
+
|
57 |
+
def detect_language(self, text: str) -> str:
|
58 |
+
"""detects the language"""
|
59 |
+
template = (f"Your task consists in detecting the language of the following text delimited by triple backticks: "
|
60 |
+
f"```{text}```"
|
61 |
+
f"Your answer shall be the two letters code of the language"
|
62 |
+
)
|
63 |
+
p = self.llm(template)
|
64 |
+
return p
|
65 |
|