adrien.aribaut-gaudin committed on
Commit
d24ee64
·
1 Parent(s): 6aa774c

Added the possibility to answer in French

Browse files
src/control/control.py CHANGED
@@ -12,13 +12,14 @@ class Chatbot:
12
 
13
  def get_response(self, query, histo):
14
  histo_conversation, histo_queries = self._get_histo(histo)
15
- queries = histo_queries
 
16
  block_sources = self.retriever.similarity_search(query=queries)
17
  block_sources = self._select_best_sources(block_sources)
18
  sources_contents = [s.content for s in block_sources]
19
  context = '\n'.join(sources_contents)
20
- answer = self.llm.generate_paragraph(query=queries, histo=histo_conversation, context=context, language='en')
21
- answer = self.llm.generate_answer(answer_en=answer, query=query, histo=histo_conversation, context=context)
22
  answer = self._clean_answer(answer)
23
  return answer, block_sources
24
 
 
12
 
13
  def get_response(self, query, histo):
14
  histo_conversation, histo_queries = self._get_histo(histo)
15
+ langage_of_query = self.llm.detect_language(query)
16
+ queries = self.llm.translate(text=histo_queries) if langage_of_query.lower() == 'fr' else histo_queries
17
  block_sources = self.retriever.similarity_search(query=queries)
18
  block_sources = self._select_best_sources(block_sources)
19
  sources_contents = [s.content for s in block_sources]
20
  context = '\n'.join(sources_contents)
21
+ answer = self.llm.generate_paragraph(query=queries, histo=histo_conversation, context=context, language=langage_of_query)
22
+ answer = self.llm.generate_answer(answer=answer, query=query, histo=histo_conversation, context=context,language=langage_of_query)
23
  answer = self._clean_answer(answer)
24
  return answer, block_sources
25
 
src/model/container.py CHANGED
@@ -77,7 +77,21 @@ class Container:
77
 
78
  return attached_paragraphs, children
79
 
80
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  # def create_children(self, paragraphs: [Paragraph], level: int, index: [int]):
83
  # """
 
77
 
78
  return attached_paragraphs, children
79
 
80
+ @property
81
+ def text_chunks(self, chunk=500):
82
+ text_chunks = []
83
+ text_chunk = ""
84
+ for p in self.paragraphs:
85
+ if chunk < len(text_chunk) + len(p.text):
86
+ text_chunks.append(text_chunk)
87
+ text_chunk = ""
88
+ else:
89
+ text_chunk += " " + p.text
90
+ if text_chunk and not text_chunk.isspace():
91
+ text_chunks.append(text_chunk)
92
+ for child in self.children:
93
+ text_chunks += child.text_chunks
94
+ return text_chunks
95
 
96
  # def create_children(self, paragraphs: [Paragraph], level: int, index: [int]):
97
  # """
src/tools/llm.py CHANGED
@@ -34,23 +34,32 @@ class LlmAgent:
34
  p = self.llm(template)
35
  return p
36
 
37
- def generate_answer(self, query: str, answer_en: str, histo: str, context: str) -> str:
38
- """provides the final answer in French based on the initial query and the answer in english"""
39
 
40
  def _cut_unfinished_sentence(s: str):
41
  return '.'.join(s.split('.')[:-1])
42
 
43
- template = (f"Your task consists in translating the answer in French to the query "
44
  f"delimited by triple backticks: ```{query}``` \\n"
45
- f"You are given the answer in english delimited by triple backticks: ```{answer_en}```"
46
- f"\\n You don't add new content to the answer in English but: "
47
- f"\\n 1 You can use some vocabulary from the context in English delimited by triple backticks: "
48
  f"```{context}```"
49
  f"\\n 2 You are consistent and avoid redundancies with the rest of the initial"
50
- f" conversation in English delimited by triple backticks: ```{histo}```"
51
  )
52
 
53
  p = self.llm(template)
54
  # p = _cut_unfinished_sentence(p)
55
  return p
 
 
 
 
 
 
 
 
 
56
 
 
34
  p = self.llm(template)
35
  return p
36
 
37
+ def generate_answer(self, query: str, answer: str, histo: str, context: str, language: str) -> str:
38
+ """provides the final answer based on the initial query and the answer"""
39
 
40
  def _cut_unfinished_sentence(s: str):
41
  return '.'.join(s.split('.')[:-1])
42
 
43
+ template = (f"Your task consists in answering to the query from users in {language} "
44
  f"delimited by triple backticks: ```{query}``` \\n"
45
+ f"You are given the answer in {language} delimited by triple backticks: ```{answer}```"
46
+ f"\\n You don't add new content to the answer but: "
47
+ f"\\n 1 You can use some vocabulary from the context delimited by triple backticks: "
48
  f"```{context}```"
49
  f"\\n 2 You are consistent and avoid redundancies with the rest of the initial"
50
+ f" conversation delimited by triple backticks: ```{histo}```"
51
  )
52
 
53
  p = self.llm(template)
54
  # p = _cut_unfinished_sentence(p)
55
  return p
56
+
57
+ def detect_language(self, text: str) -> str:
58
+ """detects the language"""
59
+ template = (f"Your task consists in detecting the language of the following text delimited by triple backticks: "
60
+ f"```{text}```"
61
+ f"Your answer shall be the two letters code of the language"
62
+ )
63
+ p = self.llm(template)
64
+ return p
65