Spaces:

Hexamind
/

QnA

Runtime error

App Files Files Community

YvesP committed on Jun 12, 2023

Commit

988c713

1 Parent(s): 00f7c25

updated version with extracts from documents

Browse files

Files changed (6) hide show

app.py +5 -19
src/model/block.py +5 -2
src/model/container.py +25 -32
src/model/doc.py +18 -14
src/tools/retriever.py +1 -0
src/view/view.py +22 -5

app.py CHANGED Viewed

@@ -4,31 +4,17 @@ from langchain.llms import OpenAI
 import chromadb
 from config import *
-# from config_key import *
 from src.control.control import Controller
 from src.tools.retriever import Retriever
 from src.tools.llm import LlmAgent
 from src.model.doc import Doc
 import src.view.view as view
 os.environ["TOKENIZERS_PARALLELISM"] = "true"
-cle = os.environ['CLE']
-open_key = os.environ['OPEN_KEY']
-os.environ["OPENAI_API_KEY"] = open_key
-OPENAI_API_KEY = "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"
-print('***')
-print(cle)
-print(open_key == OPENAI_API_KEY)
-print(f'open_key: {open_key}')
-print(f'OPENAI_API_KEY: {OPENAI_API_KEY}')
-print(f'os.environ["OPENAI_API_KEY"]: {os.environ["OPENAI_API_KEY"]}')
-print('***')
 doc_content = Doc(content_en_path)
 doc_plan = Doc(plan_path)
@@ -37,12 +23,12 @@ doc_content_fr = Doc(content_fr_path)
 client_db = chromadb.Client()
 retriever = Retriever(client_db, doc_plan, doc_content, doc_content_fr, collection_name)
-llm_model = OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
 llm = LlmAgent(llm_model)
 specials['remote_rate_df'] = pd.read_csv(specials['remote_rate_path'])
 specials['accommodation_meal_df'] = pd.read_csv(specials['accommodation_meal_path'])
-controller = Controller(retriever=retriever, llm=llm,  content_language=content_language, plan_language=plan_language,
                         specials=specials)
 qna = view.run(ctrl=controller, config=view_config)

 import chromadb
 from config import *
 from src.control.control import Controller
 from src.tools.retriever import Retriever
 from src.tools.llm import LlmAgent
 from src.model.doc import Doc
 import src.view.view as view
 os.environ["TOKENIZERS_PARALLELISM"] = "true"
+if not "OPENAI_API_KEY" in os.environ:
+    from config_key import OPENAI_API_KEY
+    os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
 doc_content = Doc(content_en_path)
 doc_plan = Doc(plan_path)
 client_db = chromadb.Client()
 retriever = Retriever(client_db, doc_plan, doc_content, doc_content_fr, collection_name)
+llm_model = OpenAI(temperature=0)
 llm = LlmAgent(llm_model)
 specials['remote_rate_df'] = pd.read_csv(specials['remote_rate_path'])
 specials['accommodation_meal_df'] = pd.read_csv(specials['accommodation_meal_path'])
+controller = Controller(retriever=retriever, llm=llm, content_language=content_language, plan_language=plan_language,
                         specials=specials)
 qna = view.run(ctrl=controller, config=view_config)

src/model/block.py CHANGED Viewed

@@ -3,6 +3,7 @@ class Block:
                  index: str = '', rank: int = 0, level: int = 0, distance: float = 99999):
         self.doc = doc
         self.title = title
         self.content = content
         self.content_fr = content_fr
         self.specials = []
@@ -12,8 +13,9 @@ class Block:
         self.distance = distance
     def to_dict(self) -> {}:
-        block_dict = {'doc': self.doc, 'title': self.title, 'content': self.content, 'content_fr': self.content_fr,
-                      'index': self.index, 'rank': self.rank, 'level': self.level, 'distance': self.distance}
         for i, s in enumerate(self.specials):
             special_key = 'special_'+str(i)
             block_dict[special_key] = s
@@ -23,6 +25,7 @@ class Block:
     def from_dict(self, block_dict: {}):
         self.doc = block_dict['doc']
         self.title = block_dict['title']
         self.content = block_dict['content']
         self.content_fr = block_dict['content_fr']
         self.index = block_dict['index']

                  index: str = '', rank: int = 0, level: int = 0, distance: float = 99999):
         self.doc = doc
         self.title = title
+        self.title_fr = ""
         self.content = content
         self.content_fr = content_fr
         self.specials = []
         self.distance = distance
     def to_dict(self) -> {}:
+        block_dict = {'doc': self.doc, 'title': self.title, 'title_fr': self.title_fr, 'content': self.content,
+                      'content_fr': self.content_fr, 'index': self.index, 'rank': self.rank, 'level': self.level,
+                      'distance': self.distance}
         for i, s in enumerate(self.specials):
             special_key = 'special_'+str(i)
             block_dict[special_key] = s
     def from_dict(self, block_dict: {}):
         self.doc = block_dict['doc']
         self.title = block_dict['title']
+        self.title_fr = block_dict['title_fr']
         self.content = block_dict['content']
         self.content_fr = block_dict['content_fr']
         self.index = block_dict['index']

src/model/container.py CHANGED Viewed

@@ -6,17 +6,19 @@ INFINITE = 99999
 class Container:
-    def __init__(self, paragraphs: [Paragraph], title: Paragraph = None, level: int = 0, rank: int = 0, father=None,
-                 id_=0):
         self.level = level
         self.title = title
         self.paragraphs = []
         self.children = []
-        self.rank = rank
         self.father = father  # if not father, then the container is at the top of the hierarchy
         self.id_ = int(str(1) + str(father.id_) + str(id_))
         if paragraphs:
-            self.paragraphs, self.children = self.create_children(paragraphs, level, rank + 1)
         self.blocks = self.get_blocks()
     @property
@@ -47,7 +49,7 @@ class Container:
         return text_chunks
     def get_blocks(self):
-        block = Block(rank=self.rank, level=self.level)
         if self.title:
             block.title = self.title.text
         for p in self.paragraphs:
@@ -62,28 +64,7 @@ class Container:
             blocks += child.blocks
         return blocks
-    @property
-    def table_of_contents(self):
-        toc = []
-        if self.title:
-            toc += [{str(self.level): self.title.text}]
-        if self.children:
-            for child in self.children:
-                toc += child.table_of_contents
-        return toc
-    def move(self, position: int, new_father=None):
-        current_father = self.father  # should be added in the model
-        current_father.children.remove(self)
-        self.rank = new_father.rank + 1 if new_father else 0
-        self.father = new_father
-        if position < len(new_father.children):
-            new_father.children.insert(position, self)
-        else:
-            new_father.children.append(self)
-    def create_children(self, paragraphs, level, rank) -> ([], []):
         """
         creates children containers or directly attached content
         and returns the list of containers and contents of level+1
@@ -95,8 +76,8 @@ class Container:
         container_title = None
         children = []
         in_children = False
-        level = INFINITE
         child_id = 0
         while paragraphs:
             p = paragraphs.pop(0)
@@ -104,19 +85,31 @@ class Container:
                 attached_paragraphs.append(p)
             else:
                 in_children = True
-                if p.is_structure and p.level <= level:  # if p is higher or equal in hierarchy
                     if container_paragraphs or container_title:
-                        children.append(Container(container_paragraphs, container_title, level, rank, self, child_id))
                         child_id += 1
                     container_paragraphs = []
                     container_title = p
                     level = p.level
-                else:  # p is strictly lower in hierarchy
                     container_paragraphs.append(p)
         if container_paragraphs or container_title:
-            children.append(Container(container_paragraphs, container_title, level, rank, self, child_id))
             child_id += 1
         return attached_paragraphs, children

 class Container:
+    def __init__(self, paragraphs: [Paragraph], title: Paragraph = None, level: int = 0, index: [int] = None,
+                 father=None, id_=0):
+        if index is None:
+            index = []
         self.level = level
         self.title = title
         self.paragraphs = []
         self.children = []
+        self.index = index
         self.father = father  # if not father, then the container is at the top of the hierarchy
         self.id_ = int(str(1) + str(father.id_) + str(id_))
         if paragraphs:
+            self.paragraphs, self.children = self.create_children(paragraphs, level, index)
         self.blocks = self.get_blocks()
     @property
         return text_chunks
     def get_blocks(self):
+        block = Block(level=self.level, index=self.index)
         if self.title:
             block.title = self.title.text
         for p in self.paragraphs:
             blocks += child.blocks
         return blocks
+    def create_children(self, paragraphs: Paragraph, level: int, index: [int]) -> ([Paragraph], []):
         """
         creates children containers or directly attached content
         and returns the list of containers and contents of level+1
         container_title = None
         children = []
         in_children = False
         child_id = 0
+        level = INFINITE
         while paragraphs:
             p = paragraphs.pop(0)
                 attached_paragraphs.append(p)
             else:
                 in_children = True
+                if p.is_structure and p.level <= level:  # if p is higher in hierarchy, then the child is completed
                     if container_paragraphs or container_title:
+                        if level <= len(index):
+                            index = index[:level]
+                            index[-1] += 1
+                        else:
+                            for i in range(level-len(index)):
+                                index.append(1)
+                        children.append(Container(container_paragraphs, container_title, level, index, self, child_id))
                         child_id += 1
                     container_paragraphs = []
                     container_title = p
                     level = p.level
+                else:  # p is normal text or strictly lower in hierarchy, then the child continues to grow
                     container_paragraphs.append(p)
         if container_paragraphs or container_title:
+            if level <= len(index):
+                index = index[:level]
+                index[-1] += 1
+            else:
+                for i in range(level - len(index)):
+                    index.append(1)
+            children.append(Container(container_paragraphs, container_title, level, index, self, child_id))
             child_id += 1
         return attached_paragraphs, children

src/model/doc.py CHANGED Viewed

@@ -13,7 +13,7 @@ class Doc:
         self.id_ = id(self)
         self.path = path
         paragraphs = [Paragraph(xp, self.id_, i) for (i, xp) in enumerate(self.xdoc.paragraphs)]
-        self.container = Container(paragraphs, father=self)
         self.blocks = self.get_blocks()
     @property
@@ -29,21 +29,25 @@ class Doc:
                 index_str += '.' + str(el)
             return index_str
-        current_index = []
         blocks = self.container.blocks
         for block in blocks:
             block.doc = self.title
-            current_level = len(current_index)
-            if 0 < block.level:
-                if block.level == current_level:
-                    current_index[-1] += 1
-                elif current_level < block.level:
-                    current_index.append(1)
-                elif block.level < current_level:
-                    current_index = current_index[:block.level]
-                    current_index[-1] += 1
-                block.index = from_list_to_str(current_index)
-            else:
-                block.index = "0"
                 blocks.remove(block)
         return blocks

         self.id_ = id(self)
         self.path = path
         paragraphs = [Paragraph(xp, self.id_, i) for (i, xp) in enumerate(self.xdoc.paragraphs)]
+        self.container = Container(paragraphs, father=self, level=0)
         self.blocks = self.get_blocks()
     @property
                 index_str += '.' + str(el)
             return index_str
         blocks = self.container.blocks
         for block in blocks:
             block.doc = self.title
+            if block.level == 0:
                 blocks.remove(block)
+            block.index = from_list_to_str(block.index)
         return blocks
+"""
+    current_level = len(current_index)
+    if 0 < block.level:
+        if block.level == current_level:
+            current_index[-1] += 1
+        elif current_level < block.level:
+            current_index.append(1)
+        elif block.level < current_level:
+            current_index = current_index[:block.level]
+            current_index[-1] += 1
+        block.index = from_list_to_str(current_index)
+    else:
+        block.index = "0"
+"""

src/tools/retriever.py CHANGED Viewed

@@ -12,6 +12,7 @@ class Retriever:
             cb.specials = pb.specials
         for cb, cb_fr in zip(content_blocks, content_fr_blocks):
             cb.content_fr = cb_fr.content
         self.collection = db_client.create_collection(name=collection_name)
         self.collection.add(
             documents=[block.content for block in plan_blocks],

             cb.specials = pb.specials
         for cb, cb_fr in zip(content_blocks, content_fr_blocks):
             cb.content_fr = cb_fr.content
+            cb.title_fr = cb_fr.title
         self.collection = db_client.create_collection(name=collection_name)
         self.collection.add(
             documents=[block.content for block in plan_blocks],

src/view/view.py CHANGED Viewed

@@ -30,11 +30,19 @@ def run(ctrl: Controller, config: {}):
                     interactive=False,
                     visible=False,
                 )
-                sources_comp = gr.CheckboxGroup(
                     label="Documents sources",
                     visible=False,
                     interactive=False,
                 )
             with gr.Column():
                 pass
@@ -47,11 +55,17 @@ def run(ctrl: Controller, config: {}):
         def input_text_fn2(input_text_):
             answer, sources = ctrl.get_response(query_fr=input_text_)
-            source_labels = [s.distance_str + ' ' + s.index + ' ' + s.title + ' from ' + s.doc for s in sources]
             update_ = {
                 output_text_comp: gr.update(value=answer),
-                sources_comp: gr.update(visible=True, choices=source_labels, value=source_labels)
             }
             return update_
         def input_example_fn(input_example_):
@@ -63,9 +77,12 @@ def run(ctrl: Controller, config: {}):
         input_text_comp \
             .submit(input_text_fn1, inputs=[], outputs=[output_text_comp]) \
-            .then(input_text_fn2, inputs=[input_text_comp], outputs=[output_text_comp, sources_comp])
         input_example_comp \
             .change(input_example_fn, inputs=[input_example_comp], outputs=[input_text_comp, output_text_comp]) \
-            .then(input_text_fn2, inputs=[input_text_comp], outputs=[output_text_comp, sources_comp])
     return qna

                     interactive=False,
                     visible=False,
                 )
+                sources_title_comp = gr.CheckboxGroup(
                     label="Documents sources",
                     visible=False,
                     interactive=False,
                 )
+                source_text_comp = []
+                for i in range(4):
+                    source_text_comp.append(gr.Textbox(
+                        lines=4,
+                        max_lines=4,
+                        interactive=False,
+                        visible=False,
+                    ))
             with gr.Column():
                 pass
         def input_text_fn2(input_text_):
             answer, sources = ctrl.get_response(query_fr=input_text_)
+            source_labels = [f'{s.distance_str} {s.index} {s.title} from {s.doc}' for s in sources]
             update_ = {
                 output_text_comp: gr.update(value=answer),
+                sources_title_comp: gr.update(visible=False, choices=source_labels, value=source_labels),
             }
+            for i in range(min(len(sources), 4)):
+                s = sources[i]
+                source_label = f'{s.index}   {s.title_fr}                        score = {s.distance_str}'
+                source_text = s.content_fr
+                update_[source_text_comp[i]] = gr.update(visible=True, value=source_text, label=source_label)
             return update_
         def input_example_fn(input_example_):
         input_text_comp \
             .submit(input_text_fn1, inputs=[], outputs=[output_text_comp]) \
+            .then(input_text_fn2, inputs=[input_text_comp], outputs=[output_text_comp, sources_title_comp])
         input_example_comp \
             .change(input_example_fn, inputs=[input_example_comp], outputs=[input_text_comp, output_text_comp]) \
+            .then(input_text_fn2,
+                  inputs=[input_text_comp],
+                  outputs=[output_text_comp, sources_title_comp,
+                           source_text_comp[0], source_text_comp[1], source_text_comp[2], source_text_comp[3]])
     return qna