diff --git a/data/.DS_Store b/data/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6
Binary files /dev/null and b/data/.DS_Store differ
diff --git a/data/AccomodationAndMealsForfaits_en.csv b/data/AccomodationAndMealsForfaits_en.csv
new file mode 100644
index 0000000000000000000000000000000000000000..e9499ec401c3eb01e2c72798a42621745a805c5e
Binary files /dev/null and b/data/AccomodationAndMealsForfaits_en.csv differ
diff --git a/data/AccomodationAndMealsForfaits_en.numbers b/data/AccomodationAndMealsForfaits_en.numbers
new file mode 100755
index 0000000000000000000000000000000000000000..dc1284a45b900a6b1671ed70d588c363b2b521fb
Binary files /dev/null and b/data/AccomodationAndMealsForfaits_en.numbers differ
diff --git a/data/AccomodationAndMealsForfaits_fr.csv b/data/AccomodationAndMealsForfaits_fr.csv
new file mode 100644
index 0000000000000000000000000000000000000000..8b71a64f48ab37711f5a9f8ec7d2867f5c0d9d35
--- /dev/null
+++ b/data/AccomodationAndMealsForfaits_fr.csv
@@ -0,0 +1,31 @@
+Destination;Hebergement;Repas
+France;125;27
+Allemagne;150;35
+Arabie Saoudite;200;40
+Autriche;110;40
+Belgique;150;35
+Canada;150;30
+Chine;113;37
+Egypte;150;25
+Emirats Arabes Unis;160;46
+Espagne;130;30
+Etats-Unis;140;47
+Grèce;140;25
+Inde;160;47
+Irlande;180;30
+Italie;120;37
+Japon;150;25
+Maroc;110;25
+Mexique;130;27
+Norvège;160;40
+Pays-Bas;150;32
+Pologne;110;23
+Portugal;108;25
+Qatar;210;35
+Royaume-Uni;130;28
+Russie;180;50
+Singapour;170;42
+Suède;90;30
+Suisse;192;35
+Taiwan;123;37
+Turquie;150;28
\ No newline at end of file
diff --git a/data/AccomodationAndMealsForfaits_fr.numbers b/data/AccomodationAndMealsForfaits_fr.numbers
new file mode 100755
index 0000000000000000000000000000000000000000..5269dd5685195ad2731beccaf11b68ec1612fd36
Binary files /dev/null and b/data/AccomodationAndMealsForfaits_fr.numbers differ
diff --git a/data/BaremeTauxEloignement.csv b/data/BaremeTauxEloignement.csv
new file mode 100644
index 0000000000000000000000000000000000000000..e03e92d3f006acf8ebf53734650d76dbddf81b65
--- /dev/null
+++ b/data/BaremeTauxEloignement.csv
@@ -0,0 +1,84 @@
+Tableau 1
+Barème Taux d’Éloignement;
+Afrique du sud;10 %
+Algérie;15 %
+Allemagne;0 %
+Arabie saoudite;12 %
+Argentine;11 %
+Australie;3 %
+Autriche;0 %
+Belgique;0 %
+Bolivie;11 %
+Brésil;11 %
+Bulgarie;10 %
+Cameroun;13 %
+Canada;3 %
+Chili;9 %
+Chine;13 %
+Chypre;4 %
+Colombie;13 %
+Corée;11 %
+Croatie;7 %
+Danemark;0 %
+Djibouti;13 %
+E.A.U;9 %
+Egypte;16 %
+Equateur;12 %
+Espagne;0 %
+Estonie;7 %
+Etats unis;3 %
+Ethiopie;12 %
+Finlande;0 %
+Grande Bretagne;0 %
+Grèce;0 %
+Guadeloupe;3 %
+Guyane;7 %
+Hong Kong;8 %
+Hongrie;6 %
+Ile Maurice;8 %
+Inde;15 %
+Indonésie;17 %
+Irlande;0 %
+Israël;9 %
+Italie;0 %
+Japon;8 %
+Jordanie;10 %
+Kenya;13 %
+Koweït;11 %
+Laos;13 %
+Luxembourg;0 %
+Madagascar;13 %
+Malaisie;14 %
+Maroc;8 %
+Martinique;3 %
+Mauritanie;10 %
+Mexique;12 %
+Mozambique;14 %
+Nigeria;17 %
+Norvège;0 %
+Nouvelle Calédonie;4 %
+Pakistan;17 %
+Pérou;13 %
+Philippines;16 %
+Pologne;8 %
+Polynésie;5 %
+Portugal;0 %
+Qatar;9 %
+République Congo;14 %
+République tchèque;6 %
+Roumanie;11 %
+Russie;13 %
+Sénégal;10 %
+Serbie;11 %
+Singapour;6 %
+Slovaquie;6 %
+Sri Lanka;15 %
+Suède;0 %
+Suisse;0 %
+Taiwan;11 %
+Thaïlande;12 %
+Tunisie;7 %
+Turquie;10 %
+Ukraine;12 %
+Venezuela;13 %
+Vietnam;13 %
\ No newline at end of file
diff --git a/data/DeplacementsEtVoyages.docx b/data/DeplacementsEtVoyages.docx
new file mode 100644
index 0000000000000000000000000000000000000000..181cb8e00142b59479fab2a81275a966489370c2
Binary files /dev/null and b/data/DeplacementsEtVoyages.docx differ
diff --git a/data/DeplacementsEtVoyagesRev.docx b/data/DeplacementsEtVoyagesRev.docx
new file mode 100644
index 0000000000000000000000000000000000000000..84944fcd81933a7f377b04bf6f7f3ac863e21a3a
Binary files /dev/null and b/data/DeplacementsEtVoyagesRev.docx differ
diff --git a/data/ForfaitsRemboursements.csv b/data/ForfaitsRemboursements.csv
new file mode 100644
index 0000000000000000000000000000000000000000..921485d633c352ab252f70c9ea8954c384e1fca3
--- /dev/null
+++ b/data/ForfaitsRemboursements.csv
@@ -0,0 +1,31 @@
+﻿Destination;;Hébergement;;Repas
+France;;IDF 125€ / Province 100€;;27 € 
+Allemagne;;150 € ;;35 € 
+Arabie Saoudite;;200 € ;;40 € 
+Autriche;;110 € ;;40 € 
+Belgique;;150 € ;;35 € 
+Canada;;150 € ;;30 € 
+Chine;;113 € ;;37 € 
+Egypte;;150 € ;;25 € 
+Emirats Arabes Unis;;160 € ;;46 € 
+Espagne;;130 € ;;30 € 
+Etats-Unis;;140 € ;;47 € 
+Grèce;;140 € ;;25 € 
+Inde;;160 € ;;47 € 
+Irlande;;180 € ;;30 € 
+Italie;;120 € ;;37 € 
+Japon;;150 € ;;25 € 
+Maroc;;110 € ;;25 € 
+Mexique;;130 € ;;27 € 
+Norvège;;160 € ;;40 € 
+Pays-Bas;;150 € ;;32 € 
+Pologne;;110 € ;;23 € 
+Portugal;;108 € ;;25 € 
+Qatar;;210 € ;;35 € 
+Royaume-Uni;;130 € ;;28 € 
+Russie;;180 € ;;50 € 
+Singapour;;170 € ;;42 € 
+Suède;;90 € ;;30 € 
+Suisse;;192 € ;;35 € 
+Taiwan;;123 € ;;37 € 
+Turquie;;150 € ;;28 € 
\ No newline at end of file
diff --git a/data/NonPrisEnCharge.csv b/data/NonPrisEnCharge.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a1f8f4f34a2c7ac4dc7a71e3c069c02d05208fd6
--- /dev/null
+++ b/data/NonPrisEnCharge.csv
@@ -0,0 +1,14 @@
+Non pris en charge via Note de Frais
+"Matériel informatique : téléphone, chargeur, tablette, adaptateur prise, etc."
+"Outillage : balai, tournevis, disque de disqueuse, etc."
+"Mobilier/Aménagement de bureau : plantes, dalles, poufs, etc."
+"Fournitures de bureau : café, piles, etc."
+Conférence/Cotisation
+Séminaire/Réunion Team Building
+Doublon de clés
+Equipement de Protection Individuelle (EPI)
+Achat de bagagerie : neuf/perdu/endommagé
+Lavage et recharge Carte de Lavage : tous types de véhicule
+Consommation alcoolisée
+"Collation : confiserie, gâteau, boisson, etc."
+"Prestation de loisirs : Spa, piscine, massage, remontée mécanique, escape game, etc."
\ No newline at end of file
diff --git a/data/business_trips_content_en.docx b/data/business_trips_content_en.docx
new file mode 100644
index 0000000000000000000000000000000000000000..c4e026068990cec5ffbff745b26d46d8fcb8c8bf
Binary files /dev/null and b/data/business_trips_content_en.docx differ
diff --git a/data/business_trips_content_fr.docx b/data/business_trips_content_fr.docx
new file mode 100644
index 0000000000000000000000000000000000000000..68da3f932c463366391858706e7bffe5c5de4b64
Binary files /dev/null and b/data/business_trips_content_fr.docx differ
diff --git a/data/business_trips_content_until_3_en.docx b/data/business_trips_content_until_3_en.docx
new file mode 100644
index 0000000000000000000000000000000000000000..ff314c0a40d3875042473fd7a3405533f32a9dff
Binary files /dev/null and b/data/business_trips_content_until_3_en.docx differ
diff --git a/data/business_trips_content_until_3_enfr.docx b/data/business_trips_content_until_3_enfr.docx
new file mode 100644
index 0000000000000000000000000000000000000000..849b7f3f3655e252f1d712f2d7a68e215e36aa5a
Binary files /dev/null and b/data/business_trips_content_until_3_enfr.docx differ
diff --git a/data/business_trips_content_until_3_fr.docx b/data/business_trips_content_until_3_fr.docx
new file mode 100644
index 0000000000000000000000000000000000000000..4c4c5e99f7f59973133a7bcec232492defa8382a
Binary files /dev/null and b/data/business_trips_content_until_3_fr.docx differ
diff --git a/data/business_trips_content_until_9_en.docx b/data/business_trips_content_until_9_en.docx
new file mode 100644
index 0000000000000000000000000000000000000000..1d09f518827a373d64888667d504ae41c1940198
Binary files /dev/null and b/data/business_trips_content_until_9_en.docx differ
diff --git a/data/business_trips_plan_en.docx b/data/business_trips_plan_en.docx
new file mode 100644
index 0000000000000000000000000000000000000000..2fb08997e8264ca0975e2a4ff8918c6cef6e908c
Binary files /dev/null and b/data/business_trips_plan_en.docx differ
diff --git a/data/business_trips_plan_until_3_en.docx b/data/business_trips_plan_until_3_en.docx
new file mode 100644
index 0000000000000000000000000000000000000000..478c5570933cd939077ddfec2466f7c221a13149
Binary files /dev/null and b/data/business_trips_plan_until_3_en.docx differ
diff --git a/data/business_trips_plan_until_3_fr.docx b/data/business_trips_plan_until_3_fr.docx
new file mode 100644
index 0000000000000000000000000000000000000000..7a7c12cfee53a64d2051cfb77733d1ab2a4f2f16
Binary files /dev/null and b/data/business_trips_plan_until_3_fr.docx differ
diff --git a/data/business_trips_plan_until_9_en.docx b/data/business_trips_plan_until_9_en.docx
new file mode 100644
index 0000000000000000000000000000000000000000..186f4577e98856cfc8e32595ad212f3b4e3fe99a
Binary files /dev/null and b/data/business_trips_plan_until_9_en.docx differ
diff --git a/data/transports.docx b/data/transports.docx
new file mode 100644
index 0000000000000000000000000000000000000000..4c217bcc1f088225f296d0eb14eb11dad4700a44
Binary files /dev/null and b/data/transports.docx differ
diff --git a/data/transports_content_en.docx b/data/transports_content_en.docx
new file mode 100644
index 0000000000000000000000000000000000000000..c99dfe94b47ff9fede7362e30550427126afdf2d
Binary files /dev/null and b/data/transports_content_en.docx differ
diff --git a/data/transports_content_fr.docx b/data/transports_content_fr.docx
new file mode 100644
index 0000000000000000000000000000000000000000..19d685875054e5a425c7a5aacc3d402d9cfad683
Binary files /dev/null and b/data/transports_content_fr.docx differ
diff --git a/data/transports_plan.docx b/data/transports_plan.docx
new file mode 100644
index 0000000000000000000000000000000000000000..8172c4ff2a5f3ecc8196c1e408afaeaa78ca55b2
Binary files /dev/null and b/data/transports_plan.docx differ
diff --git a/data/transports_plan_en.docx b/data/transports_plan_en.docx
new file mode 100644
index 0000000000000000000000000000000000000000..5905b200d041f9b0867624d89ab533c659b939ed
Binary files /dev/null and b/data/transports_plan_en.docx differ
diff --git a/data/transports_plan_short_en.docx b/data/transports_plan_short_en.docx
new file mode 100644
index 0000000000000000000000000000000000000000..5ba6548062de9c9de67c5096ca2411f59ba42214
Binary files /dev/null and b/data/transports_plan_short_en.docx differ
diff --git a/data/transports_plan_short_fr.docx b/data/transports_plan_short_fr.docx
new file mode 100644
index 0000000000000000000000000000000000000000..c8b6097ff8e4c096e9e0f67e311f81c882da5cd3
Binary files /dev/null and b/data/transports_plan_short_fr.docx differ
diff --git a/data/~$ansports.docx b/data/~$ansports.docx
new file mode 100644
index 0000000000000000000000000000000000000000..1215360c558324f168a348f977b46d5a160ee437
Binary files /dev/null and b/data/~$ansports.docx differ
diff --git a/data/~$ansports_contenu.txt b/data/~$ansports_contenu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..eda54b1681d013d66d5b5ea588fe54f213436b9a
Binary files /dev/null and b/data/~$ansports_contenu.txt differ
diff --git a/data/~$placementsEtVoyages.docx b/data/~$placementsEtVoyages.docx
new file mode 100644
index 0000000000000000000000000000000000000000..1215360c558324f168a348f977b46d5a160ee437
Binary files /dev/null and b/data/~$placementsEtVoyages.docx differ
diff --git a/data/~$siness_trip_plan_until_3_fr.docx b/data/~$siness_trip_plan_until_3_fr.docx
new file mode 100644
index 0000000000000000000000000000000000000000..3a571977a55490f9911525c3c8debdadd32e9b95
Binary files /dev/null and b/data/~$siness_trip_plan_until_3_fr.docx differ
diff --git a/data/~$siness_trips_content_until_3_fr.docx b/data/~$siness_trips_content_until_3_fr.docx
new file mode 100644
index 0000000000000000000000000000000000000000..3a571977a55490f9911525c3c8debdadd32e9b95
Binary files /dev/null and b/data/~$siness_trips_content_until_3_fr.docx differ
diff --git a/data/~$siness_trips_content_until_9_en.docx b/data/~$siness_trips_content_until_9_en.docx
new file mode 100644
index 0000000000000000000000000000000000000000..40c3d5c3d1afe4ef002c13f20160b0e1d8fc96f3
Binary files /dev/null and b/data/~$siness_trips_content_until_9_en.docx differ
diff --git a/data/~$siness_trips_plan_until_9_en.docx b/data/~$siness_trips_plan_until_9_en.docx
new file mode 100644
index 0000000000000000000000000000000000000000..628830311cd684d862b7bebd29f7866e94ca1af9
Binary files /dev/null and b/data/~$siness_trips_plan_until_9_en.docx differ
diff --git a/src/__pycache__/control.cpython-310.pyc b/src/__pycache__/control.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4731bb228d5b9749f004a804981477f5ad64752b
Binary files /dev/null and b/src/__pycache__/control.cpython-310.pyc differ
diff --git a/src/app.py b/src/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca177a70a0882e766505918a97d633f698e2e97b
--- /dev/null
+++ b/src/app.py
@@ -0,0 +1,91 @@
+import gradio as gr
+
+
+import src.control as ctrl
+
+
+"""
+==================================
+A. Component part
+==================================
+"""
+
+with gr.Blocks() as hrqa:
+
+    with gr.Row():
+
+        with gr.Column():
+            pass
+
+        with gr.Column(scale=10):
+            """
+            1. input docs components
+            """
+
+            gr.Markdown("# Questions sur le vivre ensemble en entreprise")
+
+            input_text_comp = gr.Textbox(
+                label="",
+                lines=1,
+                max_lines=3,
+                interactive=True,
+                placeholder="Posez votre question ici",
+            )
+            input_example_comp = gr.Radio(
+                label="Examples de questions",
+                choices=["Remboursement de frais de voiture", "Recommandations de transport"],
+            )
+            output_text_comp = gr.Textbox(
+                label="La réponse automatique",
+                lines=2,
+                max_lines=10,
+                interactive=False,
+                visible=False,
+            )
+            sources_comp = gr.CheckboxGroup(
+                label="Documents sources",
+                visible=False,
+                interactive=False,
+            )
+
+        with gr.Column():
+            pass
+
+
+    def input_text_fn1():
+        update_ = {
+            output_text_comp: gr.update(visible=True),
+        }
+        return update_
+
+    def input_text_fn2(input_text_):
+        answer, sources = ctrl.get_response(query=input_text_)
+        source_labels = [s['distance']+' '+s['paragraph']+' '+s['title']+' from '+s['doc'] for s in sources]
+        update_ = {
+            output_text_comp: gr.update(value=answer),
+            sources_comp: gr.update(visible=True, choices=source_labels, value=source_labels)
+        }
+        return update_
+
+
+    def input_example_fn(input_example_):
+        examples = {
+            "Remboursement de frais de voiture": "Comment sont remboursés mes frais kilométriques sur mes trajets "
+                                                 "professionnels?",
+            "Recommandations de transport": "Quelles sont les recommandations de l'entreprise? Vaut-il mieux voyager en "
+                                            "train ou en avion?"
+        }
+        update_ = {
+            input_text_comp: gr.update(value=examples[input_example_]),
+            output_text_comp: gr.update(visible=True),
+        }
+        return update_
+
+    # Question submitted: reveal the answer box first, then compute the answer and its sources.
+    input_text_comp\
+        .submit(input_text_fn1, inputs=[], outputs=[output_text_comp])\
+        .then(input_text_fn2, inputs=[input_text_comp], outputs=[output_text_comp, sources_comp])
+    # Example selected: copy its question into the input box, then answer it through the same path.
+    input_example_comp\
+        .change(input_example_fn, inputs=[input_example_comp], outputs=[input_text_comp, output_text_comp])\
+        .then(input_text_fn2, inputs=[input_text_comp], outputs=[output_text_comp, sources_comp])
+
+# NOTE(review): the app is launched at import time (no `if __name__ == "__main__":` guard) — confirm intended.
+hrqa.queue().launch()
\ No newline at end of file
diff --git a/src/app2.py b/src/app2.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8f6c5fd78eb1c1d93eacfd4872f35bcf5374624
--- /dev/null
+++ b/src/app2.py
@@ -0,0 +1,16 @@
+from langchain.agents import create_csv_agent
+from langchain.agents import create_pandas_dataframe_agent
+import src.tools.llm as llm
+
+import pandas as pd
+
+# Experiment: answer a question about the forfait table with a pandas dataframe agent.
+# The path is relative, so it is resolved against the current working directory.
+path = '../data/AccomodationAndMealsForfaits_en.csv'
+#path = '../data/test_utf32.csv'
+# The file is read as UTF-32 with ';' as the column separator.
+df = pd.read_csv(path, encoding='utf32', sep=";")
+agent = create_pandas_dataframe_agent(llm.OpenAI(temperature=0), df, verbose=True)
+# NOTE(review): the question is asked in French against the `_en` table — confirm this is intended.
+refund = agent.run("Quel est le remboursement pour un repas en Turkiye?")
+print(refund)
+
+
+
+pass
diff --git a/src/control.py b/src/control.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a6fcebfd8ea636f17a5f35914e0d92baa111751
--- /dev/null
+++ b/src/control.py
@@ -0,0 +1,49 @@
+import chromadb
+
+import src.tools.retriever as rtrvr
+import src.tools.llm as llm
+from src.domain.doc import Doc
+
+# Everything below runs once, at import time.
+chroma_client = chromadb.Client()
+
+# get_response translates the query when plan_language is 'en', and translates the
+# answer to French when content_language is 'en'.
+plan_language = 'en'
+content_language = 'en'
+# Relative paths: resolved against the current working directory.
+path_plan = '../data/business_trips_plan_until_9_en.docx'
+path_content = '../data/business_trips_content_until_9_en.docx'
+collection_name = "until_9"
+
+# Load both documents and hand them to the retriever, which returns the collection
+# queried by get_response.
+doc_plan = Doc(path_plan)
+doc_content = Doc(path_content)
+collection_ = rtrvr.init_collections(chroma_client, doc_plan, doc_content, collection_name)
+
+
+def get_response(query):
+    if plan_language == 'en':
+        query = llm.translate(query)
+    sources = rtrvr.similarity_search(collection=collection_, query=query)
+    sources = select_best_sources(sources)
+    sources_contents = [s['content'] for s in sources]
+    context = '\n'.join(sources_contents)
+    answer = llm.generate_paragraph(query=query, context=context, language=content_language)
+    if content_language == 'en':
+        answer = llm.translate(text=answer, language='fr')
+    return answer.lstrip(), sources
+
+
+def select_best_sources(sources: [], delta_1_2=0.1, delta_1_n=0.25, absolute=1.1) -> []:
+    best_sources = []
+    for idx, s in enumerate(sources):
+        if idx == 0 \
+           or (s['distance_f'] - sources[idx - 1]['distance_f'] < delta_1_2
+                and s['distance_f'] - sources[0]['distance_f'] < delta_1_n) \
+           or s['distance_f'] < absolute:
+            best_sources.append(s)
+    return best_sources
+
+
+q1 = "Comment sont remboursés mes frais kilométriques sur mes déplacements avec mon véhicule personnel?"
+q2 = "Quels sont les moyens de transport recommandés par la société?"
+q3 = "est-ce que mes billets de cinéma peuvent être remboursés?"
+
+a2 = get_response(q3)
+print(a2)
diff --git a/src/control2.py b/src/control2.py
new file mode 100644
index 0000000000000000000000000000000000000000..f24b93f88dbaa056a364d83dfd1a156cea46470d
--- /dev/null
+++ b/src/control2.py
@@ -0,0 +1,36 @@
+from langchain.agents import AgentType, initialize_agent
+from langchain.tools import BaseTool, StructuredTool, Tool, tool
+
+
+from src.control import *
+
+
+@tool
+def similarity_search(query: str) -> str:
+    """
+    useful for when you look for relevant content about business trip policy : transport, accomodation, etc.
+    """
+    query = llm.translate(query)
+    sources = rtrvr.similarity_search(collection=collection_, query=query)
+    sources = select_best_sources(sources)
+    sources_contents = [s['content'] for s in sources]
+    context = '\n'.join(sources_contents)
+    return context
+
+
+@tool
+def generate_answer(query_and_context: str) -> str:
+    """
+    useful for when you have a query and the relevant content to generate an answer
+    """
+    answer = llm.generate_paragraph2(query_and_context=query_and_context, language='en')
+    answer = llm.translate(text=answer, language='fr')
+    return answer.lstrip()
+
+
+# The two tools the agent may call: retrieval of the context, then answer generation.
+tools = [similarity_search, generate_answer]
+
+# Everything below runs at import time: the agent is built and immediately asked q2.
+agent = initialize_agent(tools, llm.openai_llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
+# These assignments rebind the q1/q2 names pulled in by `from src.control import *`.
+q1 = "Comment sont remboursés mes frais kilométriques sur mes déplacements avec mon véhicule personnel?"
+q2 = "Quels sont les moyens de transport recommandés par la société?"
+ans = agent.run(q2)
diff --git a/src/domain/__pycache__/container.cpython-310.pyc b/src/domain/__pycache__/container.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1a86fb7609c303509fddda7a8e9d5c7cd6989244
Binary files /dev/null and b/src/domain/__pycache__/container.cpython-310.pyc differ
diff --git a/src/domain/__pycache__/doc.cpython-310.pyc b/src/domain/__pycache__/doc.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..df468f912a760375e636ee2c47dad438a92f58ac
Binary files /dev/null and b/src/domain/__pycache__/doc.cpython-310.pyc differ
diff --git a/src/domain/__pycache__/paragraph.cpython-310.pyc b/src/domain/__pycache__/paragraph.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..271387aeb3c6d23f54200de8bf2e25c50404b911
Binary files /dev/null and b/src/domain/__pycache__/paragraph.cpython-310.pyc differ
diff --git a/src/domain/__pycache__/style.cpython-310.pyc b/src/domain/__pycache__/style.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..915e0a5c6846062b59b9e9c705c6894fd7317f76
Binary files /dev/null and b/src/domain/__pycache__/style.cpython-310.pyc differ
diff --git a/src/domain/container.py b/src/domain/container.py
new file mode 100644
index 0000000000000000000000000000000000000000..d75f05fc75423db3424a8cf340d9f0b4de69532d
--- /dev/null
+++ b/src/domain/container.py
@@ -0,0 +1,136 @@
+from src.domain.paragraph import Paragraph
+
+INFINITE = 10000
+
+
+class Container:
+
+    def __init__(self, paragraphs: [Paragraph], title: Paragraph = None, level: int = 0, rank: int = 0, father=None,
+                 id_=0):
+        self.level = level
+        self.title = title
+        self.paragraphs = []
+        self.children = []
+        self.rank = rank
+        self.father = father  # if not father, then the container is at the top of the hierarchy
+        self.id_ = int(str(1) + str(father.id_) + str(id_))
+        if paragraphs:
+            self.paragraphs, self.children = self.create_children(paragraphs, level, rank + 1)
+
+    @property
+    def text(self):
+        text = ""
+        if self.title:
+            text = "Titre " + str(self.level) + " : " + self.title.text + '\n'
+        for p in self.paragraphs:
+            text += p.text + '\n'
+        for child in self.children:
+            text += child.text
+        return text
+
+    @property
+    def text_chunks(self, chunk=500):
+        text_chunks = []
+        text_chunk = ""
+        for p in self.paragraphs:
+            if chunk < len(text_chunk) + len(p.text):
+                text_chunks.append(text_chunk)
+                text_chunk = ""
+            else:
+                text_chunk += " " + p.text
+        if text_chunk and not text_chunk.isspace():
+            text_chunks.append(text_chunk)
+        for child in self.children:
+            text_chunks += child.text_chunks
+        return text_chunks
+
+    @property
+    def blocks(self):
+        block = {'content': "", 'rank': self.rank, 'level': self.level, 'title': ''}
+        if self.title:
+            block['title'] = self.title.text
+        for p in self.paragraphs:
+            block['content'] += p.text + '. '
+        blocks = [block]
+        for child in self.children:
+            blocks += child.blocks
+        return blocks
+
+    @property
+    def table_of_contents(self):
+        toc = []
+        if self.title:
+            toc += [{str(self.level): self.title.text}]
+        if self.children:
+            for child in self.children:
+                toc += child.table_of_contents
+        return toc
+
+    def move(self, position: int, new_father=None):
+        current_father = self.father  # should be added in the domain
+        current_father.children.remove(self)
+
+        self.rank = new_father.rank + 1 if new_father else 0
+        self.father = new_father
+        if position < len(new_father.children):
+            new_father.children.insert(position, self)
+        else:
+            new_father.children.append(self)
+
+    def create_children(self, paragraphs, level, rank) -> ([], []):
+        """
+        creates children containers or directly attached content
+        and returns the list of containers and contents of level+1
+        :return:
+        [Content or Container]
+        """
+        attached_paragraphs = []
+        container_paragraphs = []
+        container_title = None
+        children = []
+        in_children = False
+        level = INFINITE
+        child_id = 0
+
+        while paragraphs:
+            p = paragraphs.pop(0)
+            if not in_children and not p.is_structure:
+                attached_paragraphs.append(p)
+            else:
+                in_children = True
+                if p.is_structure and p.level <= level:  # if p is higher or equal in hierarchy
+                    if container_paragraphs or container_title:
+                        children.append(Container(container_paragraphs, container_title, level, rank, self, child_id))
+                        child_id += 1
+                    container_paragraphs = []
+                    container_title = p
+                    level = p.level
+
+                else:  # p is strictly lower in hierarchy
+                    container_paragraphs.append(p)
+
+        if container_paragraphs or container_title:
+            children.append(Container(container_paragraphs, container_title, level, rank, self, child_id))
+            child_id += 1
+
+        return attached_paragraphs, children
+
+    @property
+    def structure(self):
+
+        self_structure = {str(self.id_): {
+            'index': str(self.id_),
+            'canMove': True,
+            'isFolder': True,
+            'children': [p.id_ for p in self.paragraphs] + [child.id_ for child in self.children],
+            'canRename': True,
+            'data': {},
+            'level': self.level,
+            'rank': self.rank,
+            'title': self.title.text if self.title else 'root'
+        }}
+        paragraphs_structure = [p.structure for p in self.paragraphs]
+        structure = [self_structure] + paragraphs_structure
+        for child in self.children:
+            structure += child.structure
+        return structure
diff --git a/src/domain/doc.py b/src/domain/doc.py
new file mode 100644
index 0000000000000000000000000000000000000000..89f2aafdcd1e44b5d3c52f879e0ea5b043a19160
--- /dev/null
+++ b/src/domain/doc.py
@@ -0,0 +1,71 @@
+import docx
+
+from src.domain.container import Container
+from src.domain.paragraph import Paragraph
+from src.domain.style import Style
+
+
+class Doc:
+
+    def __init__(self, path='', id_=None):
+
+        self.xdoc = docx.Document(path)
+        self.title = path.split('/')[-1]
+        self.id_ = id(self)
+        self.path = path
+        paragraphs = [Paragraph(xp, self.id_, i) for (i, xp) in enumerate(self.xdoc.paragraphs)]
+        self.container = Container(paragraphs, father=self)
+        self.styles = [Style(xs, self.id_, i) for (i, xs) in enumerate(self.xdoc.styles)]
+
+    def save_as_docx(self, path):
+        self.xdoc.save(path)
+
+    def apply_styles_from(self, ref_doc):
+
+        ref_doc_styles_names = [s.xstyle.name for s in ref_doc.styles]
+        common_styles = [s for s in self.styles if s.xstyle.name in ref_doc_styles_names]
+
+        for s in common_styles:
+            s.copy_from(ref_doc.xdoc.styles[s.xstyle.name])
+
+    @property
+    def structure(self):
+
+        return self.container.structure
+
+    @property
+    def blocks(self):
+
+        def from_list_to_str(index_list):
+            index_str = str(index_list[0])
+            for el in index_list[1:]:
+                index_str += '.' + str(el)
+            return index_str
+
+        current_index = []
+        blocks = []
+        for block in self.container.blocks:
+            block['doc'] = self.title
+            current_level = len(current_index)
+            if 0 < block['level']:
+                if block['level'] == current_level:
+                    current_index[-1] += 1
+                elif current_level < block['level']:
+                    current_index.append(1)
+                elif block['level'] < current_level:
+                    current_index = current_index[:block['level']]
+                    current_index[-1] += 1
+                block['paragraph'] = from_list_to_str(current_index)
+            else:
+                block['paragraph'] = "0"
+            blocks.append(block)
+        return blocks
+
+
+
+
+
+
+
+
+
diff --git a/src/domain/paragraph.py b/src/domain/paragraph.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f0053f8cb93d2935b6825dcf8b88f7afd4b30a4
--- /dev/null
+++ b/src/domain/paragraph.py
@@ -0,0 +1,27 @@
+INFINITE = 10000
+
+
+class Paragraph:
+
+    def __init__(self, xparagraph, doc_id: int, id_: int):
+
+        self.xparagraph = xparagraph
+        self.id_ = int(str(2)+str(doc_id)+str(id_))
+        name = self.xparagraph.style.name
+        self.level = int(name.split(' ')[-1]) if 'Heading' in name else INFINITE
+        self.is_structure = self.level < INFINITE
+        self.text = self.xparagraph.text
+
+    @property
+    def structure(self):
+        structure = {str(self.id_): {
+            'index': str(self.id_),
+            'canMove': True,
+            'isFolder': False,
+            'children': [],
+            'title': self.text,
+            'canRename': True,
+            'data': {},
+            'level': self.level,
+        }}
+        return structure
diff --git a/src/domain/project.py b/src/domain/project.py
new file mode 100644
index 0000000000000000000000000000000000000000..8425706b3d05184efb44e6f5ed4a8fc438387aee
--- /dev/null
+++ b/src/domain/project.py
@@ -0,0 +1,9 @@
+from src.domain.doc import Doc
+
+
+class Project:
+
+    def __init__(self, name: str, docs: [Doc]):
+
+        self.docs = docs
+        self.name = name
diff --git a/src/domain/style.py b/src/domain/style.py
new file mode 100644
index 0000000000000000000000000000000000000000..41c197f070a41c0525232eb433cadfcd4e60f547
--- /dev/null
+++ b/src/domain/style.py
@@ -0,0 +1,121 @@
+from docx.enum.style import WD_STYLE_TYPE
+class Style:
+    """Pair a python-docx style object with an integer id and copy formatting onto it.
+
+    Attributes:
+        id_: integer built from the string forms of ``doc_id`` and ``id_``, concatenated.
+        xstyle: the wrapped style object; ``copy_from`` writes to it.
+    """
+
+    def __init__(self, xstyle, doc_id, id_):
+        """Store ``xstyle`` and derive ``id_`` from ``doc_id`` and ``id_``."""
+        self.id_ = int(str(doc_id)+str(id_))
+        self.xstyle = xstyle
+
+    def copy_from(self, xref):  # need to be further developed
+        """Copy font and style-level settings from ``xref`` onto ``self.xstyle``.
+
+        Nothing is copied unless ``xref.type`` is ``WD_STYLE_TYPE.PARAGRAPH``. Values are
+        transferred one attribute at a time, in the order the names appear below. The
+        per-attribute notes paraphrase the python-docx API documentation.
+
+        Args:
+            xref: style object the values are read from.
+        """
+        if xref.type != WD_STYLE_TYPE.PARAGRAPH:
+            return
+
+        source_font, target_font = xref.font, self.xstyle.font
+        # Font height as a Length value, or None.
+        target_font.size = source_font.size
+        # RGBColor of the font, or None when no RGB color is specified.
+        target_font.color.rgb = source_font.color.rgb
+        font_attrs = (
+            'name',
+            # Text appears in capital letters.
+            'all_caps',
+            # Text appears in bold.
+            'bold',
+            # Tri-state. True treats the characters as complex script regardless of their
+            # Unicode values (complex scripts include Arabic, Hebrew, Tamil and Persian).
+            'complex_script',
+            # Tri-state. True displays complex script characters in bold typeface.
+            'cs_bold',
+            # Tri-state. True displays complex script characters in italic typeface.
+            'cs_italic',
+            # Tri-state. True renders the text with a double strikethrough.
+            'double_strike',
+            # Tri-state. True makes the text look raised off the page in relief.
+            'emboss',
+            # Tri-state. True hides the text from display unless application settings
+            # force hidden text to be shown.
+            'hidden',
+            # WD_COLOR_INDEX member for the highlight color, or None for no highlighting.
+            'highlight_color',
+            # Tri-state. True makes the text look pressed into the page.
+            'imprint',
+            # Tri-state. True makes the text appear in italics.
+            'italic',
+            # Tri-state. True marks the text as WML to be handled as Office Open XML Math.
+            'math',
+            # Tri-state. True suppresses spelling and grammar error reports for the text.
+            'no_proof',
+            # Tri-state. True draws a one pixel wide border around the inside and outside
+            # borders of each character glyph, so the characters appear outlined.
+            'outline',
+            # Tri-state. True gives the text right-to-left characteristics.
+            'rtl',
+            # Tri-state. True makes each character appear to have a shadow.
+            'shadow',
+            # Tri-state. True shows lowercase characters as capital letters two points
+            # smaller than the font size.
+            'small_caps',
+            # Tri-state. True lays the characters out using the document grid
+            # characters-per-line settings defined in the docGrid element.
+            'snap_to_grid',
+            # Tri-state. True makes the text always behave as hidden, even when hidden text
+            # is being displayed; a narrow, specialized use related to the table of contents.
+            'spec_vanish',
+            # Tri-state. True draws a single horizontal line through the center of the text.
+            'strike',
+            # Whether the characters appear as subscript; None means the
+            # subscript/superscript value is inherited from the style hierarchy.
+            'subscript',
+            # Whether the characters appear as superscript; None as for subscript.
+            'superscript',
+            # Underline style: None, True, False, or a WD_UNDERLINE member.
+            'underline',
+            # Tri-state. True hides the text when the document is shown in web page view.
+            'web_hidden',
+        )
+        for attr in font_attrs:
+            setattr(target_font, attr, getattr(source_font, attr))
+
+        # ``builtin`` and ``type`` are deliberately left out of the style-level copy.
+        style_attrs = (
+            # Style this style inherits from, or None if it is not based on another style.
+            'base_style',
+            # True suppresses display of the style in the style gallery and the list of
+            # recommended styles. To be shown in the gallery this must be False and
+            # quick_style must be True.
+            'hidden',
+            # True if the style is locked: it does not appear in the styles panel or the
+            # style gallery and cannot be applied to document content.
+            'locked',
+            # UI name of the style.
+            'name',
+            # Integer sort key governing the display sequence of the style in the Word UI.
+            # None means no setting is defined and Word uses the default of 0; the style
+            # name is the secondary sort key for styles with equal priority.
+            'priority',
+            # True if the style should be displayed in the style gallery when hidden is
+            # False (e.g. the "Styles" group on the "Home" tab).
+            'quick_style',
+            # True if an application should make the style visible the next time it is
+            # applied to content. python-docx does not unhide such a style automatically
+            # when it is applied.
+            'unhide_when_used',
+        )
+        for attr in style_attrs:
+            setattr(self.xstyle, attr, getattr(xref, attr))
+
diff --git a/src/domain/user.py b/src/domain/user.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9ca1d9aa6448c0c3e8d57bff0fdb4fe828a6d52
--- /dev/null
+++ b/src/domain/user.py
@@ -0,0 +1,4 @@
+class User:
+    """Minimal user record: keeps the given ``username`` in the ``name`` attribute."""
+    def __init__(self, username):
+        self.name = username
diff --git a/src/tools/__pycache__/llm.cpython-310.pyc b/src/tools/__pycache__/llm.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fd6ccfeed211ae1374cf9c8c11d4b78d254454c4
Binary files /dev/null and b/src/tools/__pycache__/llm.cpython-310.pyc differ
diff --git a/src/tools/__pycache__/retriever.cpython-310.pyc b/src/tools/__pycache__/retriever.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..95e5165b27cd356a7cf88d21f9b99e0b879cf64e
Binary files /dev/null and b/src/tools/__pycache__/retriever.cpython-310.pyc differ
diff --git a/src/tools/llm.py b/src/tools/llm.py
new file mode 100644
index 0000000000000000000000000000000000000000..369fe406bee54dfd618c8a6d5f6c87c0495718e7
--- /dev/null
+++ b/src/tools/llm.py
@@ -0,0 +1,63 @@
+import os
+
+from langchain.llms import OpenAI
+
+# SECURITY: the key is read from the environment; the literal key that used to be committed here is exposed and must be revoked.
+OpenAI_KEY = os.environ.get("OPENAI_API_KEY")  # None when the variable is not set
+os.environ["TOKENIZERS_PARALLELISM"] = "true"
+openai_llm = OpenAI(temperature=0)
+
+
+def generate_paragraph(query: str, context: {}, language='fr') -> str:
+    """Ask the module-level ``openai_llm`` to answer ``query`` from ``context``.
+
+    Args:
+        query: the question; interpolated verbatim into the prompt.
+        context: supporting material; its string form is placed between triple backticks.
+        language: language the prompt asks the answer to be written in (default ``'fr'``).
+
+    Returns:
+        Whatever ``openai_llm`` returns for the prompt.
+    """
+    # Single prompt: the model is told to say it does not know when the context is irrelevant.
+    prompt = (
+        f" You are an agent designed to answer to the {query} based on the context delimited by triple backticks:\n"
+        f"``` {context}```\n"
+        f"    The response shall be in {language} and shall be concise and factual\n"
+        f"    In case the provided context does not seem relevant to answer to the question, just return that you "
+        f"    don't know the answer "
+    )
+
+    return openai_llm(prompt)
+
+
+def translate(text: str, language="en") -> str:
+    """Translate ``text`` with the module-level ``openai_llm``.
+
+    ``language="en"`` requests French to English; any other value requests English to French.
+    """
+    languages = "french to english" if language == "en" else "english to french"
+
+    # "\n" (not "\\n") so the prompt gets a real line break instead of a literal backslash-n.
+    template = (f"    Your task consists in translating {languages}\n"
+                f"    the following text: ```{text}```\n"
+                f"    delimited by triple backticks"
+                )
+
+    return openai_llm(template)
+
+
+def generate_paragraph2(query_and_context: str, language: str = 'fr') -> str:
+    """Answer from a combined query-and-context string using the module-level ``openai_llm``.
+
+    The prompt requests a concise, factual reply in ``language``; the LLM result is returned.
+    """
+    prompt = (
+        f" You are an agent designed to answer based on the query and content given below and delimited by triple"
+        f"  backticks:\n"
+        f"``` {query_and_context}```\n"
+        f"    The response shall be in {language} and shall be concise and factual\n"
+        f"    In case the provided context does not seem relevant to answer to the question, just return that you "
+        f"    don't know the answer "
+    )
+    return openai_llm(prompt)
diff --git a/src/tools/retriever.py b/src/tools/retriever.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea089761ddedcc17da8a14f986fdb8f3977c35fd
--- /dev/null
+++ b/src/tools/retriever.py
@@ -0,0 +1,29 @@
+
+
+TOKENIZERS_PARALLELISM = True  # NOTE(review): plain module attribute; it does not set the TOKENIZERS_PARALLELISM environment variable (src/tools/llm.py does that) - confirm it is still needed
+
+
+def init_collections(vs_client, doc_path, doc_content, collection_name):
+    """Create ``collection_name`` in ``vs_client`` and fill it from the two documents' blocks.
+
+    Documents and ids are the 'content' and 'paragraph' values of ``doc_path.blocks``;
+    ``doc_content.blocks`` is passed as metadatas. Returns the new collection."""
+    path_blocks, metadatas = doc_path.blocks, doc_content.blocks
+    new_collection = vs_client.create_collection(name=collection_name)
+    texts, ids = [b['content'] for b in path_blocks], [b['paragraph'] for b in path_blocks]
+    new_collection.add(documents=texts, ids=ids, metadatas=metadatas)
+    return new_collection
+
+
+def similarity_search(collection: object, query: str) -> list:
+    """Query ``collection`` with ``query`` and return the first result's metadata dicts,
+    each updated in place with 'distance_f' (raw) and 'distance' (two-decimal string)."""
+    result = collection.query(query_texts=query)
+    hits, dists = result['metadatas'][0], result['distances'][0]
+    for meta, dist in zip(hits, dists):
+        meta.update(distance_f=dist, distance=format(dist, '.2f'))
+    return hits
+
+
+
+