Spaces:

Hexamind
/

QnA

Runtime error

App Files Files Community

YvesP committed on Jun 11, 2023

Commit

7fea1f4

1 Parent(s): 030bd2d

added file management

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

__pycache__/config.cpython-310.pyc +0 -0
app.py +25 -83
config.py +34 -0
data/.DS_Store +0 -0
data/AccomodationAndMealsForfaits_en.csv +0 -0
data/AccomodationAndMealsForfaits_en.numbers +0 -0
data/AccomodationAndMealsForfaits_fr.csv +0 -31
data/AccomodationAndMealsForfaits_fr.numbers +0 -0
data/BaremeTauxEloignement.csv +0 -84
data/ForfaitsRemboursements.csv +0 -31
data/NonPrisEnCharge.csv +0 -14
data/accommodation_meal_rates.csv +31 -0
data/business_trips_content_en.docx +0 -0
data/business_trips_content_fr.docx +0 -0
data/business_trips_content_until_3_en.docx +0 -0
data/business_trips_content_until_3_enfr.docx +0 -0
data/business_trips_content_until_3_fr.docx +0 -0
data/{business_trips_content_until_9_en.docx → business_trips_content_until_end_en.docx} +0 -0
data/business_trips_content_until_end_fr.docx +0 -0
data/business_trips_plan_en.docx +0 -0
data/business_trips_plan_until_3_en.docx +0 -0
data/business_trips_plan_until_3_fr.docx +0 -0
data/{business_trips_plan_until_9_en.docx → business_trips_plan_until_end_en.docx} +0 -0
data/remote_rates.csv +83 -0
data/transports.docx +0 -0
data/transports_content_en.docx +0 -0
data/transports_content_fr.docx +0 -0
data/transports_plan.docx +0 -0
data/transports_plan_en.docx +0 -0
data/transports_plan_short_en.docx +0 -0
data/transports_plan_short_fr.docx +0 -0
data/~$ansports_contenu.txt +0 -0
data/~$placementsEtVoyages.docx +0 -0
data/~$siness_trip_plan_until_3_fr.docx +0 -0
data/~$siness_trips_content_until_3_fr.docx +0 -0
data/~$siness_trips_content_until_9_en.docx +0 -0
data/{~$ansports.docx → ~$siness_trips_content_until_end_en.docx} +0 -0
data/~$siness_trips_plan_until_9_en.docx +0 -0
requirements.txt +0 -98
src/.DS_Store +0 -0
src/control/__pycache__/control.cpython-310.pyc +0 -0
src/control/control.py +105 -33
src/domain/__pycache__/container.cpython-310.pyc +0 -0
src/domain/__pycache__/doc.cpython-310.pyc +0 -0
src/domain/__pycache__/paragraph.cpython-310.pyc +0 -0
src/domain/__pycache__/style.cpython-310.pyc +0 -0
src/domain/project.py +0 -9
src/domain/style.py +0 -121
src/domain/user.py +0 -4
src/model/__pycache__/block.cpython-310.pyc +0 -0

__pycache__/config.cpython-310.pyc ADDED Viewed

Binary file (1.91 kB). View file

app.py CHANGED Viewed

@@ -1,91 +1,33 @@
-import gradio as gr
-import src.control.control as ctrl
-"""
-==================================
-A. Component part
-==================================
-"""
-with gr.Blocks() as hrqa:
-    with gr.Row():
-        with gr.Column():
-            pass
-        with gr.Column(scale=10):
-            """
-            1. input docs components
-            """
-            gr.Markdown("# Questions sur le vivre ensemble en entreprise")
-            input_text_comp = gr.Textbox(
-                label="",
-                lines=1,
-                max_lines=3,
-                interactive=True,
-                placeholder="Posez votre question ici",
-            )
-            input_example_comp = gr.Radio(
-                label="Examples de questions",
-                choices=["Remboursement de frais de voiture", "Recommandations de transport"],
-            )
-            output_text_comp = gr.Textbox(
-                label="La réponse automatique",
-                lines=2,
-                max_lines=10,
-                interactive=False,
-                visible=False,
-            )
-            sources_comp = gr.CheckboxGroup(
-                label="Documents sources",
-                visible=False,
-                interactive=False,
-            )
-        with gr.Column():
-            pass
-    def input_text_fn1():
-        update_ = {
-            output_text_comp: gr.update(visible=True),
-        }
-        return update_
-    def input_text_fn2(input_text_):
-        answer, sources = ctrl.get_response(query=input_text_)
-        source_labels = [s['distance']+' '+s['paragraph']+' '+s['title']+' from '+s['doc'] for s in sources]
-        update_ = {
-            output_text_comp: gr.update(value=answer),
-            sources_comp: gr.update(visible=True, choices=source_labels, value=source_labels)
-        }
-        return update_
-    def input_example_fn(input_example_):
-        examples = {
-            "Remboursement de frais de voiture": "Comment sont remboursés mes frais kilométriques sur mes trajets "
-                                                 "professionnels?",
-            "Recommandations de transport": "Quelles sont les recommandations de l'entreprise? Vaut-il mieux voyager en "
-                                            "train ou en avion?"
-        }
-        update_ = {
-            input_text_comp: gr.update(value=examples[input_example_]),
-            output_text_comp: gr.update(visible=True),
-        }
-        return update_
-    input_text_comp\
-        .submit(input_text_fn1, inputs=[], outputs=[output_text_comp])\
-        .then(input_text_fn2, inputs=[input_text_comp], outputs=[output_text_comp, sources_comp])
-    input_example_comp\
-        .change(input_example_fn, inputs=[input_example_comp], outputs=[input_text_comp, output_text_comp])\
-        .then(input_text_fn2, inputs=[input_text_comp], outputs=[output_text_comp, sources_comp])
-hrqa.queue().launch()

+import pandas as pd
+import os
+from langchain.llms import OpenAI
+import chromadb
+from config import *
+from src.control.control import Controller
+from src.tools.retriever import Retriever
+from src.tools.llm import LlmAgent
+from src.model.doc import Doc
+import src.view.view as view
+os.environ["OPENAI_API_KEY"] = OpenAI_KEY
+os.environ["TOKENIZERS_PARALLELISM"] = "true"
+doc_content = Doc(content_en_path)
+doc_plan = Doc(plan_path)
+doc_content_fr = Doc(content_fr_path)
+client_db = chromadb.Client()
+retriever = Retriever(client_db, doc_plan, doc_content, doc_content_fr, collection_name)
+llm_model = OpenAI(temperature=0)
+llm = LlmAgent(llm_model)
+specials['remote_rate_df'] = pd.read_csv(specials['remote_rate_path'])
+specials['accommodation_meal_df'] = pd.read_csv(specials['accommodation_meal_path'])
+controller = Controller(retriever=retriever, llm=llm,  content_language=content_language, plan_language=plan_language,
+                        specials=specials)
+qna = view.run(ctrl=controller, examples=examples)
+qna.queue().launch()

config.py ADDED Viewed

	@@ -0,0 +1,34 @@

+plan_language = 'en'
+content_language = 'en'
+plan_path = 'data/business_trips_plan_until_end_en.docx'
+content_en_path = 'data/business_trips_content_until_end_en.docx'
+content_fr_path = 'data/business_trips_content_until_end_fr.docx'
+collection_name = "until_end"
+OpenAI_KEY = "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"
+examples = {
+    "Remboursement de frais de voiture": "Comment sont remboursés mes frais kilométriques sur mes trajets "
+                                         "professionnels?",
+    "Recommandations de transport": "Quelles sont les recommandations de l'entreprise? "
+                                    "Vaut-il mieux voyager en train ou en avion?",
+    "Indemnités pour des séjours longs à l'étranger": "Y a-t-il des indemnités pour des séjours longs à l'étranger?",
+    "Indemnités pour des séjours longs en Bolivie": "Y a-t-il des indemnités pour des séjours longs en Bolivie?",
+    "Indemnités pour les repas aux Pays-Bas": "Quelles sont les indemnités pour les repas au Pays-Bas?"
+}
+countries_extensions = {
+    'Royaume-Uni': ['UK', 'U.K.','RU', 'R.U.', 'Angleterre'],
+    'Etats-Unis': ['Etats-unis', 'Etats Unis', 'Etats unis', 'ETATS-UNIS', 'USA'],
+    'E.A.U': ["EAU", "Emirats", "Emirats Arabes Unis", "Emirates", "UAE", "United Arab Emirates"],
+    'Pays-Bas': ['Les Pays-Bas']
+}
+specials = {'remote_rate_path': 'data/remote_rates.csv',
+            'remote_rate_known': "the scale rate of remoteness for the ",
+            'remote_rate_unknown': "the scale rate of remoteness for the country  mentionned is unknown. Allowances "
+            "apply though",
+            'accommodation_meal_path': 'data/accommodation_meal_rates.csv',
+            'accommodation_meal_known': 'the rates for accommodation and meals are the following: ',
+            'accommodation_meal_unknown': 'the rates for accommodation and meals are not defined for the country '
+                                              'mentionned ',
+            'countries_extensions': countries_extensions,
+            }

data/.DS_Store CHANGED Viewed

Binary files a/data/.DS_Store and b/data/.DS_Store differ

data/AccomodationAndMealsForfaits_en.csv DELETED Viewed

Binary file (1.91 kB)

data/AccomodationAndMealsForfaits_en.numbers DELETED Viewed

Binary file (160 kB)

data/AccomodationAndMealsForfaits_fr.csv DELETED Viewed

@@ -1,31 +0,0 @@
-Destination;Hebergement;Repas
-France;125;27
-Allemagne;150;35
-Arabie Saoudite;200;40
-Autriche;110;40
-Belgique;150;35
-Canada;150;30
-Chine;113;37
-Egypte;150;25
-Emirats Arabes Unis;160;46
-Espagne;130;30
-Etats-Unis;140;47
-Grèce;140;25
-Inde;160;47
-Irlande;180;30
-Italie;120;37
-Japon;150;25
-Maroc;110;25
-Mexique;130;27
-Norvège;160;40
-Pays-Bas;150;32
-Pologne;110;23
-Portugal;108;25
-Qatar;210;35
-Royaume-Uni;130;28
-Russie;180;50
-Singapour;170;42
-Suède;90;30
-Suisse;192;35
-Taiwan;123;37
-Turquie;150;28

data/AccomodationAndMealsForfaits_fr.numbers DELETED Viewed

Binary file (163 kB)

data/BaremeTauxEloignement.csv DELETED Viewed

@@ -1,84 +0,0 @@
-Tableau 1
-Barème Taux d’Éloignement;
-Afrique du sud;10 %
-Algérie;15 %
-Allemagne;0 %
-Arabie saoudite;12 %
-Argentine;11 %
-Australie;3 %
-Autriche;0 %
-Belgique;0 %
-Bolivie;11 %
-Brésil;11 %
-Bulgarie;10 %
-Cameroun;13 %
-Canada;3 %
-Chili;9 %
-Chine;13 %
-Chypre;4 %
-Colombie;13 %
-Corée;11 %
-Croatie;7 %
-Danemark;0 %
-Djibouti;13 %
-E.A.U;9 %
-Egypte;16 %
-Equateur;12 %
-Espagne;0 %
-Estonie;7 %
-Etats unis;3 %
-Ethiopie;12 %
-Finlande;0 %
-Grande Bretagne;0 %
-Grèce;0 %
-Guadeloupe;3 %
-Guyane;7 %
-Hong Kong;8 %
-Hongrie;6 %
-Ile Maurice;8 %
-Inde;15 %
-Indonésie;17 %
-Irlande;0 %
-Israël;9 %
-Italie;0 %
-Japon;8 %
-Jordanie;10 %
-Kenya;13 %
-Koweït;11 %
-Laos;13 %
-Luxembourg;0 %
-Madagascar;13 %
-Malaisie;14 %
-Maroc;8 %
-Martinique;3 %
-Mauritanie;10 %
-Mexique;12 %
-Mozambique;14 %
-Nigeria;17 %
-Norvège;0 %
-Nouvelle Calédonie;4 %
-Pakistan;17 %
-Pérou;13 %
-Philippines;16 %
-Pologne;8 %
-Polynésie;5 %
-Portugal;0 %
-Qatar;9 %
-République Congo;14 %
-République tchèque;6 %
-Roumanie;11 %
-Russie;13 %
-Sénégal;10 %
-Serbie;11 %
-Singapour;6 %
-Slovaquie;6 %
-Sri Lanka;15 %
-Suède;0 %
-Suisse;0 %
-Taiwan;11 %
-Thaïlande;12 %
-Tunisie;7 %
-Turquie;10 %
-Ukraine;12 %
-Venezuela;13 %
-Vietnam;13 %

data/ForfaitsRemboursements.csv DELETED Viewed

@@ -1,31 +0,0 @@
-Destination;;Hébergement;;Repas
-France;;IDF 125€ / Province 100€;;27 €
-Allemagne;;150 € ;;35 €
-Arabie Saoudite;;200 € ;;40 €
-Autriche;;110 € ;;40 €
-Belgique;;150 € ;;35 €
-Canada;;150 € ;;30 €
-Chine;;113 € ;;37 €
-Egypte;;150 € ;;25 €
-Emirats Arabes Unis;;160 € ;;46 €
-Espagne;;130 € ;;30 €
-Etats-Unis;;140 € ;;47 €
-Grèce;;140 € ;;25 €
-Inde;;160 € ;;47 €
-Irlande;;180 € ;;30 €
-Italie;;120 € ;;37 €
-Japon;;150 € ;;25 €
-Maroc;;110 € ;;25 €
-Mexique;;130 € ;;27 €
-Norvège;;160 € ;;40 €
-Pays-Bas;;150 € ;;32 €
-Pologne;;110 € ;;23 €
-Portugal;;108 € ;;25 €
-Qatar;;210 € ;;35 €
-Royaume-Uni;;130 € ;;28 €
-Russie;;180 € ;;50 €
-Singapour;;170 € ;;42 €
-Suède;;90 € ;;30 €
-Suisse;;192 € ;;35 €
-Taiwan;;123 € ;;37 €
-Turquie;;150 € ;;28 €

data/NonPrisEnCharge.csv DELETED Viewed

@@ -1,14 +0,0 @@
-Non pris en charge via Note de Frais
-"Matériel informatique : téléphone, chargeur, tablette, adaptateur prise, etc."
-"Outillage : balai, tournevis, disque de disqueuse, etc."
-"Mobilier/Aménagement de bureau : plantes, dalles, poufs, etc."
-"Fournitures de bureau : café, piles, etc."
-Conférence/Cotisation
-Séminaire/Réunion Team Building
-Doublon de clés
-Equipement de Protection Individuelle (EPI)
-Achat de bagagerie : neuf/perdu/endommagé
-Lavage et recharge Carte de Lavage : tous types de véhicule
-Consommation alcoolisée
-"Collation : confiserie, gâteau, boisson, etc."
-"Prestation de loisirs : Spa, piscine, massage, remontée mécanique, escape game, etc."

data/accommodation_meal_rates.csv ADDED Viewed

	@@ -0,0 +1,31 @@

+country,pays,accommodation,meal
+France,France,Ile de France 125€ / Province 100€,27€
+Germany,Allemagne,150€,35€
+Saudi Arabia,Arabie Saoudite,200€,40€
+Austria,Autriche,110€,40€
+Belgium,Belgique,150€,35€
+Canada,Canada,150€,30€
+China,Chine,113€,37€
+Egypt,Egypte,150€,25€
+United Arab Emirates,Emirats Arabes Unis,160€,46€
+Spain,Espagne,130€,30€
+United States,Etats-Unis,140€,47€
+Greece,Grèce,140€,25€
+India,Inde,160€,47€
+Ireland,Irlande,180€,30€
+Italy,Italie,120€,37€
+Japan,Japon,150€,25€
+Morocco,Maroc,110€,25€
+Mexico,Mexique,130€,27€
+Norway,Norvège,160€,40€
+The Netherlands,Pays-Bas,150€,32€
+Poland,Pologne,110€,23€
+Portugal,Portugal,108€,25€
+Qatar,Qatar,210€,35€
+United Kingdom,Royaume-Uni,130€,28€
+Russia,Russie,180€,50€
+Singapore,Singapour,170€,42€
+Sweden,Suède,90€,30€
+Switzerland,Suisse,192€,35€
+Taiwan,Taïwan,123€,37€
+Türkiye,Turquie,150€,28€

data/business_trips_content_en.docx DELETED Viewed

Binary file (42.7 kB)

data/business_trips_content_fr.docx DELETED Viewed

Binary file (70 kB)

data/business_trips_content_until_3_en.docx DELETED Viewed

Binary file (42.7 kB)

data/business_trips_content_until_3_enfr.docx DELETED Viewed

Binary file (65.7 kB)

data/business_trips_content_until_3_fr.docx DELETED Viewed

Binary file (70.1 kB)

data/{business_trips_content_until_9_en.docx → business_trips_content_until_end_en.docx} RENAMED Viewed

Binary files a/data/business_trips_content_until_9_en.docx and b/data/business_trips_content_until_end_en.docx differ

data/business_trips_content_until_end_fr.docx ADDED Viewed

Binary file (74.6 kB). View file

data/business_trips_plan_en.docx DELETED Viewed

Binary file (50.5 kB)

data/business_trips_plan_until_3_en.docx DELETED Viewed

Binary file (36.1 kB)

data/business_trips_plan_until_3_fr.docx DELETED Viewed

Binary file (35.6 kB)

data/{business_trips_plan_until_9_en.docx → business_trips_plan_until_end_en.docx} RENAMED Viewed

Binary files a/data/business_trips_plan_until_9_en.docx and b/data/business_trips_plan_until_end_en.docx differ

data/remote_rates.csv ADDED Viewed

	@@ -0,0 +1,83 @@

+pays,country,rate
+Afrique du sud,South Africa,10%
+Algérie,Algeria,15%
+Allemagne,Germany,0%
+Arabie saoudite,Saudi Arabia,12%
+Argentine,Argentina,11%
+Australie,Australia,3%
+Autriche,Austria,0%
+Belgique,Belgium,0%
+Bolivie,Bolivia,11%
+Brésil,Brazil,11%
+Bulgarie,Bulgaria,10%
+Cameroun,Cameroon,13%
+Canada,Canada,3%
+Chili,Chile,9%
+Chine,China,13%
+Chypre,Cyprus,4%
+Colombie,Colombia,13%
+Corée,Korea,11%
+Croatie,Croatia,7%
+Danemark,Denmark,0%
+Djibouti,Djibouti,13%
+E.A.U,United Arab Emirates,9%
+Egypte,Egypt,16%
+Equateur,Ecuador,12%
+Espagne,Spain,0%
+Estonie,Estonia,7%
+Etats-Unis,United States,3%
+Ethiopie,Ethiopia,12%
+Finlande,Finland,0%
+Grande Bretagne,Britain,0%
+Grèce,Greece,0%
+Guadeloupe,Guadeloupe,3%
+Guyane,French Guiana,7%
+Hong Kong,Hong Kong,8%
+Hongrie,Hungary,6%
+Ile Maurice,Mauritius Islands,8%
+Inde,India,15%
+Indonésie,Indonesia,17%
+Irlande,Ireland,0%
+Israël,Israel,9%
+Italie,Italy,0%
+Japon,Japan,8%
+Jordanie,Jordan,10%
+Kenya,Kenya,13%
+Koweït,Kuwait,11%
+Laos,Laos,13%
+Luxembourg,Luxemburg,0%
+Madagascar,Madagascar,13%
+Malaisie,Malaysia,14%
+Maroc,Morocco,8%
+Martinique,Martinique,3%
+Mauritanie,Mauritania,10%
+Mexique,Mexico,12%
+Mozambique,Mozambique,14%
+Nigeria,Nigeria,17%
+Norvège,Norway,0%
+Nouvelle Calédonie,New Caledonia,4%
+Pakistan,Pakistan,17%
+Pérou,Peru,13%
+Philippines,Philippines,16%
+Pologne,Poland,8%
+Polynésie,Polynesia,5%
+Portugal,Portugal,0%
+Qatar,Qatar,9%
+République Congo,Republic of the Congo,14%
+République tchèque,Czech Republic,6%
+Roumanie,Romania,11%
+Russie,Russia,13%
+Sénégal,Senegal,10%
+Serbie,Serbia,11%
+Singapour,Singapore,6%
+Slovaquie,Slovakia,6%
+Sri Lanka,Sri Lanka,15%
+Suède,Sweden,0%
+Suisse,Switzerland,0%
+Taiwan,Taiwan,11%
+Thaïlande,Thailand,12%
+Tunisie,Tunisia,7%
+Turquie,Türkiye,10%
+Ukraine,Ukraine,12%
+Venezuela,Venezuela,13%
+Vietnam,Vietnam,13%

data/transports.docx DELETED Viewed

Binary file (41.2 kB)

data/transports_content_en.docx DELETED Viewed

Binary file (40.4 kB)

data/transports_content_fr.docx DELETED Viewed

Binary file (41.2 kB)

data/transports_plan.docx DELETED Viewed

Binary file (35.5 kB)

data/transports_plan_en.docx DELETED Viewed

Binary file (35.5 kB)

data/transports_plan_short_en.docx DELETED Viewed

Binary file (35.1 kB)

data/transports_plan_short_fr.docx DELETED Viewed

Binary file (35 kB)

data/~$ansports_contenu.txt DELETED Viewed

Binary file (162 Bytes)

data/~$placementsEtVoyages.docx DELETED Viewed

Binary file (162 Bytes)

data/~$siness_trip_plan_until_3_fr.docx DELETED Viewed

Binary file (162 Bytes)

data/~$siness_trips_content_until_3_fr.docx DELETED Viewed

Binary file (162 Bytes)

data/~$siness_trips_content_until_9_en.docx DELETED Viewed

Binary file (162 Bytes)

data/{~$ansports.docx → ~$siness_trips_content_until_end_en.docx} RENAMED Viewed

File without changes

data/~$siness_trips_plan_until_9_en.docx DELETED Viewed

Binary file (162 Bytes)

requirements.txt CHANGED Viewed

@@ -1,102 +1,4 @@
-aiofiles==23.1.0
-aiohttp==3.8.4
-aiosignal==1.3.1
-altair==5.0.1
-anyio==3.7.0
-async-timeout==4.0.2
-attrs==23.1.0
-backoff==2.2.1
-certifi==2023.5.7
-charset-normalizer==3.1.0
 chromadb==0.3.25
-click==8.1.3
-clickhouse-connect==0.5.25
-coloredlogs==15.0.1
-contourpy==1.0.7
-cycler==0.11.0
-dataclasses-json==0.5.7
-duckdb==0.8.0
-exceptiongroup==1.1.1
-fastapi==0.96.0
-ffmpy==0.3.0
-filelock==3.12.0
-flatbuffers==23.5.26
-fonttools==4.39.4
-frozenlist==1.3.3
-fsspec==2023.5.0
 gradio==3.33.1
-gradio_client==0.2.5
-h11==0.14.0
-hnswlib==0.7.0
-httpcore==0.17.2
-httptools==0.5.0
-httpx==0.24.1
-huggingface-hub==0.15.1
-humanfriendly==10.0
-idna==3.4
-Jinja2==3.1.2
-jsonschema==4.17.3
-kiwisolver==1.4.4
 langchain==0.0.190
-linkify-it-py==2.0.2
-lxml==4.9.2
-lz4==4.3.2
-markdown-it-py==2.2.0
-MarkupSafe==2.1.3
-marshmallow==3.19.0
-marshmallow-enum==1.5.1
-matplotlib==3.7.1
-mdit-py-plugins==0.3.3
-mdurl==0.1.2
-monotonic==1.6
-mpmath==1.3.0
-multidict==6.0.4
-mypy-extensions==1.0.0
-numexpr==2.8.4
-numpy==1.24.3
-onnxruntime==1.15.0
-openai==0.27.7
-openapi-schema-pydantic==1.2.4
-orjson==3.9.0
-overrides==7.3.1
-packaging==23.1
 pandas==2.0.2
-Pillow==9.5.0
-posthog==3.0.1
-protobuf==4.23.2
-pydantic==1.10.8
-pydub==0.25.1
-Pygments==2.15.1
-pyparsing==3.0.9
-pyrsistent==0.19.3
-python-dateutil==2.8.2
-python-docx==0.8.11
-python-dotenv==1.0.0
-python-multipart==0.0.6
-pytz==2023.3
-PyYAML==6.0
-regex==2023.6.3
-requests==2.31.0
-semantic-version==2.10.0
-six==1.16.0
-sniffio==1.3.0
-SQLAlchemy==2.0.15
-starlette==0.27.0
-sympy==1.12
-tabulate==0.9.0
-tenacity==8.2.2
-tiktoken==0.4.0
-tokenizers==0.13.3
-toolz==0.12.0
-tqdm==4.65.0
-typing-inspect==0.9.0
-typing_extensions==4.6.3
-tzdata==2023.3
-uc-micro-py==1.0.2
-urllib3==2.0.2
-uvicorn==0.22.0
-uvloop==0.17.0
-watchfiles==0.19.0
-websockets==11.0.3
-yarl==1.9.2
-zstandard==0.21.0

 chromadb==0.3.25
 gradio==3.33.1
 langchain==0.0.190
 pandas==2.0.2

src/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

src/control/__pycache__/control.cpython-310.pyc ADDED Viewed

Binary file (5.95 kB). View file

src/control/control.py CHANGED Viewed

@@ -1,43 +1,115 @@
-import chromadb
-import src.tools.retriever as rtrvr
-import src.tools.llm as llm
-from src.domain.doc import Doc
-chroma_client = chromadb.Client()
-plan_language = 'en'
-content_language = 'en'
-path_plan = 'data/business_trips_plan_until_9_en.docx'
-path_content = 'data/business_trips_content_until_9_en.docx'
-collection_name = "until_9"
-doc_plan = Doc(path_plan)
-doc_content = Doc(path_content)
-collection_ = rtrvr.init_collections(chroma_client, doc_plan, doc_content, collection_name)
-def get_response(query):
-    if plan_language == 'en':
-        query = llm.translate(query)
-    sources = rtrvr.similarity_search(collection=collection_, query=query)
-    sources = select_best_sources(sources)
-    sources_contents = [s['content'] for s in sources]
-    context = '\n'.join(sources_contents)
-    answer = llm.generate_paragraph(query=query, context=context, language=content_language)
-    if content_language == 'en':
-        answer = llm.translate(text=answer, language='fr')
-    return answer.lstrip(), sources
-def select_best_sources(sources: [], delta_1_2=0.1, delta_1_n=0.25, absolute=1.1) -> []:
-    best_sources = []
-    for idx, s in enumerate(sources):
-        if idx == 0 \
-           or (s['distance_f'] - sources[idx - 1]['distance_f'] < delta_1_2
-                and s['distance_f'] - sources[0]['distance_f'] < delta_1_n) \
-           or s['distance_f'] < absolute:
-            best_sources.append(s)
-    return best_sources

+import pandas as pd
+from src.tools.retriever import Retriever
+from src.tools.llm import LlmAgent
+from src.model.block import Block
+class Controller:
+    def __init__(self, retriever: Retriever, llm: LlmAgent, plan_language: str, content_language: str, specials: {}):
+        self.plan_language = plan_language
+        self.content_language = content_language
+        self.retriever = retriever
+        self.specials = specials
+        self.llm = llm
+    def get_response(self, query_fr: str) -> (str, [Block]):
+        query = self.llm.translate(text=query_fr) if self.plan_language == 'en' else query_fr
+        block_sources = self.retriever.similarity_search(query=query)
+        block_sources = self._select_best_sources(block_sources)
+        for block in block_sources:
+            self._expand_block_with_specials(block, query_fr)
+        sources_contents = [s.content for s in block_sources]
+        context = '\n'.join(sources_contents)
+        answer = self.llm.generate_paragraph(query=query, context=context, language=self.content_language)
+        sources_contents_fr = [s.content_fr for s in block_sources[:2]]
+        context_fr = '\n'.join(sources_contents_fr)
+        if self.content_language == 'en':
+            answer = self.llm.generate_answer(answer_en=answer, query=query_fr, context_fr=context_fr)
+        answer = answer.strip().strip("'''").strip("```")
+        return answer, block_sources
+    @staticmethod
+    def _select_best_sources(sources: [Block], delta_1_2=0.1, delta_1_n=0.25, absolute=1.1, alpha=0.85) -> [Block]:
+        """
+        Select the best sources: not far from the very best, not far from the last selected, and not too bad per se
+        """
+        best_sources = []
+        for idx, s in enumerate(sources):
+            if idx == 0 \
+                    or (s.distance - sources[idx - 1].distance < delta_1_2
+                        and s.distance - sources[0].distance < delta_1_n) \
+                    or s.distance < absolute:
+                best_sources.append(s)
+                delta_1_2 *= alpha
+                delta_1_n *= alpha
+                absolute *= alpha
+            else:
+                break
+        return best_sources
+    def _expand_block_with_specials(self, block: Block, query: str) -> Block:
+        """
+        Performs special treatments for blocks expanding the text in the block
+        For example, it may add specific content extracted from a table based on elements of the query
+        """
+        def any_in(l1: [], l2: []) -> bool:
+            """
+            checks if any of el in l1 belongs to l2
+            """
+            return 0 < len([el for el in l1 if el in l2])
+        def get_countries_names(df: pd.DataFrame) -> [str]:
+            """
+            extends the ortograph of countries: ex. Etats-Unis = USA = Etats Unis, etc.
+            """
+            countries_fr = list(df['pays'])
+            countries_en = list(df['country'])
+            countries_names = {c_fr: [c_fr, c_en] for c_fr, c_en in zip(countries_fr, countries_en)}
+            countries_extensions = self.specials['countries_extensions']
+            for c in set(countries_extensions.keys()).intersection(set(countries_names.keys())):
+                countries_names[c] += countries_extensions[c]
+            return countries_names
+        def remote_rate_fn(ctrl: Controller, block: Block, query: str) -> Block:
+            remote_rate_df = self.specials['remote_rate_df']
+            remote_rate_known = self.specials['remote_rate_known']
+            remote_rate_unknown = self.specials['remote_rate_unknown']
+            countries_fr = list(remote_rate_df['pays'])
+            countries_names = get_countries_names(remote_rate_df)
+            countries_of_interest = [c for c in countries_fr if any_in(countries_names[c], query)]
+            for c in countries_of_interest:
+                rate = remote_rate_df[remote_rate_df['pays'] == c]['rate'].values[0]
+                block.content += remote_rate_known + c + " is " + rate + '\n'
+            if len(countries_of_interest) == 0:
+                block.content += remote_rate_unknown
+            return block
+        def accommodation_meal_fn(ctrl: Controller, block: Block, query: str) -> Block:
+            accommodation_meal_df = self.specials['accommodation_meal_df']
+            accommodation_meal_known = self.specials['accommodation_meal_known']
+            accommodation_meal_unknown = self.specials['accommodation_meal_unknown']
+            countries_fr = list(accommodation_meal_df['pays'])
+            countries_names = get_countries_names(df=accommodation_meal_df)
+            countries_of_interest = [c for c in countries_fr if any_in(countries_names[c], query)]
+            for c in countries_of_interest:
+                rate = accommodation_meal_df[accommodation_meal_df['pays'] == c][['meal', 'accommodation']].values
+                block.content += accommodation_meal_known + c + " is " + rate[0][0] + ' for meals and ' \
+                                 + rate[0][1] + ' for accommodation\n'
+            if len(countries_of_interest) == 0:
+                block.content += accommodation_meal_unknown
+            return block
+        def expand_block(special: str, ctrl: Controller, block: Block, query: str) -> Block:
+            routing_table = {'RemotenessRateTable': remote_rate_fn,
+                             'AccommodationMealTable': accommodation_meal_fn, }
+            if special in routing_table.keys():
+                fn = routing_table[special]
+                block = fn(ctrl, block, query)
+            return block
+        for special in block.specials:
+            block = expand_block(special, self, block, query)
+        return block

src/domain/__pycache__/container.cpython-310.pyc DELETED Viewed

Binary file (3.88 kB)

src/domain/__pycache__/doc.cpython-310.pyc DELETED Viewed

Binary file (2.61 kB)

src/domain/__pycache__/paragraph.cpython-310.pyc DELETED Viewed

Binary file (998 Bytes)

src/domain/__pycache__/style.cpython-310.pyc DELETED Viewed

Binary file (1.57 kB)

src/domain/project.py DELETED Viewed

@@ -1,9 +0,0 @@
-from src.domain.doc import Doc
-class Project:
-    def __init__(self, name: str, docs: [Doc]):
-        self.docs = docs
-        self.name = name

src/domain/style.py DELETED Viewed

@@ -1,121 +0,0 @@
-from docx.enum.style import WD_STYLE_TYPE
-class Style:
-    def __init__(self, xstyle, doc_id, id_):
-        self.id_ = int(str(doc_id)+str(id_))
-        self.xstyle = xstyle
-        #self.new_style = self.copy_from
-    def copy_from(self, xref):  # need to be further developed
-        if xref.type == WD_STYLE_TYPE.PARAGRAPH:
-            self.xstyle.font.size = xref.font.size
-            self.xstyle.font.color.rgb = xref.font.color.rgb
-            self.xstyle.font.name = xref.font.name
-            self.xstyle.font.all_caps = xref.font.all_caps
-            # Read/write. Causes text in this font to appear in capital letters.
-            self.xstyle.font.bold = xref.font.bold
-            # Read/write. Causes text in this font to appear in bold.
-            self.xstyle.font.complex_script= xref.font.complex_script
-            # Read/write tri-state value. When True, causes the characters in
-            # the run to be treated as complex script regardless of their Unicode values.
-            # "complex script" refers to text written using a complex writing system such as Arabic, Hebrew, Tamil,
-            # Persian, and others.These scripts require special typesetting and handling because they have different
-            # writing directions, glyph connections, and letter shape variations. Word provides features that support
-            # these complex scripts, allowing users to easily create, edit, and format this type of text.
-            self.xstyle.font.cs_bold = xref.font.cs_bold
-            # Read/write tri-state value. When True, causes the complex script characters
-            # in the run to be displayed in bold typeface.
-            self.xstyle.font.cs_italic = xref.font.cs_italic
-            # Read/write tri-state value. When True, causes the complex script characters
-            # in the run to be displayed in italic typeface
-            self.xstyle.font.double_strike = xref.font.double_strike
-            # Read/write tri-state value. When True, causes the text in the run to appear with double strikethrough.
-            self.xstyle.font.emboss = xref.font.emboss
-            # Read/write tri-state value. When True, causes the text in the run to appear
-            # as if raised off the page in relief.
-            self.xstyle.font.hidden = xref.font.hidden
-            # Read/write tri-state value. When True, causes the text in the run to be hidden from display,
-            # unless applications settings force hidden text to be shown.
-            self.xstyle.font.highlight_color = xref.font.highlight_color
-            # A member of WD_COLOR_INDEX indicating the color of highlighting applied,
-            # or None if no highlighting is applied.
-            self.xstyle.font.imprint = xref.font.imprint
-            # Read/write tri-state value. When True,
-            # causes the text in the run to appear as if pressed into the page.
-            self.xstyle.font.italic = xref.font.italic
-            self.xstyle.font.math = xref.font.math
-            self.xstyle.font.no_proof = xref.font.no_proof
-            # Read/write tri-state value. When True, specifies that the contents of this run
-            # should not report any errors when the document is scanned for spelling and grammar.
-            self.xstyle.font.outline = xref.font.outline
-            # Read/write tri-state value. When True causes the characters in the run to appear as if they
-            # have an outline, by drawing a one pixel wide border around the inside and
-            # outside borders of each character glyph.
-            self.xstyle.font.rtl = xref.font.rtl
-            # Read/write tri-state value. When True causes the text in the
-            # run to have right-to-left characteristics.
-            self.xstyle.font.shadow = xref.font.shadow
-            self.xstyle.font.small_caps = xref.font.small_caps
-            self.xstyle.font.snap_to_grid = xref.font.snap_to_grid
-            # Read/write tri-state value. When True causes the run to use the document grid characters per line
-            # settings defined in the docGrid element when laying out the characters in this run.
-            # "Snap to grid" is a layout feature that helps users align text boxes, images, or other objects precisely
-            # to a virtual gridline, ensuring consistent spacing and alignment of objects in a document. It improves the
-            # visual appearance of a document and makes it easier to read and understand. This feature is particularly
-            # useful for creating large documents such as reports, posters, and flyers, making them look more
-            # professional, organized, and readable.
-            self.xstyle.font.spec_vanish = xref.font.spec_vanish
-            # Read/write tri-state value. When True, specifies that the given run shall always behave as if it is
-            # hidden, even when hidden text is being displayed in the current document. The property has a very narrow,
-            # specialized use related to the table of contents.
-            self.xstyle.font.strike = xref.font.strike
-            # Read/write tri-state value. When True causes the text in the run to appear with a single horizontal line
-            # through the center of the line.
-            self.xstyle.font.subscript = xref.font.subscript
-            # Boolean indicating whether the characters in this Font appear as subscript. None indicates the
-            # subscript/superscript value is inherited from the style hierarchy.
-            self.xstyle.font.superscript = xref.font.superscript
-            self.xstyle.font.underline = xref.font.underline
-            self.xstyle.font.web_hidden = xref.font.web_hidden
-            # Using the "Web hidden" property allows us to create multiple versions of a document where some content
-            # can be hidden, while other content can be displayed publicly. For example, in a resume, you can use the
-            # "Web hidden" property to hide private information such as phone numbers and addresses. This information
-            # will only be displayed when an employer chooses to view it.
-            self.xstyle.base_style = xref.base_style
-            # Style object this style inherits from or None if this style is not based on another style.
-            # self.xstyle.builtin = xref.builtin
-            self.xstyle.hidden = xref.hidden
-            # True if display of this style in the style gallery and list of recommended styles is suppressed.
-            # False otherwise. In order to be shown in the style gallery, this value must be False and quick_style
-            # must be True.
-            self.xstyle.locked = xref.locked
-            # True if this style is locked. A locked style does not appear in the styles panel or the style gallery
-            # and cannot be applied to document content.
-            self.xstyle.name = xref.name
-            self.xstyle.priority = xref.priority
-            # The integer sort key governing display sequence of this style in the Word UI. None indicates no setting
-            # is defined, causing Word to use the default value of 0. Style name is used as a secondary sort key to
-            # resolve ordering of styles having the same priority value.
-            # In Microsoft Word, "priority" is typically used to describe the importance of markers and comments to
-            # help authors and editors determine the urgency and priority of the feedback and changes being provided.
-            # For example, a document may use priority markers such as "high," "medium," "low," etc.
-            # to indicate issues that need to be addressed with a higher priority.
-            self.xstyle.quick_style = xref.quick_style
-            # True if this style should be displayed in the style gallery when hidden is False. Read/write Boolean.
-            # for example, Quick Styles can be found in the "Styles" group on the "Home" tab.
-            # self.xstyle.type = xref.type
-            self.xstyle.unhide_when_used = xref.unhide_when_used
-            # True if an application should make this style visible the next time it is applied to content.
-            # False otherwise. Note that python-docx does not automatically unhide a style having True for this
-            # attribute when it is applied to content.
-            # "unhide_when_used" can refer to a feature in Microsoft Excel. It is a cell format option that allows the
-            # cell to automatically show when it is being used and hide when it is not being used. This is useful when
-            # dealing with complex worksheets as it helps users manage and organize data better. When the user needs to
-            # edit or input data, the cell will automatically show, and once the user has completed the operation, the
-            # cell will automatically hide to better present the data.

src/domain/user.py DELETED Viewed

@@ -1,4 +0,0 @@
-class User:
-    def __init__(self, username, ):
-        self.name = username

src/model/__pycache__/block.cpython-310.pyc ADDED Viewed

Binary file (1.73 kB). View file