YvesP committed on
Commit
7fea1f4
·
1 Parent(s): 030bd2d

added file management

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. __pycache__/config.cpython-310.pyc +0 -0
  2. app.py +25 -83
  3. config.py +34 -0
  4. data/.DS_Store +0 -0
  5. data/AccomodationAndMealsForfaits_en.csv +0 -0
  6. data/AccomodationAndMealsForfaits_en.numbers +0 -0
  7. data/AccomodationAndMealsForfaits_fr.csv +0 -31
  8. data/AccomodationAndMealsForfaits_fr.numbers +0 -0
  9. data/BaremeTauxEloignement.csv +0 -84
  10. data/ForfaitsRemboursements.csv +0 -31
  11. data/NonPrisEnCharge.csv +0 -14
  12. data/accommodation_meal_rates.csv +31 -0
  13. data/business_trips_content_en.docx +0 -0
  14. data/business_trips_content_fr.docx +0 -0
  15. data/business_trips_content_until_3_en.docx +0 -0
  16. data/business_trips_content_until_3_enfr.docx +0 -0
  17. data/business_trips_content_until_3_fr.docx +0 -0
  18. data/{business_trips_content_until_9_en.docx → business_trips_content_until_end_en.docx} +0 -0
  19. data/business_trips_content_until_end_fr.docx +0 -0
  20. data/business_trips_plan_en.docx +0 -0
  21. data/business_trips_plan_until_3_en.docx +0 -0
  22. data/business_trips_plan_until_3_fr.docx +0 -0
  23. data/{business_trips_plan_until_9_en.docx → business_trips_plan_until_end_en.docx} +0 -0
  24. data/remote_rates.csv +83 -0
  25. data/transports.docx +0 -0
  26. data/transports_content_en.docx +0 -0
  27. data/transports_content_fr.docx +0 -0
  28. data/transports_plan.docx +0 -0
  29. data/transports_plan_en.docx +0 -0
  30. data/transports_plan_short_en.docx +0 -0
  31. data/transports_plan_short_fr.docx +0 -0
  32. data/~$ansports_contenu.txt +0 -0
  33. data/~$placementsEtVoyages.docx +0 -0
  34. data/~$siness_trip_plan_until_3_fr.docx +0 -0
  35. data/~$siness_trips_content_until_3_fr.docx +0 -0
  36. data/~$siness_trips_content_until_9_en.docx +0 -0
  37. data/{~$ansports.docx → ~$siness_trips_content_until_end_en.docx} +0 -0
  38. data/~$siness_trips_plan_until_9_en.docx +0 -0
  39. requirements.txt +0 -98
  40. src/.DS_Store +0 -0
  41. src/control/__pycache__/control.cpython-310.pyc +0 -0
  42. src/control/control.py +105 -33
  43. src/domain/__pycache__/container.cpython-310.pyc +0 -0
  44. src/domain/__pycache__/doc.cpython-310.pyc +0 -0
  45. src/domain/__pycache__/paragraph.cpython-310.pyc +0 -0
  46. src/domain/__pycache__/style.cpython-310.pyc +0 -0
  47. src/domain/project.py +0 -9
  48. src/domain/style.py +0 -121
  49. src/domain/user.py +0 -4
  50. src/model/__pycache__/block.cpython-310.pyc +0 -0
__pycache__/config.cpython-310.pyc ADDED
Binary file (1.91 kB). View file
 
app.py CHANGED
@@ -1,91 +1,33 @@
1
- import gradio as gr
 
 
 
2
 
 
 
 
 
 
 
3
 
4
- import src.control.control as ctrl
 
5
 
 
 
 
6
 
7
- """
8
- ==================================
9
- A. Component part
10
- ==================================
11
- """
12
 
13
- with gr.Blocks() as hrqa:
 
14
 
15
- with gr.Row():
 
 
 
16
 
17
- with gr.Column():
18
- pass
19
 
20
- with gr.Column(scale=10):
21
- """
22
- 1. input docs components
23
- """
24
-
25
- gr.Markdown("# Questions sur le vivre ensemble en entreprise")
26
-
27
- input_text_comp = gr.Textbox(
28
- label="",
29
- lines=1,
30
- max_lines=3,
31
- interactive=True,
32
- placeholder="Posez votre question ici",
33
- )
34
- input_example_comp = gr.Radio(
35
- label="Examples de questions",
36
- choices=["Remboursement de frais de voiture", "Recommandations de transport"],
37
- )
38
- output_text_comp = gr.Textbox(
39
- label="La réponse automatique",
40
- lines=2,
41
- max_lines=10,
42
- interactive=False,
43
- visible=False,
44
- )
45
- sources_comp = gr.CheckboxGroup(
46
- label="Documents sources",
47
- visible=False,
48
- interactive=False,
49
- )
50
-
51
- with gr.Column():
52
- pass
53
-
54
-
55
- def input_text_fn1():
56
- update_ = {
57
- output_text_comp: gr.update(visible=True),
58
- }
59
- return update_
60
-
61
- def input_text_fn2(input_text_):
62
- answer, sources = ctrl.get_response(query=input_text_)
63
- source_labels = [s['distance']+' '+s['paragraph']+' '+s['title']+' from '+s['doc'] for s in sources]
64
- update_ = {
65
- output_text_comp: gr.update(value=answer),
66
- sources_comp: gr.update(visible=True, choices=source_labels, value=source_labels)
67
- }
68
- return update_
69
-
70
-
71
- def input_example_fn(input_example_):
72
- examples = {
73
- "Remboursement de frais de voiture": "Comment sont remboursés mes frais kilométriques sur mes trajets "
74
- "professionnels?",
75
- "Recommandations de transport": "Quelles sont les recommandations de l'entreprise? Vaut-il mieux voyager en "
76
- "train ou en avion?"
77
- }
78
- update_ = {
79
- input_text_comp: gr.update(value=examples[input_example_]),
80
- output_text_comp: gr.update(visible=True),
81
- }
82
- return update_
83
-
84
- input_text_comp\
85
- .submit(input_text_fn1, inputs=[], outputs=[output_text_comp])\
86
- .then(input_text_fn2, inputs=[input_text_comp], outputs=[output_text_comp, sources_comp])
87
- input_example_comp\
88
- .change(input_example_fn, inputs=[input_example_comp], outputs=[input_text_comp, output_text_comp])\
89
- .then(input_text_fn2, inputs=[input_text_comp], outputs=[output_text_comp, sources_comp])
90
-
91
- hrqa.queue().launch()
 
1
"""
Application entry point.

Loads the plan/content documents, builds the retriever and the LLM agent, loads the
special lookup tables (remoteness rates, accommodation/meal rates) and launches the
Q&A user interface.
"""
import os

import chromadb
import pandas as pd
from langchain.llms import OpenAI

# Explicit imports instead of `from config import *`: the names this script depends on
# are visible here and nothing else from config leaks into this namespace.
from config import (
    OpenAI_KEY,
    collection_name,
    content_en_path,
    content_fr_path,
    content_language,
    examples,
    plan_language,
    plan_path,
    specials,
)
from src.control.control import Controller
from src.tools.retriever import Retriever
from src.tools.llm import LlmAgent
from src.model.doc import Doc
import src.view.view as view

# Only override the environment when the configuration actually provides a key, so that an
# OPENAI_API_KEY already exported in the environment is never replaced by an empty value.
if OpenAI_KEY:
    os.environ["OPENAI_API_KEY"] = OpenAI_KEY
os.environ["TOKENIZERS_PARALLELISM"] = "true"

# Documents: English content, plan, and French content.
doc_content = Doc(content_en_path)
doc_plan = Doc(plan_path)
doc_content_fr = Doc(content_fr_path)

client_db = chromadb.Client()
retriever = Retriever(client_db, doc_plan, doc_content, doc_content_fr, collection_name)

# temperature=0 is passed to the OpenAI wrapper for every completion.
llm_model = OpenAI(temperature=0)
llm = LlmAgent(llm_model)

# The special tables are loaded once at start-up and stored next to their paths in `specials`.
specials['remote_rate_df'] = pd.read_csv(specials['remote_rate_path'])
specials['accommodation_meal_df'] = pd.read_csv(specials['accommodation_meal_path'])
controller = Controller(
    retriever=retriever,
    llm=llm,
    content_language=content_language,
    plan_language=plan_language,
    specials=specials,
)

qna = view.run(ctrl=controller, examples=examples)

qna.queue().launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Static configuration of the business-trips Q&A application: document paths, languages,
UI examples, country-name aliases and the texts/paths of the special lookup tables.
"""
import os

plan_language = 'en'
content_language = 'en'
plan_path = 'data/business_trips_plan_until_end_en.docx'
content_en_path = 'data/business_trips_content_until_end_en.docx'
content_fr_path = 'data/business_trips_content_until_end_fr.docx'

collection_name = "until_end"

# SECURITY: the API key must never be committed to the repository. It is read from the
# OPENAI_API_KEY environment variable; an empty string means "not configured".
# NOTE(review): any key that was previously committed here should be revoked.
OpenAI_KEY = os.environ.get("OPENAI_API_KEY", "")

# Example label -> example question.
examples = {
    "Remboursement de frais de voiture": "Comment sont remboursés mes frais kilométriques sur mes trajets "
                                         "professionnels?",
    "Recommandations de transport": "Quelles sont les recommandations de l'entreprise? "
                                    "Vaut-il mieux voyager en train ou en avion?",
    "Indemnités pour des séjours longs à l'étranger": "Y a-t-il des indemnités pour des séjours longs à l'étranger?",
    "Indemnités pour des séjours longs en Bolivie": "Y a-t-il des indemnités pour des séjours longs en Bolivie?",
    "Indemnités pour les repas aux Pays-Bas": "Quelles sont les indemnités pour les repas au Pays-Bas?"
}

# Alternative spellings per country; keys are expected to match the 'pays' column of the tables.
countries_extensions = {
    'Royaume-Uni': ['UK', 'U.K.','RU', 'R.U.', 'Angleterre'],
    'Etats-Unis': ['Etats-unis', 'Etats Unis', 'Etats unis', 'ETATS-UNIS', 'USA'],
    'E.A.U': ["EAU", "Emirats", "Emirats Arabes Unis", "Emirates", "UAE", "United Arab Emirates"],
    'Pays-Bas': ['Les Pays-Bas']
}

# Paths of the special tables and the sentence fragments used with them.
specials = {'remote_rate_path': 'data/remote_rates.csv',
            'remote_rate_known': "the scale rate of remoteness for the ",

            'remote_rate_unknown': "the scale rate of remoteness for the country mentionned is unknown. Allowances "
                                   "apply though",
            'accommodation_meal_path': 'data/accommodation_meal_rates.csv',
            'accommodation_meal_known': 'the rates for accommodation and meals are the following: ',
            'accommodation_meal_unknown': 'the rates for accommodation and meals are not defined for the country '
                                          'mentionned ',
            'countries_extensions': countries_extensions,
            }
data/.DS_Store CHANGED
Binary files a/data/.DS_Store and b/data/.DS_Store differ
 
data/AccomodationAndMealsForfaits_en.csv DELETED
Binary file (1.91 kB)
 
data/AccomodationAndMealsForfaits_en.numbers DELETED
Binary file (160 kB)
 
data/AccomodationAndMealsForfaits_fr.csv DELETED
@@ -1,31 +0,0 @@
1
- Destination;Hebergement;Repas
2
- France;125;27
3
- Allemagne;150;35
4
- Arabie Saoudite;200;40
5
- Autriche;110;40
6
- Belgique;150;35
7
- Canada;150;30
8
- Chine;113;37
9
- Egypte;150;25
10
- Emirats Arabes Unis;160;46
11
- Espagne;130;30
12
- Etats-Unis;140;47
13
- Gr�ce;140;25
14
- Inde;160;47
15
- Irlande;180;30
16
- Italie;120;37
17
- Japon;150;25
18
- Maroc;110;25
19
- Mexique;130;27
20
- Norv�ge;160;40
21
- Pays-Bas;150;32
22
- Pologne;110;23
23
- Portugal;108;25
24
- Qatar;210;35
25
- Royaume-Uni;130;28
26
- Russie;180;50
27
- Singapour;170;42
28
- Su�de;90;30
29
- Suisse;192;35
30
- Taiwan;123;37
31
- Turquie;150;28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/AccomodationAndMealsForfaits_fr.numbers DELETED
Binary file (163 kB)
 
data/BaremeTauxEloignement.csv DELETED
@@ -1,84 +0,0 @@
1
- Tableau 1
2
- Barème Taux d’Éloignement;
3
- Afrique du sud;10 %
4
- Algérie;15 %
5
- Allemagne;0 %
6
- Arabie saoudite;12 %
7
- Argentine;11 %
8
- Australie;3 %
9
- Autriche;0 %
10
- Belgique;0 %
11
- Bolivie;11 %
12
- Brésil;11 %
13
- Bulgarie;10 %
14
- Cameroun;13 %
15
- Canada;3 %
16
- Chili;9 %
17
- Chine;13 %
18
- Chypre;4 %
19
- Colombie;13 %
20
- Corée;11 %
21
- Croatie;7 %
22
- Danemark;0 %
23
- Djibouti;13 %
24
- E.A.U;9 %
25
- Egypte;16 %
26
- Equateur;12 %
27
- Espagne;0 %
28
- Estonie;7 %
29
- Etats unis;3 %
30
- Ethiopie;12 %
31
- Finlande;0 %
32
- Grande Bretagne;0 %
33
- Grèce;0 %
34
- Guadeloupe;3 %
35
- Guyane;7 %
36
- Hong Kong;8 %
37
- Hongrie;6 %
38
- Ile Maurice;8 %
39
- Inde;15 %
40
- Indonésie;17 %
41
- Irlande;0 %
42
- Israël;9 %
43
- Italie;0 %
44
- Japon;8 %
45
- Jordanie;10 %
46
- Kenya;13 %
47
- Koweït;11 %
48
- Laos;13 %
49
- Luxembourg;0 %
50
- Madagascar;13 %
51
- Malaisie;14 %
52
- Maroc;8 %
53
- Martinique;3 %
54
- Mauritanie;10 %
55
- Mexique;12 %
56
- Mozambique;14 %
57
- Nigeria;17 %
58
- Norvège;0 %
59
- Nouvelle Calédonie;4 %
60
- Pakistan;17 %
61
- Pérou;13 %
62
- Philippines;16 %
63
- Pologne;8 %
64
- Polynésie;5 %
65
- Portugal;0 %
66
- Qatar;9 %
67
- République Congo;14 %
68
- République tchèque;6 %
69
- Roumanie;11 %
70
- Russie;13 %
71
- Sénégal;10 %
72
- Serbie;11 %
73
- Singapour;6 %
74
- Slovaquie;6 %
75
- Sri Lanka;15 %
76
- Suède;0 %
77
- Suisse;0 %
78
- Taiwan;11 %
79
- Thaïlande;12 %
80
- Tunisie;7 %
81
- Turquie;10 %
82
- Ukraine;12 %
83
- Venezuela;13 %
84
- Vietnam;13 %
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/ForfaitsRemboursements.csv DELETED
@@ -1,31 +0,0 @@
1
- Destination;;Hébergement;;Repas
2
- France;;IDF 125€ / Province 100€;;27 €
3
- Allemagne;;150 € ;;35 €
4
- Arabie Saoudite;;200 € ;;40 €
5
- Autriche;;110 € ;;40 €
6
- Belgique;;150 € ;;35 €
7
- Canada;;150 € ;;30 €
8
- Chine;;113 € ;;37 €
9
- Egypte;;150 € ;;25 €
10
- Emirats Arabes Unis;;160 € ;;46 €
11
- Espagne;;130 € ;;30 €
12
- Etats-Unis;;140 € ;;47 €
13
- Grèce;;140 € ;;25 €
14
- Inde;;160 € ;;47 €
15
- Irlande;;180 € ;;30 €
16
- Italie;;120 € ;;37 €
17
- Japon;;150 € ;;25 €
18
- Maroc;;110 € ;;25 €
19
- Mexique;;130 € ;;27 €
20
- Norvège;;160 € ;;40 €
21
- Pays-Bas;;150 € ;;32 €
22
- Pologne;;110 € ;;23 €
23
- Portugal;;108 € ;;25 €
24
- Qatar;;210 € ;;35 €
25
- Royaume-Uni;;130 € ;;28 €
26
- Russie;;180 € ;;50 €
27
- Singapour;;170 € ;;42 €
28
- Suède;;90 € ;;30 €
29
- Suisse;;192 € ;;35 €
30
- Taiwan;;123 € ;;37 €
31
- Turquie;;150 € ;;28 €
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/NonPrisEnCharge.csv DELETED
@@ -1,14 +0,0 @@
1
- Non pris en charge via Note de Frais
2
- "Matériel informatique : téléphone, chargeur, tablette, adaptateur prise, etc."
3
- "Outillage : balai, tournevis, disque de disqueuse, etc."
4
- "Mobilier/Aménagement de bureau : plantes, dalles, poufs, etc."
5
- "Fournitures de bureau : café, piles, etc."
6
- Conférence/Cotisation
7
- Séminaire/Réunion Team Building
8
- Doublon de clés
9
- Equipement de Protection Individuelle (EPI)
10
- Achat de bagagerie : neuf/perdu/endommagé
11
- Lavage et recharge Carte de Lavage : tous types de véhicule
12
- Consommation alcoolisée
13
- "Collation : confiserie, gâteau, boisson, etc."
14
- "Prestation de loisirs : Spa, piscine, massage, remontée mécanique, escape game, etc."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/accommodation_meal_rates.csv ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ country,pays,accommodation,meal
2
+ France,France,Ile de France 125€ / Province 100€,27€
3
+ Germany,Allemagne,150€,35€
4
+ Saudi Arabia,Arabie Saoudite,200€,40€
5
+ Austria,Autriche,110€,40€
6
+ Belgium,Belgique,150€,35€
7
+ Canada,Canada,150€,30€
8
+ China,Chine,113€,37€
9
+ Egypt,Egypte,150€,25€
10
+ United Arab Emirates,Emirats Arabes Unis,160€,46€
11
+ Spain,Espagne,130€,30€
12
+ United States,Etats-Unis,140€,47€
13
+ Greece,Grèce,140€,25€
14
+ India,Inde,160€,47€
15
+ Ireland,Irlande,180€,30€
16
+ Italy,Italie,120€,37€
17
+ Japan,Japon,150€,25€
18
+ Morocco,Maroc,110€,25€
19
+ Mexico,Mexique,130€,27€
20
+ Norway,Norvège,160€,40€
21
+ The Netherlands,Pays-Bas,150€,32€
22
+ Poland,Pologne,110€,23€
23
+ Portugal,Portugal,108€,25€
24
+ Qatar,Qatar,210€,35€
25
+ United Kingdom,Royaume-Uni,130€,28€
26
+ Russia,Russie,180€,50€
27
+ Singapore,Singapour,170€,42€
28
+ Sweden,Suède,90€,30€
29
+ Switzerland,Suisse,192€,35€
30
+ Taiwan,Taïwan,123€,37€
31
+ Türkiye,Turquie,150€,28€
data/business_trips_content_en.docx DELETED
Binary file (42.7 kB)
 
data/business_trips_content_fr.docx DELETED
Binary file (70 kB)
 
data/business_trips_content_until_3_en.docx DELETED
Binary file (42.7 kB)
 
data/business_trips_content_until_3_enfr.docx DELETED
Binary file (65.7 kB)
 
data/business_trips_content_until_3_fr.docx DELETED
Binary file (70.1 kB)
 
data/{business_trips_content_until_9_en.docx → business_trips_content_until_end_en.docx} RENAMED
Binary files a/data/business_trips_content_until_9_en.docx and b/data/business_trips_content_until_end_en.docx differ
 
data/business_trips_content_until_end_fr.docx ADDED
Binary file (74.6 kB). View file
 
data/business_trips_plan_en.docx DELETED
Binary file (50.5 kB)
 
data/business_trips_plan_until_3_en.docx DELETED
Binary file (36.1 kB)
 
data/business_trips_plan_until_3_fr.docx DELETED
Binary file (35.6 kB)
 
data/{business_trips_plan_until_9_en.docx → business_trips_plan_until_end_en.docx} RENAMED
Binary files a/data/business_trips_plan_until_9_en.docx and b/data/business_trips_plan_until_end_en.docx differ
 
data/remote_rates.csv ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pays,country,rate
2
+ Afrique du sud,South Africa,10%
3
+ Algérie,Algeria,15%
4
+ Allemagne,Germany,0%
5
+ Arabie saoudite,Saudi Arabia,12%
6
+ Argentine,Argentina,11%
7
+ Australie,Australia,3%
8
+ Autriche,Austria,0%
9
+ Belgique,Belgium,0%
10
+ Bolivie,Bolivia,11%
11
+ Brésil,Brazil,11%
12
+ Bulgarie,Bulgaria,10%
13
+ Cameroun,Cameroon,13%
14
+ Canada,Canada,3%
15
+ Chili,Chile,9%
16
+ Chine,China,13%
17
+ Chypre,Cyprus,4%
18
+ Colombie,Colombia,13%
19
+ Corée,Korea,11%
20
+ Croatie,Croatia,7%
21
+ Danemark,Denmark,0%
22
+ Djibouti,Djibouti,13%
23
+ E.A.U,United Arab Emirates,9%
24
+ Egypte,Egypt,16%
25
+ Equateur,Ecuador,12%
26
+ Espagne,Spain,0%
27
+ Estonie,Estonia,7%
28
+ Etats-Unis,United States,3%
29
+ Ethiopie,Ethiopia,12%
30
+ Finlande,Finland,0%
31
+ Grande Bretagne,Britain,0%
32
+ Grèce,Greece,0%
33
+ Guadeloupe,Guadeloupe,3%
34
+ Guyane,French Guiana,7%
35
+ Hong Kong,Hong Kong,8%
36
+ Hongrie,Hungary,6%
37
+ Ile Maurice,Mauritius Islands,8%
38
+ Inde,India,15%
39
+ Indonésie,Indonesia,17%
40
+ Irlande,Ireland,0%
41
+ Israël,Israel,9%
42
+ Italie,Italy,0%
43
+ Japon,Japan,8%
44
+ Jordanie,Jordan,10%
45
+ Kenya,Kenya,13%
46
+ Koweït,Kuwait,11%
47
+ Laos,Laos,13%
48
+ Luxembourg,Luxemburg,0%
49
+ Madagascar,Madagascar,13%
50
+ Malaisie,Malaysia,14%
51
+ Maroc,Morocco,8%
52
+ Martinique,Martinique,3%
53
+ Mauritanie,Mauritania,10%
54
+ Mexique,Mexico,12%
55
+ Mozambique,Mozambique,14%
56
+ Nigeria,Nigeria,17%
57
+ Norvège,Norway,0%
58
+ Nouvelle Calédonie,New Caledonia,4%
59
+ Pakistan,Pakistan,17%
60
+ Pérou,Peru,13%
61
+ Philippines,Philippines,16%
62
+ Pologne,Poland,8%
63
+ Polynésie,Polynesia,5%
64
+ Portugal,Portugal,0%
65
+ Qatar,Qatar,9%
66
+ République Congo,Republic of the Congo,14%
67
+ République tchèque,Czech Republic,6%
68
+ Roumanie,Romania,11%
69
+ Russie,Russia,13%
70
+ Sénégal,Senegal,10%
71
+ Serbie,Serbia,11%
72
+ Singapour,Singapore,6%
73
+ Slovaquie,Slovakia,6%
74
+ Sri Lanka,Sri Lanka,15%
75
+ Suède,Sweden,0%
76
+ Suisse,Switzerland,0%
77
+ Taiwan,Taiwan,11%
78
+ Thaïlande,Thailand,12%
79
+ Tunisie,Tunisia,7%
80
+ Turquie,Türkiye,10%
81
+ Ukraine,Ukraine,12%
82
+ Venezuela,Venezuela,13%
83
+ Vietnam,Vietnam,13%
data/transports.docx DELETED
Binary file (41.2 kB)
 
data/transports_content_en.docx DELETED
Binary file (40.4 kB)
 
data/transports_content_fr.docx DELETED
Binary file (41.2 kB)
 
data/transports_plan.docx DELETED
Binary file (35.5 kB)
 
data/transports_plan_en.docx DELETED
Binary file (35.5 kB)
 
data/transports_plan_short_en.docx DELETED
Binary file (35.1 kB)
 
data/transports_plan_short_fr.docx DELETED
Binary file (35 kB)
 
data/~$ansports_contenu.txt DELETED
Binary file (162 Bytes)
 
data/~$placementsEtVoyages.docx DELETED
Binary file (162 Bytes)
 
data/~$siness_trip_plan_until_3_fr.docx DELETED
Binary file (162 Bytes)
 
data/~$siness_trips_content_until_3_fr.docx DELETED
Binary file (162 Bytes)
 
data/~$siness_trips_content_until_9_en.docx DELETED
Binary file (162 Bytes)
 
data/{~$ansports.docx → ~$siness_trips_content_until_end_en.docx} RENAMED
File without changes
data/~$siness_trips_plan_until_9_en.docx DELETED
Binary file (162 Bytes)
 
requirements.txt CHANGED
@@ -1,102 +1,4 @@
1
- aiofiles==23.1.0
2
- aiohttp==3.8.4
3
- aiosignal==1.3.1
4
- altair==5.0.1
5
- anyio==3.7.0
6
- async-timeout==4.0.2
7
- attrs==23.1.0
8
- backoff==2.2.1
9
- certifi==2023.5.7
10
- charset-normalizer==3.1.0
11
  chromadb==0.3.25
12
- click==8.1.3
13
- clickhouse-connect==0.5.25
14
- coloredlogs==15.0.1
15
- contourpy==1.0.7
16
- cycler==0.11.0
17
- dataclasses-json==0.5.7
18
- duckdb==0.8.0
19
- exceptiongroup==1.1.1
20
- fastapi==0.96.0
21
- ffmpy==0.3.0
22
- filelock==3.12.0
23
- flatbuffers==23.5.26
24
- fonttools==4.39.4
25
- frozenlist==1.3.3
26
- fsspec==2023.5.0
27
  gradio==3.33.1
28
- gradio_client==0.2.5
29
- h11==0.14.0
30
- hnswlib==0.7.0
31
- httpcore==0.17.2
32
- httptools==0.5.0
33
- httpx==0.24.1
34
- huggingface-hub==0.15.1
35
- humanfriendly==10.0
36
- idna==3.4
37
- Jinja2==3.1.2
38
- jsonschema==4.17.3
39
- kiwisolver==1.4.4
40
  langchain==0.0.190
41
- linkify-it-py==2.0.2
42
- lxml==4.9.2
43
- lz4==4.3.2
44
- markdown-it-py==2.2.0
45
- MarkupSafe==2.1.3
46
- marshmallow==3.19.0
47
- marshmallow-enum==1.5.1
48
- matplotlib==3.7.1
49
- mdit-py-plugins==0.3.3
50
- mdurl==0.1.2
51
- monotonic==1.6
52
- mpmath==1.3.0
53
- multidict==6.0.4
54
- mypy-extensions==1.0.0
55
- numexpr==2.8.4
56
- numpy==1.24.3
57
- onnxruntime==1.15.0
58
- openai==0.27.7
59
- openapi-schema-pydantic==1.2.4
60
- orjson==3.9.0
61
- overrides==7.3.1
62
- packaging==23.1
63
  pandas==2.0.2
64
- Pillow==9.5.0
65
- posthog==3.0.1
66
- protobuf==4.23.2
67
- pydantic==1.10.8
68
- pydub==0.25.1
69
- Pygments==2.15.1
70
- pyparsing==3.0.9
71
- pyrsistent==0.19.3
72
- python-dateutil==2.8.2
73
- python-docx==0.8.11
74
- python-dotenv==1.0.0
75
- python-multipart==0.0.6
76
- pytz==2023.3
77
- PyYAML==6.0
78
- regex==2023.6.3
79
- requests==2.31.0
80
- semantic-version==2.10.0
81
- six==1.16.0
82
- sniffio==1.3.0
83
- SQLAlchemy==2.0.15
84
- starlette==0.27.0
85
- sympy==1.12
86
- tabulate==0.9.0
87
- tenacity==8.2.2
88
- tiktoken==0.4.0
89
- tokenizers==0.13.3
90
- toolz==0.12.0
91
- tqdm==4.65.0
92
- typing-inspect==0.9.0
93
- typing_extensions==4.6.3
94
- tzdata==2023.3
95
- uc-micro-py==1.0.2
96
- urllib3==2.0.2
97
- uvicorn==0.22.0
98
- uvloop==0.17.0
99
- watchfiles==0.19.0
100
- websockets==11.0.3
101
- yarl==1.9.2
102
- zstandard==0.21.0
 
 
 
 
 
 
 
 
 
 
 
1
  chromadb==0.3.25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  gradio==3.33.1
 
 
 
 
 
 
 
 
 
 
 
 
3
  langchain==0.0.190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  pandas==2.0.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/.DS_Store ADDED
Binary file (6.15 kB). View file
 
src/control/__pycache__/control.cpython-310.pyc ADDED
Binary file (5.95 kB). View file
 
src/control/control.py CHANGED
@@ -1,43 +1,115 @@
1
- import chromadb
2
 
3
- import src.tools.retriever as rtrvr
4
- import src.tools.llm as llm
5
- from src.domain.doc import Doc
6
 
7
- chroma_client = chromadb.Client()
8
 
9
- plan_language = 'en'
10
- content_language = 'en'
11
- path_plan = 'data/business_trips_plan_until_9_en.docx'
12
- path_content = 'data/business_trips_content_until_9_en.docx'
13
- collection_name = "until_9"
14
 
15
- doc_plan = Doc(path_plan)
16
- doc_content = Doc(path_content)
17
- collection_ = rtrvr.init_collections(chroma_client, doc_plan, doc_content, collection_name)
 
 
 
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- def get_response(query):
21
- if plan_language == 'en':
22
- query = llm.translate(query)
23
- sources = rtrvr.similarity_search(collection=collection_, query=query)
24
- sources = select_best_sources(sources)
25
- sources_contents = [s['content'] for s in sources]
26
- context = '\n'.join(sources_contents)
27
- answer = llm.generate_paragraph(query=query, context=context, language=content_language)
28
- if content_language == 'en':
29
- answer = llm.translate(text=answer, language='fr')
30
- return answer.lstrip(), sources
 
 
 
 
 
 
 
31
 
 
 
 
 
 
32
 
33
- def select_best_sources(sources: [], delta_1_2=0.1, delta_1_n=0.25, absolute=1.1) -> []:
34
- best_sources = []
35
- for idx, s in enumerate(sources):
36
- if idx == 0 \
37
- or (s['distance_f'] - sources[idx - 1]['distance_f'] < delta_1_2
38
- and s['distance_f'] - sources[0]['distance_f'] < delta_1_n) \
39
- or s['distance_f'] < absolute:
40
- best_sources.append(s)
41
- return best_sources
42
 
 
 
 
 
 
 
 
 
 
 
 
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
 
3
+ from src.tools.retriever import Retriever
4
+ from src.tools.llm import LlmAgent
5
+ from src.model.block import Block
6
 
 
7
 
8
class Controller:
    """
    Builds the answer to a user question from retrieved document blocks.

    The controller retrieves candidate blocks, keeps the best ones, enriches blocks that
    carry "specials" markers with rows taken from lookup tables, and asks the LLM agent
    to generate the answer.
    """

    def __init__(self, retriever: "Retriever", llm: "LlmAgent", plan_language: str, content_language: str,
                 specials: dict):
        """
        :param retriever: object exposing ``similarity_search(query=...)``
        :param llm: object exposing ``translate``, ``generate_paragraph`` and ``generate_answer``
        :param plan_language: language of the plan; the query is translated when it is ``'en'``
        :param content_language: language passed to ``generate_paragraph``
        :param specials: dict holding the lookup DataFrames (``remote_rate_df``,
            ``accommodation_meal_df``), their sentence fragments and ``countries_extensions``
        """
        self.plan_language = plan_language
        self.content_language = content_language
        self.retriever = retriever
        self.specials = specials
        self.llm = llm

    def get_response(self, query_fr: str) -> "tuple[str, list[Block]]":
        """
        Answer ``query_fr`` and return ``(answer, blocks used as sources)``.

        The selected blocks are modified in place: their ``content`` is extended with the
        special-table sentences before the context is built.
        """
        query = self.llm.translate(text=query_fr) if self.plan_language == 'en' else query_fr
        block_sources = self.retriever.similarity_search(query=query)
        block_sources = self._select_best_sources(block_sources)
        # Country detection runs on the original query, not on its translation.
        for block in block_sources:
            self._expand_block_with_specials(block, query_fr)
        context = '\n'.join(s.content for s in block_sources)
        answer = self.llm.generate_paragraph(query=query, context=context, language=self.content_language)
        if self.content_language == 'en':
            # Only the two best blocks contribute their ``content_fr`` to this second call.
            context_fr = '\n'.join(s.content_fr for s in block_sources[:2])
            answer = self.llm.generate_answer(answer_en=answer, query=query_fr, context_fr=context_fr)
        # str.strip takes a set of characters: this removes surrounding whitespace, then any
        # leading/trailing single quotes, then any leading/trailing backticks.
        answer = answer.strip().strip("'''").strip("```")
        return answer, block_sources

    @staticmethod
    def _select_best_sources(sources: "list[Block]", delta_1_2=0.1, delta_1_n=0.25, absolute=1.1,
                             alpha=0.85) -> "list[Block]":
        """
        Select the best sources: not far from the very best, not far from the previous one,
        or not too bad per se.

        The first source is always kept. Selection stops at the first rejected source, and the
        three thresholds are multiplied by ``alpha`` after every accepted source.
        NOTE(review): assumes ``sources`` is ordered by increasing ``distance`` - confirm with
        the retriever.
        """
        best_sources = []
        for idx, s in enumerate(sources):
            close_to_neighbours = (
                s.distance - sources[idx - 1].distance < delta_1_2
                and s.distance - sources[0].distance < delta_1_n
            )
            if not (idx == 0 or close_to_neighbours or s.distance < absolute):
                break
            best_sources.append(s)
            delta_1_2 *= alpha
            delta_1_n *= alpha
            absolute *= alpha
        return best_sources

    @staticmethod
    def _mentions_any(names, text: str) -> bool:
        """
        Return True when at least one of ``names`` appears in ``text`` as a whole word.

        A plain substring test is not enough: "Inde" is contained in "Indemnités" and "USA"
        in "USAGE". The lookarounds require that the name is not glued to another word
        character; they also work for names ending with punctuation such as "U.K.".
        Matching stays case-sensitive, as short aliases ("EAU", "RU") are also common words
        when written in lower case.
        """
        import re  # local import keeps the class self-contained

        return any(
            re.search(r"(?<!\w)" + re.escape(name) + r"(?!\w)", text) is not None
            for name in names
            if isinstance(name, str) and name
        )

    def _countries_names(self, df: "pd.DataFrame") -> dict:
        """
        Map each French country name of ``df`` to all its known spellings
        (ex. Etats-Unis = United States = USA = Etats Unis, etc.).
        """
        countries_names = {c_fr: [c_fr, c_en] for c_fr, c_en in zip(df['pays'], df['country'])}
        for country, aliases in self.specials['countries_extensions'].items():
            if country in countries_names:
                # the list was created above, so the configuration is not mutated
                countries_names[country] += aliases
        return countries_names

    def _countries_of_interest(self, df: "pd.DataFrame", query: str) -> list:
        """List the French names of the countries of ``df`` that are mentioned in ``query``."""
        countries_names = self._countries_names(df)
        return [c for c in list(df['pays']) if self._mentions_any(countries_names[c], query)]

    def _append_remote_rates(self, block: "Block", query: str) -> "Block":
        """Append to ``block.content`` the remoteness rate of every country mentioned in ``query``."""
        remote_rate_df = self.specials['remote_rate_df']
        countries = self._countries_of_interest(remote_rate_df, query)
        for c in countries:
            rate = remote_rate_df[remote_rate_df['pays'] == c]['rate'].values[0]
            block.content += f"{self.specials['remote_rate_known']}{c} is {rate}\n"
        if not countries:
            block.content += self.specials['remote_rate_unknown']
        return block

    def _append_accommodation_meal_rates(self, block: "Block", query: str) -> "Block":
        """Append to ``block.content`` the meal and accommodation rates of every country mentioned in ``query``."""
        accommodation_meal_df = self.specials['accommodation_meal_df']
        countries = self._countries_of_interest(accommodation_meal_df, query)
        for c in countries:
            rows = accommodation_meal_df[accommodation_meal_df['pays'] == c]
            meal, accommodation = rows[['meal', 'accommodation']].values[0]
            block.content += (f"{self.specials['accommodation_meal_known']}{c} is {meal} for meals and "
                              f"{accommodation} for accommodation\n")
        if not countries:
            block.content += self.specials['accommodation_meal_unknown']
        return block

    def _expand_block_with_specials(self, block: "Block", query: str) -> "Block":
        """
        Performs special treatments for blocks, expanding the text in the block.
        For example, it may add specific content extracted from a table based on elements of the query.

        Each entry of ``block.specials`` is routed to its handler; unknown entries are ignored.
        The block is modified in place and returned.
        """
        routing_table = {
            'RemotenessRateTable': self._append_remote_rates,
            'AccommodationMealTable': self._append_accommodation_meal_rates,
        }
        for special in block.specials:
            expand = routing_table.get(special)
            if expand is not None:
                block = expand(block, query)
        return block
src/domain/__pycache__/container.cpython-310.pyc DELETED
Binary file (3.88 kB)
 
src/domain/__pycache__/doc.cpython-310.pyc DELETED
Binary file (2.61 kB)
 
src/domain/__pycache__/paragraph.cpython-310.pyc DELETED
Binary file (998 Bytes)
 
src/domain/__pycache__/style.cpython-310.pyc DELETED
Binary file (1.57 kB)
 
src/domain/project.py DELETED
@@ -1,9 +0,0 @@
1
- from src.domain.doc import Doc
2
-
3
-
4
- class Project:
5
-
6
- def __init__(self, name: str, docs: [Doc]):
7
-
8
- self.docs = docs
9
- self.name = name
 
 
 
 
 
 
 
 
 
 
src/domain/style.py DELETED
@@ -1,121 +0,0 @@
1
- from docx.enum.style import WD_STYLE_TYPE
2
- class Style:
3
-
4
- def __init__(self, xstyle, doc_id, id_):
5
-
6
- self.id_ = int(str(doc_id)+str(id_))
7
- self.xstyle = xstyle
8
- #self.new_style = self.copy_from
9
-
10
- def copy_from(self, xref): # need to be further developed
11
-
12
- if xref.type == WD_STYLE_TYPE.PARAGRAPH:
13
- self.xstyle.font.size = xref.font.size
14
- self.xstyle.font.color.rgb = xref.font.color.rgb
15
- self.xstyle.font.name = xref.font.name
16
- self.xstyle.font.all_caps = xref.font.all_caps
17
- # Read/write. Causes text in this font to appear in capital letters.
18
- self.xstyle.font.bold = xref.font.bold
19
- # Read/write. Causes text in this font to appear in bold.
20
- self.xstyle.font.complex_script= xref.font.complex_script
21
- # Read/write tri-state value. When True, causes the characters in
22
- # the run to be treated as complex script regardless of their Unicode values.
23
- # "complex script" refers to text written using a complex writing system such as Arabic, Hebrew, Tamil,
24
- # Persian, and others.These scripts require special typesetting and handling because they have different
25
- # writing directions, glyph connections, and letter shape variations. Word provides features that support
26
- # these complex scripts, allowing users to easily create, edit, and format this type of text.
27
- self.xstyle.font.cs_bold = xref.font.cs_bold
28
- # Read/write tri-state value. When True, causes the complex script characters
29
- # in the run to be displayed in bold typeface.
30
- self.xstyle.font.cs_italic = xref.font.cs_italic
31
- # Read/write tri-state value. When True, causes the complex script characters
32
- # in the run to be displayed in italic typeface
33
- self.xstyle.font.double_strike = xref.font.double_strike
34
- # Read/write tri-state value. When True, causes the text in the run to appear with double strikethrough.
35
- self.xstyle.font.emboss = xref.font.emboss
36
- # Read/write tri-state value. When True, causes the text in the run to appear
37
- # as if raised off the page in relief.
38
- self.xstyle.font.hidden = xref.font.hidden
39
- # Read/write tri-state value. When True, causes the text in the run to be hidden from display,
40
- # unless applications settings force hidden text to be shown.
41
- self.xstyle.font.highlight_color = xref.font.highlight_color
42
- # A member of WD_COLOR_INDEX indicating the color of highlighting applied,
43
- # or None if no highlighting is applied.
44
- self.xstyle.font.imprint = xref.font.imprint
45
- # Read/write tri-state value. When True,
46
- # causes the text in the run to appear as if pressed into the page.
47
- self.xstyle.font.italic = xref.font.italic
48
- self.xstyle.font.math = xref.font.math
49
- self.xstyle.font.no_proof = xref.font.no_proof
50
- # Read/write tri-state value. When True, specifies that the contents of this run
51
- # should not report any errors when the document is scanned for spelling and grammar.
52
- self.xstyle.font.outline = xref.font.outline
53
- # Read/write tri-state value. When True causes the characters in the run to appear as if they
54
- # have an outline, by drawing a one pixel wide border around the inside and
55
- # outside borders of each character glyph.
56
- self.xstyle.font.rtl = xref.font.rtl
57
- # Read/write tri-state value. When True causes the text in the
58
- # run to have right-to-left characteristics.
59
- self.xstyle.font.shadow = xref.font.shadow
60
- self.xstyle.font.small_caps = xref.font.small_caps
61
- self.xstyle.font.snap_to_grid = xref.font.snap_to_grid
62
- # Read/write tri-state value. When True causes the run to use the document grid characters per line
63
- # settings defined in the docGrid element when laying out the characters in this run.
64
- # Snap to grid" is a layout feature that helps users align text boxes, images, or other objects precisely
65
- # to a virtual gridline, ensuring consistent spacing and alignment of objects in a document. It improves the
66
- # visual appearance of a document and makes it easier to read and understand. This feature is particularly
67
- # useful for creating large documents such as reports, posters, and flyers, making them look more
68
- # professional, organized, and readable."""
69
- self.xstyle.font.spec_vanish = xref.font.spec_vanish
70
- # Read/write tri-state value. When True, specifies that the given run shall always behave as if it is
71
- # hidden, even when hidden text is being displayed in the current document. The property has a very narrow,
72
- # specialized use related to the table of contents.
73
- self.xstyle.font.strike = xref.font.strike
74
- # Read/write tri-state value. When True causes the text in the run to appear with a single horizontal line
75
- # through the center of the line.
76
- self.xstyle.font.subscript = xref.font.subscript
77
- # Boolean indicating whether the characters in this Font appear as subscript. None indicates the
78
- # subscript/subscript value is inherited from the style hierarchy.
79
- self.xstyle.font.superscript = xref.font.superscript
80
- self.xstyle.font.underline = xref.font.underline
81
- self.xstyle.font.web_hidden = xref.font.web_hidden
82
- # Using the "Web hidden" property allows us to create multiple versions of a document where some content
83
- # can be hidden, while other content can be displayed publicly. For example, in a resume, you can use the
84
- # "Web hidden" property to hide private information such as phone numbers and addresses. This information
85
- # will only be displayed when an employer chooses to view it.
86
-
87
- self.xstyle.base_style = xref.base_style
88
- # Style object this style inherits from or None if this style is not based on another style.
89
- # self.xstyle.builtin = xref.builtin
90
- self.xstyle.hidden = xref.hidden
91
- # True if display of this style in the style gallery and list of recommended styles is suppressed.
92
- # False otherwise. In order to be shown in the style gallery, this value must be False and quick_style
93
- # must be True.
94
- self.xstyle.locked = xref.locked
95
- # True if this style is locked. not appear in the styles panel or the style gallery and cannot be applied
96
- # to document content
97
- self.xstyle.name = xref.name
98
- self.xstyle.priority = xref.priority
99
- # The integer sort key governing display sequence of this style in the Word UI. None indicates no setting
100
- # is defined, causing Word to use the default value of 0. Style name is used as a secondary sort key to
101
- # resolve ordering of styles having the same priority value.
102
- # In Microsoft Word, "priority" is typically used to describe the importance of markers and comments to
103
- # help authors and editors determine the urgency and priority of the feedback and changes being provided.
104
- # For example, a document may use priority markers such as "high," "medium," "low," etc.
105
- # to indicate issues that need to be addressed with a higher priority.
106
-
107
- self.xstyle.quick_style = xref.quick_style
108
- # True if this style should be displayed in the style gallery when hidden is False. Read/write Boolean.
109
- # for example, Quick Styles can be found in the "Styles" group on the "Home" tab.
110
- # self.xstyle.type = xref.type
111
- self.xstyle.unhide_when_used = xref.unhide_when_used
112
- # True if an application should make this style visible the next time it is applied to content.
113
- # False otherwise. Note that python-docx does not automatically unhide a style having True for this
114
- # attribute when it is applied to content.
115
-
116
- # "unhide_when_used" can refer to a feature in Microsoft Excel. It is a cell format option that allows the
117
- # cell to automatically show when it is being used and hide when it is not being used. This is useful when
118
- # dealing with complex worksheets as it helps users manage and organize data better. When the user needs to
119
- # edit or input data, the cell will automatically show, and once the user has completed the operation, the
120
- # cell will automatically hide to better present the data.
121
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/domain/user.py DELETED
@@ -1,4 +0,0 @@
1
- class User:
2
-
3
- def __init__(self, username, ):
4
- self.name = username
 
 
 
 
 
src/model/__pycache__/block.cpython-310.pyc ADDED
Binary file (1.73 kB). View file