update AFP agent with access to its vectorstore
Browse files- app.py +72 -36
- assets/style.css +5 -0
- assets/utils_javascript.py +6 -0
- spinoza_project/config.yaml +3 -5
- spinoza_project/source/frontend/utils.py +16 -0
app.py
CHANGED
@@ -14,6 +14,7 @@ from spinoza_project.source.backend.get_prompts import get_qa_prompts
|
|
14 |
from spinoza_project.source.frontend.utils import (
|
15 |
make_html_source,
|
16 |
make_html_presse_source,
|
|
|
17 |
parse_output_llm_with_sources,
|
18 |
init_env,
|
19 |
)
|
@@ -48,6 +49,7 @@ llm = get_llm_api()
|
|
48 |
## Loading_tools
|
49 |
print("Loading Databases")
|
50 |
bdd_presse = get_vectorstore_api("presse")
|
|
|
51 |
qdrants = {
|
52 |
tab: pickle_to_document_store(
|
53 |
hf_hub_download(
|
@@ -57,7 +59,7 @@ qdrants = {
|
|
57 |
)
|
58 |
)
|
59 |
for tab in config["prompt_naming"]
|
60 |
-
if tab != "Presse"
|
61 |
}
|
62 |
|
63 |
## Load Prompts
|
@@ -206,43 +208,59 @@ def answer_questions(*questions_sources, config=config):
|
|
206 |
]
|
207 |
|
208 |
|
209 |
-
def get_sources(
|
|
|
|
|
210 |
k = config["num_document_retrieved"]
|
211 |
min_similarity = config["min_similarity"]
|
212 |
-
formated = []
|
213 |
-
text = []
|
214 |
for i, (question, tab) in enumerate(zip(questions, list(config["tabs"].keys()))):
|
215 |
-
|
216 |
-
(
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
)
|
222 |
-
|
223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
224 |
config["source_mapping"][tab]
|
225 |
].similarity_search_with_relevance_scores(
|
226 |
config["query_preprompt"]
|
227 |
+ question.replace("<p>", "").replace("</p>\n", ""),
|
228 |
k=k,
|
229 |
)
|
230 |
-
|
231 |
-
|
232 |
-
sources = [
|
233 |
-
(doc, score) for doc, score in sources
|
234 |
-
] # if score >= min_similarity]
|
235 |
-
formated.extend(
|
236 |
-
[
|
237 |
-
make_html_presse_source(source[0], j, source[1])
|
238 |
-
for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
|
239 |
]
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
|
|
246 |
text.extend(
|
247 |
[
|
248 |
"\n\n".join(
|
@@ -260,8 +278,12 @@ def get_sources(questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config
|
|
260 |
return formated, text
|
261 |
|
262 |
|
263 |
-
def retrieve_sources(
|
264 |
-
|
|
|
|
|
|
|
|
|
265 |
|
266 |
return (formated_sources, *text_sources)
|
267 |
|
@@ -342,10 +364,6 @@ with gr.Blocks(
|
|
342 |
component_sources = {elt: gr.State("") for elt in config["tabs"]}
|
343 |
text_sources = {elt: gr.State("") for elt in config["tabs"]}
|
344 |
tab_states = {elt: gr.State(elt) for elt in config["tabs"]}
|
345 |
-
chatbot_states = [
|
346 |
-
gr.State(name)
|
347 |
-
for name in ["science", "presse", "politique", "legal", "spinoza"]
|
348 |
-
]
|
349 |
|
350 |
with gr.Tab("Q&A", elem_id="main-component"):
|
351 |
with gr.Row(elem_id="chatbot-row"):
|
@@ -401,7 +419,7 @@ with gr.Blocks(
|
|
401 |
layout="panel",
|
402 |
avatar_images=(
|
403 |
"./assets/logos/help.png",
|
404 |
-
None,
|
405 |
),
|
406 |
)
|
407 |
|
@@ -419,7 +437,7 @@ with gr.Blocks(
|
|
419 |
layout="panel",
|
420 |
avatar_images=(
|
421 |
"./assets/logos/help.png",
|
422 |
-
None,
|
423 |
),
|
424 |
)
|
425 |
|
@@ -437,7 +455,25 @@ with gr.Blocks(
|
|
437 |
layout="panel",
|
438 |
avatar_images=(
|
439 |
"./assets/logos/help.png",
|
440 |
-
None,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
441 |
),
|
442 |
)
|
443 |
|
|
|
14 |
from spinoza_project.source.frontend.utils import (
|
15 |
make_html_source,
|
16 |
make_html_presse_source,
|
17 |
+
make_html_afp_source,
|
18 |
parse_output_llm_with_sources,
|
19 |
init_env,
|
20 |
)
|
|
|
49 |
## Loading_tools
|
50 |
print("Loading Databases")
|
51 |
bdd_presse = get_vectorstore_api("presse")
|
52 |
+
bdd_afp = get_vectorstore_api("afp")
|
53 |
qdrants = {
|
54 |
tab: pickle_to_document_store(
|
55 |
hf_hub_download(
|
|
|
59 |
)
|
60 |
)
|
61 |
for tab in config["prompt_naming"]
|
62 |
+
if tab != "Presse" and tab != "AFP"
|
63 |
}
|
64 |
|
65 |
## Load Prompts
|
|
|
208 |
]
|
209 |
|
210 |
|
211 |
+
def get_sources(
|
212 |
+
questions, qdrants=qdrants, bdd_presse=bdd_presse, bdd_afp=bdd_afp, config=config
|
213 |
+
):
|
214 |
k = config["num_document_retrieved"]
|
215 |
min_similarity = config["min_similarity"]
|
216 |
+
text, formated = [], []
|
|
|
217 |
for i, (question, tab) in enumerate(zip(questions, list(config["tabs"].keys()))):
|
218 |
+
if tab == "Presse":
|
219 |
+
sources = bdd_presse.similarity_search_with_relevance_scores(
|
220 |
+
question.replace("<p>", "").replace("</p>\n", ""), k=k
|
221 |
+
)
|
222 |
+
sources = [
|
223 |
+
(doc, score) for doc, score in sources if score >= min_similarity
|
224 |
+
]
|
225 |
+
formated.extend(
|
226 |
+
[
|
227 |
+
make_html_presse_source(source[0], j, source[1])
|
228 |
+
for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
|
229 |
+
]
|
230 |
+
)
|
231 |
+
|
232 |
+
elif tab == "AFP":
|
233 |
+
sources = bdd_afp.similarity_search_with_relevance_scores(
|
234 |
+
question.replace("<p>", "").replace("</p>\n", ""), k=k
|
235 |
)
|
236 |
+
sources = [
|
237 |
+
(doc, score) for doc, score in sources if score >= min_similarity
|
238 |
+
]
|
239 |
+
formated.extend(
|
240 |
+
[
|
241 |
+
make_html_afp_source(source[0], j, source[1])
|
242 |
+
for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
|
243 |
+
]
|
244 |
+
)
|
245 |
+
|
246 |
+
else:
|
247 |
+
sources = qdrants[
|
248 |
config["source_mapping"][tab]
|
249 |
].similarity_search_with_relevance_scores(
|
250 |
config["query_preprompt"]
|
251 |
+ question.replace("<p>", "").replace("</p>\n", ""),
|
252 |
k=k,
|
253 |
)
|
254 |
+
sources = [
|
255 |
+
(doc, score) for doc, score in sources if score >= min_similarity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
256 |
]
|
257 |
+
formated.extend(
|
258 |
+
[
|
259 |
+
make_html_source(source[0], j, source[1], config)
|
260 |
+
for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
|
261 |
+
]
|
262 |
+
)
|
263 |
+
|
264 |
text.extend(
|
265 |
[
|
266 |
"\n\n".join(
|
|
|
278 |
return formated, text
|
279 |
|
280 |
|
281 |
+
def retrieve_sources(
    *questions, qdrants=qdrants, bdd_presse=bdd_presse, bdd_afp=bdd_afp, config=config
):
    """Fetch the sources for every question and flatten them into one tuple.

    The positional ``questions`` are forwarded as a single tuple to
    ``get_sources`` together with the document stores and the configuration.

    Returns:
        A tuple whose first element is the list of formatted sources returned
        by ``get_sources``; the remaining elements are the text sources,
        unpacked one by one.
    """
    # get_sources yields a (formatted, text) pair; only the text part is unpacked.
    html_cards, passages = get_sources(
        questions, qdrants, bdd_presse, bdd_afp, config
    )
    return (html_cards, *passages)
|
289 |
|
|
|
364 |
component_sources = {elt: gr.State("") for elt in config["tabs"]}
|
365 |
text_sources = {elt: gr.State("") for elt in config["tabs"]}
|
366 |
tab_states = {elt: gr.State(elt) for elt in config["tabs"]}
|
|
|
|
|
|
|
|
|
367 |
|
368 |
with gr.Tab("Q&A", elem_id="main-component"):
|
369 |
with gr.Row(elem_id="chatbot-row"):
|
|
|
419 |
layout="panel",
|
420 |
avatar_images=(
|
421 |
"./assets/logos/help.png",
|
422 |
+
None,
|
423 |
),
|
424 |
)
|
425 |
|
|
|
437 |
layout="panel",
|
438 |
avatar_images=(
|
439 |
"./assets/logos/help.png",
|
440 |
+
None,
|
441 |
),
|
442 |
)
|
443 |
|
|
|
455 |
layout="panel",
|
456 |
avatar_images=(
|
457 |
"./assets/logos/help.png",
|
458 |
+
None,
|
459 |
+
),
|
460 |
+
)
|
461 |
+
|
462 |
+
with gr.Accordion(
|
463 |
+
"AFP agent",
|
464 |
+
open=False,
|
465 |
+
elem_id="accordion-afp",
|
466 |
+
elem_classes="accordion",
|
467 |
+
):
|
468 |
+
chatbots[list(config["tabs"].keys())[5]] = gr.Chatbot(
|
469 |
+
show_copy_button=True,
|
470 |
+
show_share_button=False,
|
471 |
+
show_label=False,
|
472 |
+
elem_id="chatbot-afp",
|
473 |
+
layout="panel",
|
474 |
+
avatar_images=(
|
475 |
+
"./assets/logos/help.png",
|
476 |
+
None,
|
477 |
),
|
478 |
)
|
479 |
|
assets/style.css
CHANGED
@@ -160,6 +160,11 @@ a {
|
|
160 |
font-weight: bold;
|
161 |
}
|
162 |
|
|
|
|
|
|
|
|
|
|
|
163 |
}
|
164 |
|
165 |
textarea.scroll-hide {
|
|
|
160 |
font-weight: bold;
|
161 |
}
|
162 |
|
163 |
+
#accordion-afp>button:nth-child(2)>span:nth-child(1) {
|
164 |
+
color: #9ca1a5e7;
|
165 |
+
font-weight: bold;
|
166 |
+
}
|
167 |
+
|
168 |
}
|
169 |
|
170 |
textarea.scroll-hide {
|
assets/utils_javascript.py
CHANGED
@@ -23,6 +23,7 @@ def accordion_trigger():
|
|
23 |
var accordion_politique = document.getElementById("accordion-politique")
|
24 |
var accordion_legal = document.getElementById("accordion-legal")
|
25 |
var accordion_ademe= document.getElementById("accordion-ademe")
|
|
|
26 |
var accordion_spinoza = document.getElementById("accordion-spinoza")
|
27 |
document.querySelectorAll(".loader").forEach(el => el.remove());
|
28 |
document.querySelectorAll(".loader-helper").forEach(el => el.remove());
|
@@ -31,6 +32,7 @@ def accordion_trigger():
|
|
31 |
accordion_politique.children[1].children[0].textContent = "Politics agent";
|
32 |
accordion_legal.children[1].children[0].textContent = "Law agent";
|
33 |
accordion_ademe.children[1].children[0].textContent = "ADEME agent";
|
|
|
34 |
accordion_spinoza.children[1].children[0].textContent = "Spinoza";
|
35 |
accordion_science.children[1].children[0].innerHTML += "<span class='loader-helper'> - </span>";
|
36 |
accordion_science.children[1].children[0].innerHTML += "<span class='loader'>loading</span>";
|
@@ -42,6 +44,8 @@ def accordion_trigger():
|
|
42 |
accordion_legal.children[1].children[0].innerHTML += "<span class='loader'>loading</span>";
|
43 |
accordion_ademe.children[1].children[0].innerHTML += "<span class='loader-helper'> - </span>";
|
44 |
accordion_ademe.children[1].children[0].innerHTML += "<span class='loader'>loading</span>";
|
|
|
|
|
45 |
}
|
46 |
});
|
47 |
}
|
@@ -56,11 +60,13 @@ def accordion_trigger_end():
|
|
56 |
var accordion_politique = document.getElementById("accordion-politique")
|
57 |
var accordion_legal = document.getElementById("accordion-legal")
|
58 |
var accordion_ademe = document.getElementById("accordion-ademe")
|
|
|
59 |
accordion_science.children[1].children[0].textContent = "Science agent - ready";
|
60 |
accordion_presse.children[1].children[0].textContent = "Press agent - ready";
|
61 |
accordion_politique.children[1].children[0].textContent = "Politics agent - ready";
|
62 |
accordion_legal.children[1].children[0].textContent = "Law agent - ready";
|
63 |
accordion_ademe.children[1].children[0].textContent = "ADEME agent - ready";
|
|
|
64 |
}
|
65 |
"""
|
66 |
|
|
|
23 |
var accordion_politique = document.getElementById("accordion-politique")
|
24 |
var accordion_legal = document.getElementById("accordion-legal")
|
25 |
var accordion_ademe= document.getElementById("accordion-ademe")
|
26 |
+
var accordion_afp= document.getElementById("accordion-afp")
|
27 |
var accordion_spinoza = document.getElementById("accordion-spinoza")
|
28 |
document.querySelectorAll(".loader").forEach(el => el.remove());
|
29 |
document.querySelectorAll(".loader-helper").forEach(el => el.remove());
|
|
|
32 |
accordion_politique.children[1].children[0].textContent = "Politics agent";
|
33 |
accordion_legal.children[1].children[0].textContent = "Law agent";
|
34 |
accordion_ademe.children[1].children[0].textContent = "ADEME agent";
|
35 |
+
accordion_afp.children[1].children[0].textContent = "AFP agent";
|
36 |
accordion_spinoza.children[1].children[0].textContent = "Spinoza";
|
37 |
accordion_science.children[1].children[0].innerHTML += "<span class='loader-helper'> - </span>";
|
38 |
accordion_science.children[1].children[0].innerHTML += "<span class='loader'>loading</span>";
|
|
|
44 |
accordion_legal.children[1].children[0].innerHTML += "<span class='loader'>loading</span>";
|
45 |
accordion_ademe.children[1].children[0].innerHTML += "<span class='loader-helper'> - </span>";
|
46 |
accordion_ademe.children[1].children[0].innerHTML += "<span class='loader'>loading</span>";
|
47 |
+
accordion_afp.children[1].children[0].innerHTML += "<span class='loader-helper'> - </span>";
|
48 |
+
accordion_afp.children[1].children[0].innerHTML += "<span class='loader'>loading</span>";
|
49 |
}
|
50 |
});
|
51 |
}
|
|
|
60 |
var accordion_politique = document.getElementById("accordion-politique")
|
61 |
var accordion_legal = document.getElementById("accordion-legal")
|
62 |
var accordion_ademe = document.getElementById("accordion-ademe")
|
63 |
+
var accordion_afp= document.getElementById("accordion-afp")
|
64 |
accordion_science.children[1].children[0].textContent = "Science agent - ready";
|
65 |
accordion_presse.children[1].children[0].textContent = "Press agent - ready";
|
66 |
accordion_politique.children[1].children[0].textContent = "Politics agent - ready";
|
67 |
accordion_legal.children[1].children[0].textContent = "Law agent - ready";
|
68 |
accordion_ademe.children[1].children[0].textContent = "ADEME agent - ready";
|
69 |
+
accordion_afp.children[1].children[0].textContent = "AFP agent - ready";
|
70 |
}
|
71 |
"""
|
72 |
|
spinoza_project/config.yaml
CHANGED
@@ -11,10 +11,7 @@ tabs:
|
|
11 |
* *Des documents institutionnels (analyses demandées par la France & rapports d'activité)*\n
|
12 |
* *Les plans de transition sectoriels pour les secteurs industriels les plus émetteurs : (verre, papier, ciment, acier, aluminium, chimie, sucre)*"
|
13 |
Presse: "*Outil dédié aux données fournies par Aday concernant la presse.*"
|
14 |
-
|
15 |
-
logo_rsf: ""
|
16 |
-
|
17 |
-
logo_ap: ""
|
18 |
|
19 |
source_mapping:
|
20 |
GIEC et IPBES: "Science"
|
@@ -22,6 +19,7 @@ source_mapping:
|
|
22 |
Documents Stratégiques: "Politique"
|
23 |
ADEME: "ADEME"
|
24 |
Presse: "Presse"
|
|
|
25 |
|
26 |
prompt_naming:
|
27 |
Science: "Science"
|
@@ -29,8 +27,8 @@ prompt_naming:
|
|
29 |
Politique: "Politique"
|
30 |
ADEME: "ADEME"
|
31 |
Presse: "Presse"
|
|
|
32 |
|
33 |
-
database_index_path: "./app/data/database_tab_placeholder.pickle"
|
34 |
query_preprompt: "query: "
|
35 |
passage_preprompt: "passage: "
|
36 |
embedding_model: "intfloat/multilingual-e5-base"
|
|
|
11 |
* *Des documents institutionnels (analyses demandées par la France & rapports d'activité)*\n
|
12 |
* *Les plans de transition sectoriels pour les secteurs industriels les plus émetteurs : (verre, papier, ciment, acier, aluminium, chimie, sucre)*"
|
13 |
Presse: "*Outil dédié aux données fournies par Aday concernant la presse.*"
|
14 |
+
AFP: "AFP"
|
|
|
|
|
|
|
15 |
|
16 |
source_mapping:
|
17 |
GIEC et IPBES: "Science"
|
|
|
19 |
Documents Stratégiques: "Politique"
|
20 |
ADEME: "ADEME"
|
21 |
Presse: "Presse"
|
22 |
+
AFP: "AFP"
|
23 |
|
24 |
prompt_naming:
|
25 |
Science: "Science"
|
|
|
27 |
Politique: "Politique"
|
28 |
ADEME: "ADEME"
|
29 |
Presse: "Presse"
|
30 |
+
AFP: "AFP"
|
31 |
|
|
|
32 |
query_preprompt: "query: "
|
33 |
passage_preprompt: "passage: "
|
34 |
embedding_model: "intfloat/multilingual-e5-base"
|
spinoza_project/source/frontend/utils.py
CHANGED
@@ -91,6 +91,22 @@ def make_html_presse_source(source, i, score):
|
|
91 |
"""
|
92 |
|
93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
def make_html_source(source, i, score, config):
|
95 |
meta = source.metadata
|
96 |
return f"""
|
|
|
91 |
"""
|
92 |
|
93 |
|
94 |
+
def make_html_afp_source(source, i, score):
    """Render one AFP document as an HTML "card" snippet.

    Args:
        source: Retrieved document; its ``metadata`` mapping must provide the
            keys ``file_title``, ``file_type`` and ``file_source_type``, and
            its ``page_content`` is used as the card body.
        i: Document number, used in the element id (``doc{i}``) and heading.
        score: Relevance score; displayed as ``100 * score`` rounded to one
            decimal place.

    Returns:
        The HTML markup of the card as a string.

    Raises:
        KeyError: If one of the expected metadata keys is missing.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from html import escape

    meta = source.metadata
    # Document text and metadata are plain text, not markup: escape them so
    # characters such as "<", "&" or quotes cannot break (or inject into) the
    # surrounding card HTML.
    title = escape(str(meta['file_title']))
    file_type = escape(str(meta['file_type']))
    source_type = escape(str(meta['file_source_type']))
    content = escape(str(source.page_content))
    return f"""
<div class="card" id="doc{i}">
<div class="card-content">
<h2>Doc {i} - {title} - {file_type} AFP</h2>
<p>{content}</p>
</div>
<div class="card-footer">
<span>{source_type}</span>
<span>Relevance Score : {round(100*score,1)}%</span>
</div>
</div>
"""
|
108 |
+
|
109 |
+
|
110 |
def make_html_source(source, i, score, config):
|
111 |
meta = source.metadata
|
112 |
return f"""
|