timeki committed on
Commit
47fab06
·
1 Parent(s): 69f7a91

minor cleans

Browse files
app.py CHANGED
@@ -116,7 +116,7 @@ def cqa_tab(tab_name):
116
  with gr.Row(elem_id="chatbot-row"):
117
  # Left column - Chat interface
118
  with gr.Column(scale=2):
119
- chatbot, textbox, config_button = create_chat_interface()
120
 
121
  # Right column - Content panels
122
  with gr.Column(scale=2, variant="panel", elem_id="right-panel"):
@@ -280,7 +280,7 @@ def main_ui():
280
  create_about_tab()
281
 
282
  event_handling(cqa_components, config_components, tab_name = 'ClimateQ&A')
283
- event_handling(local_cqa_components, config_components, tab_name = 'Beta - POC Adapt\'Action')
284
 
285
  demo.queue()
286
 
 
116
  with gr.Row(elem_id="chatbot-row"):
117
  # Left column - Chat interface
118
  with gr.Column(scale=2):
119
+ chatbot, textbox, config_button = create_chat_interface(tab_name)
120
 
121
  # Right column - Content panels
122
  with gr.Column(scale=2, variant="panel", elem_id="right-panel"):
 
280
  create_about_tab()
281
 
282
  event_handling(cqa_components, config_components, tab_name = 'ClimateQ&A')
283
+ event_handling(local_cqa_components, config_components, tab_name = "Beta - POC Adapt'Action")
284
 
285
  demo.queue()
286
 
climateqa/engine/chains/prompts.py CHANGED
@@ -66,10 +66,11 @@ You are ClimateQ&A, an AI Assistant created by Ekimetrics. You are given a quest
66
  Guidelines:
67
  - If the passages have useful facts or numbers, use them in your answer.
68
  - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
69
- - You will receive passages from different reports, eg IPCC and PPCP, make separate paragraphs and specify the source of the information in your answer, eg "According to IPCC, ...".
70
- - The different sources are IPCC, IPBES, PPCP (for Plan Climat Air Energie Territorial de Paris), PBDP (for Plan Biodiversité de Paris), Acclimaterra.
 
71
  - Do not mention that you are using specific extract documents, but mention only the source information. "According to IPCC, ..." rather than "According to the provided document from IPCC ..."
72
- - Make a clear distinction between information from IPCC, IPBES, Acclimaterra that are scientific reports and PPCP, PBDP that are strategic reports. Strategic reports should not be taken has verified facts, but as political or strategic decisions.
73
  - If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]
74
  - Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.
75
  - If it makes sense, use bullet points and lists to make your answers easier to understand.
@@ -78,7 +79,6 @@ Guidelines:
78
  - Consider by default that the question is about the past century unless it is specified otherwise.
79
  - If the passage is the caption of a picture, you can still use it as part of your answer as any other document.
80
 
81
-
82
  -----------------------
83
  Passages:
84
  {context}
 
66
  Guidelines:
67
  - If the passages have useful facts or numbers, use them in your answer.
68
  - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
69
+ - You will receive passages from different reports, e.g., IPCC and PPCP. Make separate paragraphs and specify the source of the information in your answer, e.g., "According to IPCC, ...".
70
+ - The different sources are IPCC, IPBES, PPCP (for Plan Climat Air Energie Territorial de Paris), PBDP (for Plan Biodiversité de Paris), Acclimaterra (Rapport scientifique de la région Nouvelle Aquitaine en France).
71
+ - If the reports are local (like PPCP, PBDP, Acclimaterra), consider that the information is specific to the region and not global. If the document is about a nearby region (for example, an extract from Acclimaterra for a question about Britain), explicitly state the concerned region.
72
  - Do not mention that you are using specific extract documents, but mention only the source information. "According to IPCC, ..." rather than "According to the provided document from IPCC ..."
73
+ - Make a clear distinction between information from IPCC, IPBES, Acclimaterra that are scientific reports and PPCP, PBDP that are strategic reports. Strategic reports should not be taken as verified facts, but as political or strategic decisions.
74
  - If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]
75
  - Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.
76
  - If it makes sense, use bullet points and lists to make your answers easier to understand.
 
79
  - Consider by default that the question is about the past century unless it is specified otherwise.
80
  - If the passage is the caption of a picture, you can still use it as part of your answer as any other document.
81
 
 
82
  -----------------------
83
  Passages:
84
  {context}
climateqa/engine/chains/retrieve_documents.py CHANGED
@@ -370,22 +370,38 @@ async def retrieve_documents(
370
  return docs_question, images_question
371
 
372
 
373
- async def retrieve_documents_for_all_questions(state, config, source_type, to_handle_questions_index, vectorstore, reranker, rerank_by_question=True, k_final=15, k_before_reranking=100):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  """
375
  Retrieve documents in parallel for all questions.
376
  """
377
  # to_handle_questions_index = [x for x in state["questions_list"] if x["source_type"] == "IPx"]
378
 
379
  # TODO split les questions selon le type de sources dans le state question + conditions sur le nombre de questions traités par type de source
380
- docs = state.get("documents", [])
381
- related_content = state.get("related_content", [])
382
- search_figures = "Figures (IPCC/IPBES)" in state["relevant_content_sources_selection"]
383
- search_only = state["search_only"]
384
- reports = state["reports"]
385
 
386
- k_by_question = k_final // state["n_questions"]["total"]
387
- k_summary_by_question = _get_k_summary_by_question(state["n_questions"]["total"])
388
- k_images_by_question = _get_k_images_by_question(state["n_questions"]["total"])
 
 
 
389
  k_before_reranking=100
390
 
391
  tasks = [
@@ -404,7 +420,7 @@ async def retrieve_documents_for_all_questions(state, config, source_type, to_ha
404
  k_by_question=k_by_question,
405
  k_summary_by_question=k_summary_by_question
406
  )
407
- for i, question in enumerate(state["questions_list"]) if i in to_handle_questions_index
408
  ]
409
  results = await asyncio.gather(*tasks)
410
  # Combine results
@@ -420,10 +436,18 @@ def make_IPx_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_
420
  source_type = "IPx"
421
  IPx_questions_index = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "IPx"]
422
 
423
- # return {"documents":[], "related_contents": [], "handled_questions_index": list(range(len(state["questions_list"])))} # TODO Remove
424
-
 
 
 
 
425
  state = await retrieve_documents_for_all_questions(
426
- state=state,
 
 
 
 
427
  config=config,
428
  source_type=source_type,
429
  to_handle_questions_index=IPx_questions_index,
@@ -447,8 +471,18 @@ def make_POC_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_
447
  source_type = "POC"
448
  POC_questions_index = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "POC"]
449
 
 
 
 
 
 
 
450
  state = await retrieve_documents_for_all_questions(
451
- state=state,
 
 
 
 
452
  config=config,
453
  source_type=source_type,
454
  to_handle_questions_index=POC_questions_index,
 
370
  return docs_question, images_question
371
 
372
 
373
+ async def retrieve_documents_for_all_questions(
374
+ search_figures,
375
+ search_only,
376
+ reports,
377
+ questions_list,
378
+ n_questions,
379
+ config,
380
+ source_type,
381
+ to_handle_questions_index,
382
+ vectorstore,
383
+ reranker,
384
+ rerank_by_question=True,
385
+ k_final=15,
386
+ k_before_reranking=100
387
+ ):
388
  """
389
  Retrieve documents in parallel for all questions.
390
  """
391
  # to_handle_questions_index = [x for x in state["questions_list"] if x["source_type"] == "IPx"]
392
 
393
  # TODO split les questions selon le type de sources dans le state question + conditions sur le nombre de questions traités par type de source
394
+ # search_figures = "Figures (IPCC/IPBES)" in state["relevant_content_sources_selection"]
395
+ # search_only = state["search_only"]
396
+ # reports = state["reports"]
397
+ # questions_list = state["questions_list"]
 
398
 
399
+ # k_by_question = k_final // state["n_questions"]["total"]
400
+ # k_summary_by_question = _get_k_summary_by_question(state["n_questions"]["total"])
401
+ # k_images_by_question = _get_k_images_by_question(state["n_questions"]["total"])
402
+ k_by_question = k_final // n_questions
403
+ k_summary_by_question = _get_k_summary_by_question(n_questions)
404
+ k_images_by_question = _get_k_images_by_question(n_questions)
405
  k_before_reranking=100
406
 
407
  tasks = [
 
420
  k_by_question=k_by_question,
421
  k_summary_by_question=k_summary_by_question
422
  )
423
+ for i, question in enumerate(questions_list) if i in to_handle_questions_index
424
  ]
425
  results = await asyncio.gather(*tasks)
426
  # Combine results
 
436
  source_type = "IPx"
437
  IPx_questions_index = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "IPx"]
438
 
439
+ search_figures = "Figures (IPCC/IPBES)" in state["relevant_content_sources_selection"]
440
+ search_only = state["search_only"]
441
+ reports = state["reports"]
442
+ questions_list = state["questions_list"]
443
+ n_questions=state["n_questions"]["total"]
444
+
445
  state = await retrieve_documents_for_all_questions(
446
+ search_figures=search_figures,
447
+ search_only=search_only,
448
+ reports=reports,
449
+ questions_list=questions_list,
450
+ n_questions=n_questions,
451
  config=config,
452
  source_type=source_type,
453
  to_handle_questions_index=IPx_questions_index,
 
471
  source_type = "POC"
472
  POC_questions_index = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "POC"]
473
 
474
+ search_figures = "Figures (IPCC/IPBES)" in state["relevant_content_sources_selection"]
475
+ search_only = state["search_only"]
476
+ reports = state["reports"]
477
+ questions_list = state["questions_list"]
478
+ n_questions=state["n_questions"]["total"]
479
+
480
  state = await retrieve_documents_for_all_questions(
481
+ search_figures=search_figures,
482
+ search_only=search_only,
483
+ reports=reports,
484
+ questions_list=questions_list,
485
+ n_questions=n_questions,
486
  config=config,
487
  source_type=source_type,
488
  to_handle_questions_index=POC_questions_index,
front/tabs/chat_interface.py CHANGED
@@ -20,12 +20,31 @@ Please note that we log your questions for meta-analysis purposes, so avoid shar
20
  What do you want to learn ?
21
  """
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
 
25
  # UI Layout Components
26
- def create_chat_interface():
 
27
  chatbot = gr.Chatbot(
28
- value=[ChatMessage(role="assistant", content=init_prompt)],
29
  type="messages",
30
  show_copy_button=True,
31
  show_label=False,
 
20
  What do you want to learn ?
21
  """
22
 
23
+ init_prompt_poc = """
24
+ Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports, PCAET of Paris, the Plan Biodiversité 2018-2024, and Acclimaterra reports from la Région Nouvelle-Aquitaine **.
25
+
26
+ ❓ How to use
27
+ - **Language**: You can ask me your questions in any language.
28
+ - **Audience**: You can specify your audience (children, general public, experts) to get a more adapted answer.
29
+ - **Sources**: You can choose to search in the IPCC or IPBES reports, and POC sources for local documents (PCAET, Plan Biodiversité, Acclimaterra).
30
+ - **Relevant content sources**: You can choose to search for figures, papers, or graphs that can be relevant for your question.
31
+
32
+ ⚠️ Limitations
33
+ *Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
34
+
35
+ 🛈 Information
36
+ Please note that we log your questions for meta-analysis purposes, so avoid sharing any sensitive or personal information.
37
+
38
+ What do you want to learn ?
39
+ """
40
+
41
 
42
 
43
  # UI Layout Components
44
+ def create_chat_interface(tab):
45
+ init_prompt_message = init_prompt_poc if tab == "Beta - POC Adapt'Action" else init_prompt
46
  chatbot = gr.Chatbot(
47
+ value=[ChatMessage(role="assistant", content=init_prompt_message)],
48
  type="messages",
49
  show_copy_button=True,
50
  show_label=False,
front/tabs/main_tab.py CHANGED
@@ -3,7 +3,6 @@ from .chat_interface import create_chat_interface
3
  from .tab_examples import create_examples_tab
4
  from .tab_papers import create_papers_tab
5
  from .tab_figures import create_figures_tab
6
- from .chat_interface import create_chat_interface
7
 
8
  def cqa_tab(tab_name):
9
  # State variables
@@ -12,7 +11,7 @@ def cqa_tab(tab_name):
12
  with gr.Row(elem_id="chatbot-row"):
13
  # Left column - Chat interface
14
  with gr.Column(scale=2):
15
- chatbot, textbox, config_button = create_chat_interface()
16
 
17
  # Right column - Content panels
18
  with gr.Column(scale=2, variant="panel", elem_id="right-panel"):
 
3
  from .tab_examples import create_examples_tab
4
  from .tab_papers import create_papers_tab
5
  from .tab_figures import create_figures_tab
 
6
 
7
  def cqa_tab(tab_name):
8
  # State variables
 
11
  with gr.Row(elem_id="chatbot-row"):
12
  # Left column - Chat interface
13
  with gr.Column(scale=2):
14
+ chatbot, textbox, config_button = create_chat_interface(tab_name)
15
 
16
  # Right column - Content panels
17
  with gr.Column(scale=2, variant="panel", elem_id="right-panel"):
sandbox/20241104 - CQA - StepByStep CQA.ipynb CHANGED
The diff for this file is too large to render. See raw diff