Spaces:

Ekimetrics
/

climate-question-answering

Running

App Files Files Community

timeki committed 14 days ago

Commit

47fab06

1 Parent(s): 69f7a91

minor cleans

Browse files

Files changed (6) hide show

app.py +2 -2
climateqa/engine/chains/prompts.py +4 -4
climateqa/engine/chains/retrieve_documents.py +48 -14
front/tabs/chat_interface.py +21 -2
front/tabs/main_tab.py +1 -2
sandbox/20241104 - CQA - StepByStep CQA.ipynb +0 -0

app.py CHANGED Viewed

@@ -116,7 +116,7 @@ def cqa_tab(tab_name):
         with gr.Row(elem_id="chatbot-row"):
             # Left column - Chat interface
             with gr.Column(scale=2):
-                chatbot, textbox, config_button = create_chat_interface()
             # Right column - Content panels
             with gr.Column(scale=2, variant="panel", elem_id="right-panel"):
@@ -280,7 +280,7 @@ def main_ui():
             create_about_tab()
         event_handling(cqa_components, config_components, tab_name = 'ClimateQ&A')
-        event_handling(local_cqa_components, config_components, tab_name = 'Beta - POC Adapt\'Action')
         demo.queue()

         with gr.Row(elem_id="chatbot-row"):
             # Left column - Chat interface
             with gr.Column(scale=2):
+                chatbot, textbox, config_button = create_chat_interface(tab_name)
             # Right column - Content panels
             with gr.Column(scale=2, variant="panel", elem_id="right-panel"):
             create_about_tab()
         event_handling(cqa_components, config_components, tab_name = 'ClimateQ&A')
+        event_handling(local_cqa_components, config_components, tab_name = "Beta - POC Adapt'Action")
         demo.queue()

climateqa/engine/chains/prompts.py CHANGED Viewed

@@ -66,10 +66,11 @@ You are ClimateQ&A, an AI Assistant created by Ekimetrics. You are given a quest
 Guidelines:
 - If the passages have useful facts or numbers, use them in your answer.
 - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
-- You will receive passages from different reports, eg IPCC and PPCP, make separate paragraphs and specify the source of the information in your answer, eg "According to IPCC, ...".
-- The different sources are IPCC, IPBES, PPCP (for Plan Climat Air Energie Territorial de Paris), PBDP (for Plan Biodiversité de Paris), Acclimaterra.
 - Do not mention that you are using specific extract documents, but mention only the source information. "According to IPCC, ..." rather than "According to the provided document from IPCC ..."
-- Make a clear distinction between information from IPCC, IPBES, Acclimaterra that are scientific reports and PPCP, PBDP that are strategic reports. Strategic reports should not be taken has verified facts, but as political or strategic decisions.
 - If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]
 - Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.
 - If it makes sense, use bullet points and lists to make your answers easier to understand.
@@ -78,7 +79,6 @@ Guidelines:
 - Consider by default that the question is about the past century unless it is specified otherwise.
 - If the passage is the caption of a picture, you can still use it as part of your answer as any other document.
 -----------------------
 Passages:
 {context}

 Guidelines:
 - If the passages have useful facts or numbers, use them in your answer.
 - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
+- You will receive passages from different reports, e.g., IPCC and PPCP. Make separate paragraphs and specify the source of the information in your answer, e.g., "According to IPCC, ...".
+- The different sources are IPCC, IPBES, PPCP (for Plan Climat Air Energie Territorial de Paris), PBDP (for Plan Biodiversité de Paris), Acclimaterra (Rapport scientifique de la région Nouvelle Aquitaine en France).
+- If the reports are local (like PPCP, PBDP, Acclimaterra), consider that the information is specific to the region and not global. If the document is about a nearby region (for example, an extract from Acclimaterra for a question about Britain), explicitly state the concerned region.
 - Do not mention that you are using specific extract documents, but mention only the source information. "According to IPCC, ..." rather than "According to the provided document from IPCC ..."
+- Make a clear distinction between information from IPCC, IPBES, Acclimaterra that are scientific reports and PPCP, PBDP that are strategic reports. Strategic reports should not be taken as verified facts, but as political or strategic decisions.
 - If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]
 - Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.
 - If it makes sense, use bullet points and lists to make your answers easier to understand.
 - Consider by default that the question is about the past century unless it is specified otherwise.
 - If the passage is the caption of a picture, you can still use it as part of your answer as any other document.
 -----------------------
 Passages:
 {context}

climateqa/engine/chains/retrieve_documents.py CHANGED Viewed

@@ -370,22 +370,38 @@ async def retrieve_documents(
     return docs_question, images_question
-async def retrieve_documents_for_all_questions(state, config, source_type, to_handle_questions_index, vectorstore, reranker, rerank_by_question=True, k_final=15, k_before_reranking=100):
     """
     Retrieve documents in parallel for all questions.
     """
     # to_handle_questions_index = [x for x in state["questions_list"] if x["source_type"] == "IPx"]
     # TODO split les questions selon le type de sources dans le state question + conditions sur le nombre de questions traités par type de source
-    docs = state.get("documents", [])
-    related_content = state.get("related_content", [])
-    search_figures = "Figures (IPCC/IPBES)" in state["relevant_content_sources_selection"]
-    search_only = state["search_only"]
-    reports = state["reports"]
-    k_by_question = k_final // state["n_questions"]["total"]
-    k_summary_by_question = _get_k_summary_by_question(state["n_questions"]["total"])
-    k_images_by_question = _get_k_images_by_question(state["n_questions"]["total"])
     k_before_reranking=100
     tasks = [
@@ -404,7 +420,7 @@ async def retrieve_documents_for_all_questions(state, config, source_type, to_ha
             k_by_question=k_by_question,
             k_summary_by_question=k_summary_by_question
         )
-        for i, question in enumerate(state["questions_list"]) if i in to_handle_questions_index
     ]
     results = await asyncio.gather(*tasks)
     # Combine results
@@ -420,10 +436,18 @@ def make_IPx_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_
         source_type = "IPx"
         IPx_questions_index = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "IPx"]
-        # return {"documents":[], "related_contents": [], "handled_questions_index": list(range(len(state["questions_list"])))} # TODO Remove
         state = await retrieve_documents_for_all_questions(
-            state=state,
             config=config,
             source_type=source_type,
             to_handle_questions_index=IPx_questions_index,
@@ -447,8 +471,18 @@ def make_POC_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_
         source_type = "POC"
         POC_questions_index = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "POC"]
         state = await retrieve_documents_for_all_questions(
-            state=state,
             config=config,
             source_type=source_type,
             to_handle_questions_index=POC_questions_index,

     return docs_question, images_question
+async def retrieve_documents_for_all_questions(
+    search_figures,
+    search_only,
+    reports,
+    questions_list,
+    n_questions,
+    config,
+    source_type,
+    to_handle_questions_index,
+    vectorstore,
+    reranker,
+    rerank_by_question=True,
+    k_final=15,
+    k_before_reranking=100
+):
     """
     Retrieve documents in parallel for all questions.
     """
     # to_handle_questions_index = [x for x in state["questions_list"] if x["source_type"] == "IPx"]
     # TODO split les questions selon le type de sources dans le state question + conditions sur le nombre de questions traités par type de source
+    # search_figures = "Figures (IPCC/IPBES)" in state["relevant_content_sources_selection"]
+    # search_only = state["search_only"]
+    # reports = state["reports"]
+    # questions_list = state["questions_list"]
+    # k_by_question = k_final // state["n_questions"]["total"]
+    # k_summary_by_question = _get_k_summary_by_question(state["n_questions"]["total"])
+    # k_images_by_question = _get_k_images_by_question(state["n_questions"]["total"])
+    k_by_question = k_final // n_questions
+    k_summary_by_question = _get_k_summary_by_question(n_questions)
+    k_images_by_question = _get_k_images_by_question(n_questions)
     k_before_reranking=100
     tasks = [
             k_by_question=k_by_question,
             k_summary_by_question=k_summary_by_question
         )
+        for i, question in enumerate(questions_list) if i in to_handle_questions_index
     ]
     results = await asyncio.gather(*tasks)
     # Combine results
         source_type = "IPx"
         IPx_questions_index = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "IPx"]
+        search_figures = "Figures (IPCC/IPBES)" in state["relevant_content_sources_selection"]
+        search_only = state["search_only"]
+        reports = state["reports"]
+        questions_list = state["questions_list"]
+        n_questions=state["n_questions"]["total"]
         state = await retrieve_documents_for_all_questions(
+            search_figures=search_figures,
+            search_only=search_only,
+            reports=reports,
+            questions_list=questions_list,
+            n_questions=n_questions,
             config=config,
             source_type=source_type,
             to_handle_questions_index=IPx_questions_index,
         source_type = "POC"
         POC_questions_index = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "POC"]
+        search_figures = "Figures (IPCC/IPBES)" in state["relevant_content_sources_selection"]
+        search_only = state["search_only"]
+        reports = state["reports"]
+        questions_list = state["questions_list"]
+        n_questions=state["n_questions"]["total"]
         state = await retrieve_documents_for_all_questions(
+            search_figures=search_figures,
+            search_only=search_only,
+            reports=reports,
+            questions_list=questions_list,
+            n_questions=n_questions,
             config=config,
             source_type=source_type,
             to_handle_questions_index=POC_questions_index,

front/tabs/chat_interface.py CHANGED Viewed

@@ -20,12 +20,31 @@ Please note that we log your questions for meta-analysis purposes, so avoid shar
 What do you want to learn ?
 """
 # UI Layout Components
-def create_chat_interface():
     chatbot = gr.Chatbot(
-        value=[ChatMessage(role="assistant", content=init_prompt)],
         type="messages",
         show_copy_button=True,
         show_label=False,

 What do you want to learn ?
 """
+init_prompt_poc = """
+Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports, PCAET of Paris, the Plan Biodiversité 2018-2024, and Acclimaterra reports from la Région Nouvelle-Aquitaine**.
+❓ How to use
+- **Language**: You can ask me your questions in any language.
+- **Audience**: You can specify your audience (children, general public, experts) to get a more adapted answer.
+- **Sources**: You can choose to search in the IPCC or IPBES reports, and POC sources for local documents (PCAET, Plan Biodiversité, Acclimaterra).
+- **Relevant content sources**: You can choose to search for figures, papers, or graphs that can be relevant for your question.
+⚠️ Limitations
+*Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
+🛈 Information
+Please note that we log your questions for meta-analysis purposes, so avoid sharing any sensitive or personal information.
+What do you want to learn ?
+"""
 # UI Layout Components
+def create_chat_interface(tab):
+    init_prompt_message = init_prompt_poc if tab == "Beta - POC Adapt'Action" else init_prompt
     chatbot = gr.Chatbot(
+        value=[ChatMessage(role="assistant", content=init_prompt_message)],
         type="messages",
         show_copy_button=True,
         show_label=False,

front/tabs/main_tab.py CHANGED Viewed

@@ -3,7 +3,6 @@ from .chat_interface import create_chat_interface
 from .tab_examples import create_examples_tab
 from .tab_papers import create_papers_tab
 from .tab_figures import create_figures_tab
-from .chat_interface import create_chat_interface
 def cqa_tab(tab_name):
     # State variables
@@ -12,7 +11,7 @@ def cqa_tab(tab_name):
         with gr.Row(elem_id="chatbot-row"):
             # Left column - Chat interface
             with gr.Column(scale=2):
-                chatbot, textbox, config_button = create_chat_interface()
             # Right column - Content panels
             with gr.Column(scale=2, variant="panel", elem_id="right-panel"):

 from .tab_examples import create_examples_tab
 from .tab_papers import create_papers_tab
 from .tab_figures import create_figures_tab
 def cqa_tab(tab_name):
     # State variables
         with gr.Row(elem_id="chatbot-row"):
             # Left column - Chat interface
             with gr.Column(scale=2):
+                chatbot, textbox, config_button = create_chat_interface(tab_name)
             # Right column - Content panels
             with gr.Column(scale=2, variant="panel", elem_id="right-panel"):

sandbox/20241104 - CQA - StepByStep CQA.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff