add_drias_talk_to_data

#19
by timeki - opened
.gitignore CHANGED
@@ -11,3 +11,10 @@ notebooks/
 
 data/
 sandbox/
+
+ climateqa/talk_to_data/database/
+ *.db
+
+ data_ingestion/
+ .vscode
+ *old/
app.py CHANGED
@@ -1,54 +1,32 @@
- from climateqa.engine.embeddings import get_embeddings_function
- embeddings_function = get_embeddings_function()
-
- from sentence_transformers import CrossEncoder
-
- # reranker = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")
-
- import gradio as gr
- from gradio_modal import Modal
- import pandas as pd
- import numpy as np
  import os
- import time
- import re
- import json
-
- from gradio import ChatMessage
-
- # from gradio_modal import Modal
-
- from io import BytesIO
- import base64
 
- from datetime import datetime
  from azure.storage.fileshare import ShareServiceClient
 
- from utils import create_user_id
 
  from gradio_modal import Modal
 
- from PIL import Image
 
- from langchain_core.runnables.schema import StreamEvent
 
- # ClimateQ&A imports
- from climateqa.engine.llm import get_llm
- from climateqa.engine.vectorstore import get_pinecone_vectorstore
- # from climateqa.knowledge.retriever import ClimateQARetriever
- from climateqa.engine.reranker import get_reranker
- from climateqa.engine.embeddings import get_embeddings_function
- from climateqa.engine.chains.prompts import audience_prompts
- from climateqa.sample_questions import QUESTIONS
- from climateqa.constants import POSSIBLE_REPORTS, OWID_CATEGORIES
- from climateqa.utils import get_image_from_azure_blob_storage
- from climateqa.engine.graph import make_graph_agent
- from climateqa.engine.embeddings import get_embeddings_function
- from climateqa.engine.chains.retrieve_papers import find_papers
 
- from front.utils import serialize_docs,process_figures
 
- from climateqa.event_handler import init_audience, handle_retrieved_documents, stream_answer,handle_retrieved_owid_graphs
 
  # Load environment variables in local mode
  try:
@@ -57,7 +35,6 @@ try:
  except Exception as e:
      pass
 
- import requests
 
  # Set up Gradio Theme
  theme = gr.themes.Base(
@@ -66,15 +43,7 @@ theme = gr.themes.Base(
      font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
  )
 
-
-
- init_prompt = ""
-
- system_template = {
-     "role": "system",
-     "content": init_prompt,
- }
-
  account_key = os.environ["BLOB_ACCOUNT_KEY"]
  if len(account_key) == 86:
      account_key += "=="
@@ -92,586 +61,262 @@ share_client = service.get_share_client(file_share_name)
  user_id = create_user_id()
 
 
- CITATION_LABEL = "BibTeX citation for ClimateQ&A"
- CITATION_TEXT = r"""@misc{climateqa,
-     author={Théo Alves Da Costa, Timothée Bohe},
-     title={ClimateQ&A, AI-powered conversational assistant for climate change and biodiversity loss},
-     year={2024},
-     howpublished= {\url{https://climateqa.com}},
- }
- @software{climateqa,
-     author = {Théo Alves Da Costa, Timothée Bohe},
-     publisher = {ClimateQ&A},
-     title = {ClimateQ&A, AI-powered conversational assistant for climate change and biodiversity loss},
- }
- """
-
-
 
  # Create vectorstore and retriever
- vectorstore = get_pinecone_vectorstore(embeddings_function, index_name = os.getenv("PINECONE_API_INDEX"))
- vectorstore_graphs = get_pinecone_vectorstore(embeddings_function, index_name = os.getenv("PINECONE_API_INDEX_OWID"), text_key="description")
 
  llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
- reranker = get_reranker("nano")
-
- agent = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, reranker=reranker)
-
- def update_config_modal_visibility(config_open):
-     new_config_visibility_status = not config_open
-     return gr.update(visible=new_config_visibility_status), new_config_visibility_status
-
- async def chat(query, history, audience, sources, reports, relevant_content_sources, search_only):
-     """taking a query and a message history, use a pipeline (reformulation, retriever, answering) to yield a tuple of:
-     (messages in gradio format, messages in langchain format, source documents)"""
-
-     date_now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-     print(f">> NEW QUESTION ({date_now}) : {query}")
-
-     audience_prompt = init_audience(audience)
-
-     # Prepare default values
-     if sources is None or len(sources) == 0:
-         sources = ["IPCC", "IPBES", "IPOS"]
-
-     if reports is None or len(reports) == 0:
-         reports = []
-
-     inputs = {"user_input": query,"audience": audience_prompt,"sources_input":sources, "relevant_content_sources" : relevant_content_sources, "search_only": search_only}
-     result = agent.astream_events(inputs,version = "v1")
-
-
-     docs = []
-     used_figures=[]
-     related_contents = []
-     docs_html = ""
-     output_query = ""
-     output_language = ""
-     output_keywords = ""
-     start_streaming = False
-     graphs_html = ""
-     figures = '<div class="figures-container"><p></p> </div>'
-
-     steps_display = {
-         "categorize_intent":("🔄️ Analyzing user message",True),
-         "transform_query":("🔄️ Thinking step by step to answer the question",True),
-         "retrieve_documents":("🔄️ Searching in the knowledge base",False),
-     }
-
-     used_documents = []
-     answer_message_content = ""
-     try:
-         async for event in result:
-             if "langgraph_node" in event["metadata"]:
-                 node = event["metadata"]["langgraph_node"]
-
-                 if event["event"] == "on_chain_end" and event["name"] == "retrieve_documents" :# when documents are retrieved
-                     docs, docs_html, history, used_documents, related_contents = handle_retrieved_documents(event, history, used_documents)
-
-                 elif event["event"] == "on_chain_end" and node == "categorize_intent" and event["name"] == "_write": # when the query is transformed
-
-                     intent = event["data"]["output"]["intent"]
-                     if "language" in event["data"]["output"]:
-                         output_language = event["data"]["output"]["language"]
-                     else :
-                         output_language = "English"
-                     history[-1].content = f"Language identified : {output_language} \n Intent identified : {intent}"
-
-
-                 elif event["name"] in steps_display.keys() and event["event"] == "on_chain_start": #display steps
-                     event_description, display_output = steps_display[node]
-                     if not hasattr(history[-1], 'metadata') or history[-1].metadata["title"] != event_description: # if a new step begins
-                         history.append(ChatMessage(role="assistant", content = "", metadata={'title' :event_description}))
-
-                 elif event["name"] != "transform_query" and event["event"] == "on_chat_model_stream" and node in ["answer_rag", "answer_search","answer_chitchat"]:# if streaming answer
-                     history, start_streaming, answer_message_content = stream_answer(history, event, start_streaming, answer_message_content)
-
-                 elif event["name"] in ["retrieve_graphs", "retrieve_graphs_ai"] and event["event"] == "on_chain_end":
-                     graphs_html = handle_retrieved_owid_graphs(event, graphs_html)
-
-
-             if event["name"] == "transform_query" and event["event"] =="on_chain_end":
-                 if hasattr(history[-1],"content"):
-                     history[-1].content += "Decompose question into sub-questions: \n\n - " + "\n - ".join([q["question"] for q in event["data"]["output"]["remaining_questions"]])
-
-             if event["name"] == "categorize_intent" and event["event"] == "on_chain_start":
-                 print("X")
-
-             yield history, docs_html, output_query, output_language, related_contents , graphs_html, #,output_query,output_keywords
-
-     except Exception as e:
-         print(event, "has failed")
-         raise gr.Error(f"{e}")
-
-
-     try:
-         # Log answer on Azure Blob Storage
-         if os.getenv("GRADIO_ENV") != "local":
-             timestamp = str(datetime.now().timestamp())
-             file = timestamp + ".json"
-             prompt = history[1]["content"]
-             logs = {
-                 "user_id": str(user_id),
-                 "prompt": prompt,
-                 "query": prompt,
-                 "question":output_query,
-                 "sources":sources,
-                 "docs":serialize_docs(docs),
-                 "answer": history[-1].content,
-                 "time": timestamp,
-             }
-             log_on_azure(file, logs, share_client)
-     except Exception as e:
-         print(f"Error logging on Azure Blob Storage: {e}")
-         raise gr.Error(f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)")
-
-     yield history, docs_html, output_query, output_language, related_contents, graphs_html
-
-
- def save_feedback(feed: str, user_id):
-     if len(feed) > 1:
-         timestamp = str(datetime.now().timestamp())
-         file = user_id + timestamp + ".json"
-         logs = {
-             "user_id": user_id,
-             "feedback": feed,
-             "time": timestamp,
-         }
-         log_on_azure(file, logs, share_client)
-         return "Feedback submitted, thank you!"
-
-
-
-
- def log_on_azure(file, logs, share_client):
-     logs = json.dumps(logs)
-     file_client = share_client.get_file_client(file)
-     file_client.upload_file(logs)
 
 
  # --------------------------------------------------------------------
  # Gradio
  # --------------------------------------------------------------------
 
 
- init_prompt = """
- Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports**.
-
- How to use
- - **Language**: You can ask me your questions in any language.
- - **Audience**: You can specify your audience (children, general public, experts) to get a more adapted answer.
- - **Sources**: You can choose to search in the IPCC or IPBES reports, or both.
- - **Relevant content sources**: You can choose to search for figures, papers, or graphs that can be relevant for your question.
-
- ⚠️ Limitations
- *Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
-
- 🛈 Information
- Please note that we log your questions for meta-analysis purposes, so avoid sharing any sensitive or personal information.
-
-
- What do you want to learn ?
- """
-
-
- def vote(data: gr.LikeData):
-     if data.liked:
-         print(data.value)
-     else:
-         print(data)
-
- def save_graph(saved_graphs_state, embedding, category):
-     print(f"\nCategory:\n{saved_graphs_state}\n")
-     if category not in saved_graphs_state:
-         saved_graphs_state[category] = []
-     if embedding not in saved_graphs_state[category]:
-         saved_graphs_state[category].append(embedding)
-     return saved_graphs_state, gr.Button("Graph Saved")
 
 
- with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=theme,elem_id = "main-component") as demo:
-     chat_completed_state = gr.State(0)
      current_graphs = gr.State([])
-     saved_graphs = gr.State({})
-     config_open = gr.State(False)
-
-
-     with gr.Tab("ClimateQ&A"):
-
          with gr.Row(elem_id="chatbot-row"):
              with gr.Column(scale=2):
-                 chatbot = gr.Chatbot(
-                     value = [ChatMessage(role="assistant", content=init_prompt)],
-                     type = "messages",
-                     show_copy_button=True,
-                     show_label = False,
-                     elem_id="chatbot",
-                     layout = "panel",
-                     avatar_images = (None,"https://i.ibb.co/YNyd5W2/logo4.png"),
-                     max_height="80vh",
-                     height="100vh"
-                 )
-
-                 # bot.like(vote,None,None)
-
-
-
-                 with gr.Row(elem_id = "input-message"):
-                     textbox=gr.Textbox(placeholder="Ask me anything here!",show_label=False,scale=7,lines = 1,interactive = True,elem_id="input-textbox")
-
-                     config_button = gr.Button("",elem_id="config-button")
-                     # config_checkbox_button = gr.Checkbox(label = '⚙️', value="show",visible=True, interactive=True, elem_id="checkbox-config")
-
-
-
-             with gr.Column(scale=2, variant="panel",elem_id = "right-panel"):
-
-
-                 with gr.Tabs(elem_id = "right_panel_tab") as tabs:
-                     with gr.TabItem("Examples",elem_id = "tab-examples",id = 0):
-
-                         examples_hidden = gr.Textbox(visible = False)
-                         first_key = list(QUESTIONS.keys())[0]
-                         dropdown_samples = gr.Dropdown(QUESTIONS.keys(),value = first_key,interactive = True,show_label = True,label = "Select a category of sample questions",elem_id = "dropdown-samples")
-
-                         samples = []
-                         for i,key in enumerate(QUESTIONS.keys()):
-
-                             examples_visible = True if i == 0 else False
-
-                             with gr.Row(visible = examples_visible) as group_examples:
-
-                                 examples_questions = gr.Examples(
-                                     QUESTIONS[key],
-                                     [examples_hidden],
-                                     examples_per_page=8,
-                                     run_on_click=False,
-                                     elem_id=f"examples{i}",
-                                     api_name=f"examples{i}",
-                                     # label = "Click on the example question or enter your own",
-                                     # cache_examples=True,
-                                 )
-
-                             samples.append(group_examples)
-
-                     # with gr.Tab("Configuration", id = 10, ) as tab_config:
-                     #     # gr.Markdown("Reminders: You can talk in any language, ClimateQ&A is multi-lingual!")
-
-                     #     pass
-
-                     #     with gr.Row():
-
-                     #         dropdown_sources = gr.CheckboxGroup(
-                     #             ["IPCC", "IPBES","IPOS"],
-                     #             label="Select source",
-                     #             value=["IPCC"],
-                     #             interactive=True,
-                     #         )
-                     #         dropdown_external_sources = gr.CheckboxGroup(
-                     #             ["IPCC figures","OpenAlex", "OurWorldInData"],
-                     #             label="Select database to search for relevant content",
-                     #             value=["IPCC figures"],
-                     #             interactive=True,
-                     #         )
-
-                     #         dropdown_reports = gr.Dropdown(
-                     #             POSSIBLE_REPORTS,
-                     #             label="Or select specific reports",
-                     #             multiselect=True,
-                     #             value=None,
-                     #             interactive=True,
-                     #         )
-
-                     #     search_only = gr.Checkbox(label="Search only without chating", value=False, interactive=True, elem_id="checkbox-chat")
-
-
-                     #     dropdown_audience = gr.Dropdown(
-                     #         ["Children","General public","Experts"],
-                     #         label="Select audience",
-                     #         value="Experts",
-                     #         interactive=True,
-                     #     )
-
-
-                     #     after = gr.Slider(minimum=1950,maximum=2023,step=1,value=1960,label="Publication date",show_label=True,interactive=True,elem_id="date-papers", visible=False)
 
-                     #     output_query = gr.Textbox(label="Query used for retrieval",show_label = True,elem_id = "reformulated-query",lines = 2,interactive = False, visible= False)
-                     #     output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False, visible= False)
 
 
-                     #     dropdown_external_sources.change(lambda x: gr.update(visible = True ) if "OpenAlex" in x else gr.update(visible=False) , inputs=[dropdown_external_sources], outputs=[after])
-                     #     # dropdown_external_sources.change(lambda x: gr.update(visible = True ) if "OpenAlex" in x else gr.update(visible=False) , inputs=[dropdown_external_sources], outputs=[after], visible=True)
 
 
-                     with gr.Tab("Sources",elem_id = "tab-sources",id = 1) as tab_sources:
-                         sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
-
-
-
-                     with gr.Tab("Recommended content", elem_id="tab-recommended_content",id=2) as tab_recommended_content:
-                         with gr.Tabs(elem_id = "group-subtabs") as tabs_recommended_content:
-
-                             with gr.Tab("Figures",elem_id = "tab-figures",id = 3) as tab_figures:
-                                 sources_raw = gr.State()
-
-                                 with Modal(visible=False, elem_id="modal_figure_galery") as figure_modal:
-                                     gallery_component = gr.Gallery(object_fit='scale-down',elem_id="gallery-component", height="80vh")
-
-                                 show_full_size_figures = gr.Button("Show figures in full size",elem_id="show-figures",interactive=True)
-                                 show_full_size_figures.click(lambda : Modal(visible=True),None,figure_modal)
-
-                                 figures_cards = gr.HTML(show_label=False, elem_id="sources-figures")
-
-
-
-                             with gr.Tab("Papers",elem_id = "tab-citations",id = 4) as tab_papers:
-                                 # btn_summary = gr.Button("Summary")
-                                 # Fenêtre simulée pour le Summary
-                                 with gr.Accordion(visible=True, elem_id="papers-summary-popup", label= "See summary of relevant papers", open= False) as summary_popup:
-                                     papers_summary = gr.Markdown("", visible=True, elem_id="papers-summary")
-
-                                 # btn_relevant_papers = gr.Button("Relevant papers")
-                                 # Fenêtre simulée pour les Relevant Papers
-                                 with gr.Accordion(visible=True, elem_id="papers-relevant-popup",label= "See relevant papers", open= False) as relevant_popup:
-                                     papers_html = gr.HTML(show_label=False, elem_id="papers-textbox")
-
-                                 btn_citations_network = gr.Button("Explore papers citations network")
-                                 # Fenêtre simulée pour le Citations Network
-                                 with Modal(visible=False) as papers_modal:
-                                     citations_network = gr.HTML("<h3>Citations Network Graph</h3>", visible=True, elem_id="papers-citations-network")
-                                 btn_citations_network.click(lambda: Modal(visible=True), None, papers_modal)
-
-
-
                              with gr.Tab("Graphs", elem_id="tab-graphs", id=5) as tab_graphs:
 
-                                 graphs_container = gr.HTML("<h2>There are no graphs to be displayed at the moment. Try asking another question.</h2>",elem_id="graphs-container")
-                                 current_graphs.change(lambda x : x, inputs=[current_graphs], outputs=[graphs_container])
-
-     with Modal(visible=False,elem_id="modal-config") as config_modal:
-         gr.Markdown("Reminders: You can talk in any language, ClimateQ&A is multi-lingual!")
-
-
-         # with gr.Row():
-
-         dropdown_sources = gr.CheckboxGroup(
-             ["IPCC", "IPBES","IPOS"],
-             label="Select source (by default search in all sources)",
-             value=["IPCC"],
-             interactive=True,
-         )
-
-         dropdown_reports = gr.Dropdown(
-             POSSIBLE_REPORTS,
-             label="Or select specific reports",
-             multiselect=True,
-             value=None,
-             interactive=True,
-         )
 
-         dropdown_external_sources = gr.CheckboxGroup(
-             ["IPCC figures","OpenAlex", "OurWorldInData"],
-             label="Select database to search for relevant content",
-             value=["IPCC figures"],
-             interactive=True,
-         )
-
-         search_only = gr.Checkbox(label="Search only for recommended content without chating", value=False, interactive=True, elem_id="checkbox-chat")
-
-
-         dropdown_audience = gr.Dropdown(
-             ["Children","General public","Experts"],
-             label="Select audience",
-             value="Experts",
-             interactive=True,
-         )
-
-
-         after = gr.Slider(minimum=1950,maximum=2023,step=1,value=1960,label="Publication date",show_label=True,interactive=True,elem_id="date-papers", visible=False)
-
-
-         output_query = gr.Textbox(label="Query used for retrieval",show_label = True,elem_id = "reformulated-query",lines = 2,interactive = False, visible= False)
-         output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False, visible= False)
-
-
-         dropdown_external_sources.change(lambda x: gr.update(visible = True ) if "OpenAlex" in x else gr.update(visible=False) , inputs=[dropdown_external_sources], outputs=[after])
-
-         close_config_modal = gr.Button("Validate and Close",elem_id="close-config-modal")
-         close_config_modal.click(fn=update_config_modal_visibility, inputs=[config_open], outputs=[config_modal, config_open])
-         # dropdown_external_sources.change(lambda x: gr.update(visible = True ) if "OpenAlex" in x else gr.update(visible=False) , inputs=[dropdown_external_sources], outputs=[after], visible=True)
-
-
-
-     config_button.click(fn=update_config_modal_visibility, inputs=[config_open], outputs=[config_modal, config_open])
-
-     # with gr.Tab("OECD",elem_id = "tab-oecd",id = 6):
-     #     oecd_indicator = "RIVER_FLOOD_RP100_POP_SH"
-     #     oecd_topic = "climate"
-     #     oecd_latitude = "46.8332"
-     #     oecd_longitude = "5.3725"
-     #     oecd_zoom = "5.6442"
-     #     # Create the HTML content with the iframe
-     #     iframe_html = f"""
-     #     <iframe src="https://localdataportal.oecd.org/maps.html?indicator={oecd_indicator}&topic={oecd_topic}&latitude={oecd_latitude}&longitude={oecd_longitude}&zoom={oecd_zoom}"
-     #     width="100%" height="600" frameborder="0" style="border:0;" allowfullscreen></iframe>
-     #     """
-     #     oecd_textbox = gr.HTML(iframe_html, show_label=False, elem_id="oecd-textbox")
-
-
-
-
-     #---------------------------------------------------------------------------------------
-     # OTHER TABS
-     #---------------------------------------------------------------------------------------
-
-     # with gr.Tab("Settings",elem_id = "tab-config",id = 2):
-
-     #     gr.Markdown("Reminder: You can talk in any language, ClimateQ&A is multi-lingual!")
-
-
-     #     dropdown_sources = gr.CheckboxGroup(
-     #         ["IPCC", "IPBES","IPOS", "OpenAlex"],
-     #         label="Select source",
-     #         value=["IPCC"],
-     #         interactive=True,
-     #     )
-
-     #     dropdown_reports = gr.Dropdown(
-     #         POSSIBLE_REPORTS,
-     #         label="Or select specific reports",
-     #         multiselect=True,
-     #         value=None,
-     #         interactive=True,
-     #     )
-
-     #     dropdown_audience = gr.Dropdown(
-     #         ["Children","General public","Experts"],
-     #         label="Select audience",
-     #         value="Experts",
-     #         interactive=True,
-     #     )
-
-
-     #     output_query = gr.Textbox(label="Query used for retrieval",show_label = True,elem_id = "reformulated-query",lines = 2,interactive = False)
-     #     output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False)
-
-
-     with gr.Tab("About",elem_classes = "max-height other-tabs"):
-         with gr.Row():
-             with gr.Column(scale=1):
-
-
-
-
-                 gr.Markdown(
-                     """
-                     ### More info
-                     - See more info at [https://climateqa.com](https://climateqa.com/docs/intro/)
-                     - Feedbacks on this [form](https://forms.office.com/e/1Yzgxm6jbp)
-
-                     ### Citation
-                     """
-                 )
-                 with gr.Accordion(CITATION_LABEL,elem_id="citation", open = False,):
-                     # # Display citation label and text)
-                     gr.Textbox(
-                         value=CITATION_TEXT,
-                         label="",
-                         interactive=False,
-                         show_copy_button=True,
-                         lines=len(CITATION_TEXT.split('\n')),
-                     )
-
-
-
-     def start_chat(query,history,search_only):
-         history = history + [ChatMessage(role="user", content=query)]
-         if not search_only:
-             return (gr.update(interactive = False),gr.update(selected=1),history)
-         else:
-             return (gr.update(interactive = False),gr.update(selected=2),history)
 
-     def finish_chat():
-         return gr.update(interactive = True,value = "")
 
-     # Initialize visibility states
-     summary_visible = False
-     relevant_visible = False
-
-     # Functions to toggle visibility
-     def toggle_summary_visibility():
-         global summary_visible
-         summary_visible = not summary_visible
-         return gr.update(visible=summary_visible)
-
-     def toggle_relevant_visibility():
-         global relevant_visible
-         relevant_visible = not relevant_visible
-         return gr.update(visible=relevant_visible)
-
-
-     def change_completion_status(current_state):
-         current_state = 1 - current_state
-         return current_state
 
-     def update_sources_number_display(sources_textbox, figures_cards, current_graphs, papers_html):
-         sources_number = sources_textbox.count("<h2>")
-         figures_number = figures_cards.count("<h2>")
-         graphs_number = current_graphs.count("<iframe")
-         papers_number = papers_html.count("<h2>")
-         sources_notif_label = f"Sources ({sources_number})"
-         figures_notif_label = f"Figures ({figures_number})"
-         graphs_notif_label = f"Graphs ({graphs_number})"
-         papers_notif_label = f"Papers ({papers_number})"
-         recommended_content_notif_label = f"Recommended content ({figures_number + graphs_number + papers_number})"
-
-         return gr.update(label = recommended_content_notif_label), gr.update(label = sources_notif_label), gr.update(label = figures_notif_label), gr.update(label = graphs_notif_label), gr.update(label = papers_notif_label)
 
-     (textbox
-         .submit(start_chat, [textbox,chatbot, search_only], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
-         .then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, dropdown_external_sources, search_only] ,[chatbot,sources_textbox,output_query,output_language, sources_raw, current_graphs],concurrency_limit = 8,api_name = "chat_textbox")
-         .then(finish_chat, None, [textbox],api_name = "finish_chat_textbox")
-         # .then(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_sources, tab_figures, tab_graphs, tab_papers] )
-     )
-
-     (examples_hidden
-         .change(start_chat, [examples_hidden,chatbot, search_only], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
-         .then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, dropdown_external_sources, search_only] ,[chatbot,sources_textbox,output_query,output_language, sources_raw, current_graphs],concurrency_limit = 8,api_name = "chat_textbox")
-         .then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
-         # .then(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_sources, tab_figures, tab_graphs, tab_papers] )
-     )
-
-
-     def change_sample_questions(key):
-         index = list(QUESTIONS.keys()).index(key)
-         visible_bools = [False] * len(samples)
-         visible_bools[index] = True
-         return [gr.update(visible=visible_bools[i]) for i in range(len(samples))]
-
-
-     sources_raw.change(process_figures, inputs=[sources_raw], outputs=[figures_cards, gallery_component])
 
-     # update sources numbers
-     sources_textbox.change(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers])
-     figures_cards.change(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers])
-     current_graphs.change(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers])
-     papers_html.change(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers])
-
-     # other questions examples
-     dropdown_samples.change(change_sample_questions,dropdown_samples,samples)
-
-     # search for papers
-     textbox.submit(find_papers,[textbox,after, dropdown_external_sources], [papers_html,citations_network,papers_summary])
-     examples_hidden.change(find_papers,[examples_hidden,after,dropdown_external_sources], [papers_html,citations_network,papers_summary])
 
-     # btn_summary.click(toggle_summary_visibility, outputs=summary_popup)
-     # btn_relevant_papers.click(toggle_relevant_visibility, outputs=relevant_popup)
 
- demo.queue()
 
  demo.launch(ssr_mode=False)
 
+ # Import necessary libraries
  import os
+ import gradio as gr
 
  from azure.storage.fileshare import ShareServiceClient
 
+ # Import custom modules
+ from climateqa.engine.embeddings import get_embeddings_function
+ from climateqa.engine.llm import get_llm
+ from climateqa.engine.vectorstore import get_pinecone_vectorstore
+ from climateqa.engine.reranker import get_reranker
+ from climateqa.engine.graph import make_graph_agent,make_graph_agent_poc
+ from climateqa.engine.chains.retrieve_papers import find_papers
+ from climateqa.chat import start_chat, chat_stream, finish_chat
+ from climateqa.engine.talk_to_data.main import ask_vanna
 
+ from front.tabs import (create_config_modal, create_examples_tab, create_papers_tab, create_figures_tab, create_chat_interface, create_about_tab)
+ from front.utils import process_figures
  from gradio_modal import Modal
 
 
+ from utils import create_user_id
+ import logging
 
+ logging.basicConfig(level=logging.WARNING)
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppresses INFO and WARNING logs
+ logging.getLogger().setLevel(logging.WARNING)
 
 
 
  # Load environment variables in local mode
  try:
 
  except Exception as e:
      pass
 
 
  # Set up Gradio Theme
  theme = gr.themes.Base(
 
      font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
  )
 
+ # Azure Blob Storage credentials
  account_key = os.environ["BLOB_ACCOUNT_KEY"]
  if len(account_key) == 86:
      account_key += "=="
 
  user_id = create_user_id()
 
 
  # Create vectorstore and retriever
+ embeddings_function = get_embeddings_function()
+ vectorstore = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX"))
+ vectorstore_graphs = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX_OWID"), text_key="description")
+ vectorstore_region = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX_REGION"))
 
  llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
+ if os.environ["GRADIO_ENV"] == "local":
+     reranker = get_reranker("nano")
+ else :
+     reranker = get_reranker("large")
 
+ agent = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, vectorstore_region = vectorstore_region, reranker=reranker, threshold_docs=0.2)
+ agent_poc = make_graph_agent_poc(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, vectorstore_region = vectorstore_region, reranker=reranker, threshold_docs=0)#TODO put back default 0.2
 
 
+ async def chat(query, history, audience, sources, reports, relevant_content_sources_selection, search_only):
+     print("chat cqa - message received")
+     async for event in chat_stream(agent, query, history, audience, sources, reports, relevant_content_sources_selection, search_only, share_client, user_id):
+         yield event
+
+ async def chat_poc(query, history, audience, sources, reports, relevant_content_sources_selection, search_only):
+     print("chat poc - message received")
+     async for event in chat_stream(agent_poc, query, history, audience, sources, reports, relevant_content_sources_selection, search_only, share_client, user_id):
+         yield event
 
 
  # --------------------------------------------------------------------
  # Gradio
  # --------------------------------------------------------------------
 
+ # Function to update modal visibility
+ def update_config_modal_visibility(config_open):
+     new_config_visibility_status = not config_open
+     return Modal(visible=new_config_visibility_status), new_config_visibility_status
+
 
+ def update_sources_number_display(sources_textbox, figures_cards, current_graphs, papers_html):
+     sources_number = sources_textbox.count("<h2>")
+     figures_number = figures_cards.count("<h2>")
+     graphs_number = current_graphs.count("<iframe")
+     papers_number = papers_html.count("<h2>")
+     sources_notif_label = f"Sources ({sources_number})"
+     figures_notif_label = f"Figures ({figures_number})"
+     graphs_notif_label = f"Graphs ({graphs_number})"
+     papers_notif_label = f"Papers ({papers_number})"
+     recommended_content_notif_label = f"Recommended content ({figures_number + graphs_number + papers_number})"
 
+     return gr.update(label=recommended_content_notif_label), gr.update(label=sources_notif_label), gr.update(label=figures_notif_label), gr.update(label=graphs_notif_label), gr.update(label=papers_notif_label)
 
 
+ # # UI Layout Components
+ def cqa_tab(tab_name):
+     # State variables
      current_graphs = gr.State([])
+     with gr.Tab(tab_name):
          with gr.Row(elem_id="chatbot-row"):
+             # Left column - Chat interface
              with gr.Column(scale=2):
+                 chatbot, textbox, config_button = create_chat_interface(tab_name)
 
+             # Right column - Content panels
+             with gr.Column(scale=2, variant="panel", elem_id="right-panel"):
+                 with gr.Tabs(elem_id="right_panel_tab") as tabs:
+                     # Examples tab
+                     with gr.TabItem("Examples", elem_id="tab-examples", id=0):
+                         examples_hidden = create_examples_tab()
 
+                     # Sources tab
+                     with gr.Tab("Sources", elem_id="tab-sources", id=1) as tab_sources:
+                         sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
 
 
+                     # Recommended content tab
+                     with gr.Tab("Recommended content", elem_id="tab-recommended_content", id=2) as tab_recommended_content:
+                         with gr.Tabs(elem_id="group-subtabs") as tabs_recommended_content:
+                             # Figures subtab
+                             with gr.Tab("Figures", elem_id="tab-figures", id=3) as tab_figures:
+                                 sources_raw, new_figures, used_figures, gallery_component, figures_cards, figure_modal = create_figures_tab()
 
+                             # Papers subtab
+                             with gr.Tab("Papers", elem_id="tab-citations", id=4) as tab_papers:
+                                 papers_direct_search, papers_summary, papers_html, citations_network, papers_modal = create_papers_tab()
+
+                             # Graphs subtab
                              with gr.Tab("Graphs", elem_id="tab-graphs", id=5) as tab_graphs:
+                                 graphs_container = gr.HTML(
+                                     "<h2>There are no graphs to be displayed at the moment. Try asking another question.</h2>",
+                                     elem_id="graphs-container"
+                                 )
+                             with gr.Tab("DRIAS", elem_id="tab-vanna", id=6) as tab_vanna:
+                                 vanna_direct_question = gr.Textbox(label="Direct Question", placeholder="You can write direct question here",elem_id="direct-question", interactive=True)
+                                 with gr.Accordion("Details",elem_id = 'vanna-details', open=False) as vanna_details :
+                                     vanna_sql_query = gr.Textbox(label="SQL Query Used", elem_id="sql-query", interactive=False)
+                                     show_vanna_table = gr.Button("Show Table", elem_id="show-table")
+                                     with Modal(visible=False) as vanna_table_modal:
+                                         vanna_table = gr.DataFrame([], elem_id="vanna-table")
+                                         close_vanna_modal = gr.Button("Close", elem_id="close-vanna-modal")
+                                         close_vanna_modal.click(lambda: Modal(visible=False),None, [vanna_table_modal])
+                                     show_vanna_table.click(lambda: Modal(visible=True),None ,[vanna_table_modal])
+
+                                 vanna_display = gr.Plot()
+                                 vanna_direct_question.submit(ask_vanna, [vanna_direct_question], [vanna_sql_query ,vanna_table, vanna_display])
 
+     return {
+         "chatbot": chatbot,
+         "textbox": textbox,
+         "tabs": tabs,
+         "sources_raw": sources_raw,
+         "new_figures": new_figures,
+         "current_graphs": current_graphs,
+         "examples_hidden": examples_hidden,
+         "sources_textbox": sources_textbox,
+         "figures_cards": figures_cards,
+         "gallery_component": gallery_component,
+         "config_button": config_button,
+         "papers_direct_search" : papers_direct_search,
+         "papers_html": papers_html,
+         "citations_network": citations_network,
+         "papers_summary": papers_summary,
+         "tab_recommended_content": tab_recommended_content,
+         "tab_sources": tab_sources,
+         "tab_figures": tab_figures,
+         "tab_graphs": tab_graphs,
+         "tab_papers": tab_papers,
+         "graph_container": graphs_container,
+         "vanna_sql_query": vanna_sql_query,
+         "vanna_table" : vanna_table,
+         "vanna_display": vanna_display
+     }
 
+
 
+ def event_handling(
+     main_tab_components,
+     config_components,
+     tab_name="ClimateQ&A"
+ ):
+     chatbot = main_tab_components["chatbot"]
+     textbox = main_tab_components["textbox"]
+     tabs = main_tab_components["tabs"]
+     sources_raw = main_tab_components["sources_raw"]
+     new_figures = main_tab_components["new_figures"]
+     current_graphs = main_tab_components["current_graphs"]
+     examples_hidden = main_tab_components["examples_hidden"]
+     sources_textbox = main_tab_components["sources_textbox"]
+     figures_cards = main_tab_components["figures_cards"]
+     gallery_component = main_tab_components["gallery_component"]
+     config_button = main_tab_components["config_button"]
+     papers_direct_search = main_tab_components["papers_direct_search"]
+     papers_html = main_tab_components["papers_html"]
+     citations_network = main_tab_components["citations_network"]
+     papers_summary = main_tab_components["papers_summary"]
+     tab_recommended_content = main_tab_components["tab_recommended_content"]
+     tab_sources = main_tab_components["tab_sources"]
+     tab_figures = main_tab_components["tab_figures"]
+     tab_graphs = main_tab_components["tab_graphs"]
+     tab_papers = main_tab_components["tab_papers"]
+     graphs_container = main_tab_components["graph_container"]
+     vanna_sql_query = main_tab_components["vanna_sql_query"]
+     vanna_table = main_tab_components["vanna_table"]
+     vanna_display = main_tab_components["vanna_display"]
 
+     config_open = config_components["config_open"]
+     config_modal = config_components["config_modal"]
+     dropdown_sources = config_components["dropdown_sources"]
+     dropdown_reports = config_components["dropdown_reports"]
+     dropdown_external_sources = config_components["dropdown_external_sources"]
+     search_only = config_components["search_only"]
+     dropdown_audience = config_components["dropdown_audience"]
+     after = config_components["after"]
+     output_query = config_components["output_query"]
+     output_language = config_components["output_language"]
+     close_config_modal = config_components["close_config_modal_button"]
 
+     new_sources_hmtl = gr.State([])
+     ttd_data = gr.State([])
+
+
+     for button in [config_button, close_config_modal]:
+         button.click(
+             fn=update_config_modal_visibility,
+             inputs=[config_open],
+             outputs=[config_modal, config_open]
+         )
 
+     if tab_name == "ClimateQ&A":
+         print("chat cqa - message sent")
+
+         # Event for textbox
+         (textbox
+             .submit(start_chat, [textbox, chatbot, search_only], [textbox, tabs, chatbot, sources_raw], queue=False, api_name=f"start_chat_{textbox.elem_id}")
+             .then(chat, [textbox, chatbot, dropdown_audience, dropdown_sources, dropdown_reports, dropdown_external_sources, search_only], [chatbot, new_sources_hmtl, output_query, output_language, new_figures, current_graphs], concurrency_limit=8, api_name=f"chat_{textbox.elem_id}")
+             .then(finish_chat, None, [textbox], api_name=f"finish_chat_{textbox.elem_id}")
+         )
+         # Event for examples_hidden
+         (examples_hidden
+             .change(start_chat, [examples_hidden, chatbot, search_only], [examples_hidden, tabs, chatbot, sources_raw], queue=False, api_name=f"start_chat_{examples_hidden.elem_id}")
+             .then(chat, [examples_hidden, chatbot, dropdown_audience, dropdown_sources, dropdown_reports, dropdown_external_sources, search_only], [chatbot, new_sources_hmtl, output_query, output_language, new_figures, current_graphs], concurrency_limit=8, api_name=f"chat_{examples_hidden.elem_id}")
+             .then(finish_chat, None, [textbox], api_name=f"finish_chat_{examples_hidden.elem_id}")
+         )
+
+     elif tab_name == "Beta - POC Adapt'Action":
+         print("chat poc - message sent")
+         # Event for textbox
+         (textbox
+             .submit(start_chat, [textbox, chatbot, search_only], [textbox, tabs, chatbot, sources_raw], queue=False, api_name=f"start_chat_{textbox.elem_id}")
+             .then(chat_poc, [textbox, chatbot, dropdown_audience, dropdown_sources, dropdown_reports, dropdown_external_sources, search_only], [chatbot, new_sources_hmtl, output_query, output_language, new_figures, current_graphs], concurrency_limit=8, api_name=f"chat_{textbox.elem_id}")
+             .then(finish_chat, None, [textbox], api_name=f"finish_chat_{textbox.elem_id}")
+         )
+         # Event for examples_hidden
+         (examples_hidden
+             .change(start_chat, [examples_hidden, chatbot, search_only], [examples_hidden, tabs, chatbot, sources_raw], queue=False, api_name=f"start_chat_{examples_hidden.elem_id}")
+             .then(chat_poc, [examples_hidden, chatbot, dropdown_audience, dropdown_sources, dropdown_reports, dropdown_external_sources, search_only], [chatbot, new_sources_hmtl, output_query, output_language, new_figures, current_graphs], concurrency_limit=8, api_name=f"chat_{examples_hidden.elem_id}")
+             .then(finish_chat, None, [textbox], api_name=f"finish_chat_{examples_hidden.elem_id}")
+         )
+
+
+     new_sources_hmtl.change(lambda x : x, inputs = [new_sources_hmtl], outputs = [sources_textbox])
+     current_graphs.change(lambda x: x, inputs=[current_graphs], outputs=[graphs_container])
+     new_figures.change(process_figures, inputs=[sources_raw, new_figures], outputs=[sources_raw, figures_cards, gallery_component])
 
+     # Update sources numbers
+     for component in [sources_textbox, figures_cards, current_graphs, papers_html]:
+         component.change(update_sources_number_display, [sources_textbox, figures_cards, current_graphs, papers_html], [tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers])
+
+     # Search for papers
+     for component in [textbox, examples_hidden, papers_direct_search]:
+         component.submit(find_papers, [component, after, dropdown_external_sources], [papers_html, citations_network, papers_summary])
+
+
 
+     if tab_name == "Beta - POC Adapt'Action":
+         # Drias search
+         textbox.submit(ask_vanna, [textbox], [vanna_sql_query ,vanna_table, vanna_display])
+
+ def main_ui():
+     # config_open = gr.State(True)
+     with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=theme, elem_id="main-component") as demo:
+         config_components = create_config_modal()
+
+         with gr.Tabs():
+             cqa_components = cqa_tab(tab_name = "ClimateQ&A")
+             local_cqa_components = cqa_tab(tab_name = "Beta - POC Adapt'Action")
+
+             create_about_tab()
+
+         event_handling(cqa_components, config_components, tab_name = 'ClimateQ&A')
+         event_handling(local_cqa_components, config_components, tab_name = "Beta - POC Adapt'Action")
+
+         demo.queue()
+
+     return demo
 
+
+ demo = main_ui()
  demo.launch(ssr_mode=False)
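For reference, a minimal sketch of how the new `ask_vanna` entry point behaves outside the Gradio UI, assuming it returns `(sql_query, dataframe, figure)` in that order, matching how its outputs are wired to `[vanna_sql_query, vanna_table, vanna_display]` above; the question string is only an example:

```python
# Standalone smoke test for the DRIAS "talk to data" flow (illustrative sketch).
from climateqa.engine.talk_to_data.main import ask_vanna

question = "What is the temperature trend in Marseille?"  # example query
sql_query, df, fig = ask_vanna(question)  # assumed return order, per drias_retriever.py

print(sql_query)  # SQL generated against the DRIAS database
print(df.head())  # resulting rows as a pandas DataFrame
fig.show()        # the plot that gr.Plot renders in the DRIAS tab
```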
climateqa/chat.py ADDED
@@ -0,0 +1,214 @@
+ import os
+ from datetime import datetime
+ import gradio as gr
+ # from .agent import agent
+ from gradio import ChatMessage
+ from langgraph.graph.state import CompiledStateGraph
+ import json
+
+ from .handle_stream_events import (
+     init_audience,
+     handle_retrieved_documents,
+     convert_to_docs_to_html,
+     stream_answer,
+     handle_retrieved_owid_graphs,
+     serialize_docs,
+ )
+
+ # Function to log data on Azure
+ def log_on_azure(file, logs, share_client):
+     logs = json.dumps(logs)
+     file_client = share_client.get_file_client(file)
+     file_client.upload_file(logs)
+
+ # Chat functions
+ def start_chat(query, history, search_only):
+     history = history + [ChatMessage(role="user", content=query)]
+     if not search_only:
+         return (gr.update(interactive=False), gr.update(selected=1), history, [])
+     else:
+         return (gr.update(interactive=False), gr.update(selected=2), history, [])
+
+ def finish_chat():
+     return gr.update(interactive=True, value="")
+
+ def log_interaction_to_azure(history, output_query, sources, docs, share_client, user_id):
+     try:
+         # Log interaction to Azure if not in local environment
+         if os.getenv("GRADIO_ENV") != "local":
+             timestamp = str(datetime.now().timestamp())
+             prompt = history[1]["content"]
+             logs = {
+                 "user_id": str(user_id),
+                 "prompt": prompt,
+                 "query": prompt,
+                 "question": output_query,
+                 "sources": sources,
+                 "docs": serialize_docs(docs),
+                 "answer": history[-1].content,
+                 "time": timestamp,
+             }
+             log_on_azure(f"{timestamp}.json", logs, share_client)
+     except Exception as e:
+         print(f"Error logging on Azure Blob Storage: {e}")
+         error_msg = f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)"
+         raise gr.Error(error_msg)
+
+ def handle_numerical_data(event):
+     if event["name"] == "retrieve_drias_data" and event["event"] == "on_chain_end":
+         numerical_data = event["data"]["output"]["drias_data"]
+         sql_query = event["data"]["output"]["drias_sql_query"]
+         return numerical_data, sql_query
+     return None, None
+
+ # Main chat function
+ async def chat_stream(
+     agent : CompiledStateGraph,
+     query: str,
+     history: list[ChatMessage],
+     audience: str,
+     sources: list[str],
+     reports: list[str],
+     relevant_content_sources_selection: list[str],
+     search_only: bool,
+     share_client,
+     user_id: str
+ ) -> tuple[list, str, str, str, list, str]:
+     """Process a chat query and return response with relevant sources and visualizations.
+
+     Args:
+         query (str): The user's question
+         history (list): Chat message history
+         audience (str): Target audience type
+         sources (list): Knowledge base sources to search
+         reports (list): Specific reports to search within sources
+         relevant_content_sources_selection (list): Types of content to retrieve (figures, papers, etc)
+         search_only (bool): Whether to only search without generating answer
+
+     Yields:
+         tuple: Contains:
+             - history: Updated chat history
+             - docs_html: HTML of retrieved documents
+             - output_query: Processed query
+             - output_language: Detected language
+             - related_contents: Related content
+             - graphs_html: HTML of relevant graphs
+     """
+     # Log incoming question
+     date_now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+     print(f">> NEW QUESTION ({date_now}) : {query}")
+
+     audience_prompt = init_audience(audience)
+     sources = sources or ["IPCC", "IPBES"]
+     reports = reports or []
+
+     # Prepare inputs for agent
+     inputs = {
+         "user_input": query,
+         "audience": audience_prompt,
+         "sources_input": sources,
+         "relevant_content_sources_selection": relevant_content_sources_selection,
+         "search_only": search_only,
+         "reports": reports
+     }
+
+     # Get streaming events from agent
+     result = agent.astream_events(inputs, version="v1")
+
+     # Initialize state variables
+     docs = []
+     related_contents = []
+     docs_html = ""
+     new_docs_html = ""
+     output_query = ""
+     output_language = ""
+     output_keywords = ""
+     start_streaming = False
+     graphs_html = ""
+     used_documents = []
+     retrieved_contents = []
+     answer_message_content = ""
+     vanna_data = {}
+
+     # Define processing steps
+     steps_display = {
+         "categorize_intent": ("🔄️ Analyzing user message", True),
+         "transform_query": ("🔄️ Thinking step by step to answer the question", True),
+         "retrieve_documents": ("🔄️ Searching in the knowledge base", False),
+         "retrieve_local_data": ("🔄️ Searching in the knowledge base", False),
+     }
+
+     try:
+         # Process streaming events
+         async for event in result:
+
+             if "langgraph_node" in event["metadata"]:
+                 node = event["metadata"]["langgraph_node"]
+
+                 # Handle document retrieval
+                 if event["event"] == "on_chain_end" and event["name"] in ["retrieve_documents","retrieve_local_data"] and event["data"]["output"] != None:
+                     history, used_documents, retrieved_contents = handle_retrieved_documents(
+                         event, history, used_documents, retrieved_contents
+                     )
+                 # Handle Vanna retrieval
+                 # if event["event"] == "on_chain_end" and event["name"] in ["retrieve_documents","retrieve_local_data"] and event["data"]["output"] != None:
+                 #     df_output_vanna, sql_query = handle_numerical_data(
+                 #         event
+                 #     )
+                 #     vanna_data = {"df_output": df_output_vanna, "sql_query": sql_query}
+
+
+                 if event["event"] == "on_chain_end" and event["name"] == "answer_search" :
+                     docs = event["data"]["input"]["documents"]
+                     docs_html = convert_to_docs_to_html(docs)
+                     related_contents = event["data"]["input"]["related_contents"]
+
+                 # Handle intent categorization
+                 elif (event["event"] == "on_chain_end" and
+                     node == "categorize_intent" and
+                     event["name"] == "_write"):
+                     intent = event["data"]["output"]["intent"]
+                     output_language = event["data"]["output"].get("language", "English")
+                     history[-1].content = f"Language identified: {output_language}\nIntent identified: {intent}"
+
+                 # Handle processing steps display
+                 elif event["name"] in steps_display and event["event"] == "on_chain_start":
+                     event_description, display_output = steps_display[node]
+                     if (not hasattr(history[-1], 'metadata') or
+                         history[-1].metadata["title"] != event_description):
+                         history.append(ChatMessage(
+                             role="assistant",
+                             content="",
+                             metadata={'title': event_description}
+                         ))
+
+                 # Handle answer streaming
+                 elif (event["name"] != "transform_query" and
+                     event["event"] == "on_chat_model_stream" and
+                     node in ["answer_rag","answer_rag_no_docs", "answer_search", "answer_chitchat"]):
+                     history, start_streaming, answer_message_content = stream_answer(
+                         history, event, start_streaming, answer_message_content
+                     )
+
+                 # Handle graph retrieval
+                 elif event["name"] in ["retrieve_graphs", "retrieve_graphs_ai"] and event["event"] == "on_chain_end":
+                     graphs_html = handle_retrieved_owid_graphs(event, graphs_html)
+
+             # Handle query transformation
+             if event["name"] == "transform_query" and event["event"] == "on_chain_end":
+                 if hasattr(history[-1], "content"):
+                     sub_questions = [q["question"] + "-> relevant sources : " + str(q["sources"]) for q in event["data"]["output"]["questions_list"]]
+                     history[-1].content += "Decompose question into sub-questions:\n\n - " + "\n - ".join(sub_questions)
+
+             yield history, docs_html, output_query, output_language, related_contents, graphs_html#, vanna_data
+
+     except Exception as e:
+         print(f"Event {event} has failed")
+         raise gr.Error(str(e))
+
+
+     # Call the function to log interaction
+     log_interaction_to_azure(history, output_query, sources, docs, share_client, user_id)
+
+     yield history, docs_html, output_query, output_language, related_contents, graphs_html#, vanna_data
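Taken together, `chat_stream` is an async generator; a minimal illustrative consumer, assuming `agent`, `share_client` and `user_id` are constructed as in `app.py`, looks like this:

```python
# Illustrative driver for chat_stream; all argument values are examples.
import asyncio
from gradio import ChatMessage

async def run_once(agent, share_client, user_id):
    history = [ChatMessage(role="user", content="What is climate change?")]
    async for history, docs_html, query, language, related, graphs in chat_stream(
        agent, "What is climate change?", history,
        audience="Experts", sources=["IPCC", "IPBES"], reports=[],
        relevant_content_sources_selection=["IPCC figures"],
        search_only=False, share_client=share_client, user_id=user_id,
    ):
        print(language, history[-1].content[:80])  # streamed partial answer

# asyncio.run(run_once(agent, share_client, user_id))
```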
climateqa/constants.py CHANGED
@@ -1,4 +1,6 @@
  POSSIBLE_REPORTS = [
+     "IPBES IABWFH SPM",
+     "IPBES CBL SPM",
      "IPCC AR6 WGI SPM",
      "IPCC AR6 WGI FR",
      "IPCC AR6 WGI TS",
climateqa/engine/chains/answer_rag.py CHANGED
@@ -11,7 +11,7 @@ import time
  from ..utils import rename_chain, pass_values
 
 
- DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
+ DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="Source : {source} - Content : {page_content}")
 
  def _combine_documents(
      docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, sep="\n\n"
@@ -61,10 +61,11 @@ def make_rag_node(llm,with_docs = True):
          rag_chain = make_rag_chain(llm)
      else:
          rag_chain = make_rag_chain_without_docs(llm)
-
+
      async def answer_rag(state,config):
          print("---- Answer RAG ----")
          start_time = time.time()
+         print("Sources used : " + "\n".join([x.metadata["short_name"] + " - page " + str(x.metadata["page_number"]) for x in state["documents"]]))
 
          answer = await rag_chain.ainvoke(state,config)
 
climateqa/engine/chains/drias_retriever.py ADDED
@@ -0,0 +1,16 @@
+ import sys
+ import os
+ from climateqa.engine.talk_to_data.main import ask_vanna
+
+
+ def make_drias_retriever_node(llm):
+
+     def retrieve_drias_data(state):
+         print("---- Retrieving data from DRIAS ----")
+         query = state["query"]
+         sql_query, df, fig = ask_vanna(query)
+         state["drias_data"] = df
+         state["drias_sql_query"] = sql_query
+         return state
+
+     return retrieve_drias_data
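The factory returns a state-updating callable keyed on `state["query"]`. A sketch of registering it in the LangGraph workflow; the `StateGraph` wiring below is an assumption for illustration, since the real graph is assembled in `climateqa/engine/graph.py` and is not part of this diff:

```python
# Hypothetical registration of the DRIAS node in a LangGraph workflow.
from langgraph.graph import StateGraph

def add_drias_retriever(workflow: StateGraph, llm) -> StateGraph:
    # The returned callable reads state["query"] and writes
    # state["drias_data"] and state["drias_sql_query"].
    workflow.add_node("retrieve_drias_data", make_drias_retriever_node(llm))
    return workflow
```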
climateqa/engine/chains/graph_retriever.py CHANGED
@@ -50,7 +50,9 @@ def make_graph_retriever_node(vectorstore, reranker, rerank_by_question=True, k_
      print("---- Retrieving graphs ----")
 
      POSSIBLE_SOURCES = ["IEA", "OWID"]
-     questions = state["remaining_questions"] if state["remaining_questions"] is not None and state["remaining_questions"]!=[] else [state["query"]]
+     # questions = state["remaining_questions"] if state["remaining_questions"] is not None and state["remaining_questions"]!=[] else [state["query"]]
+     questions = state["questions_list"] if state["questions_list"] is not None and state["questions_list"]!=[] else [state["query"]]
+
      # sources_input = state["sources_input"]
      sources_input = ["auto"]
 
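In isolation, the new fallback behaves as follows: use the decomposed `questions_list` when it is present and non-empty, otherwise fall back to the raw query (example values are illustrative):

```python
# Example of the questions_list fallback (illustrative state dict).
state = {"query": "How is climate change affecting oceans?", "questions_list": []}
questions = state["questions_list"] if state["questions_list"] is not None and state["questions_list"] != [] else [state["query"]]
assert questions == ["How is climate change affecting oceans?"]
```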
climateqa/engine/chains/intent_categorization.py CHANGED
@@ -29,7 +29,7 @@ class IntentCategorizer(BaseModel):
         Examples:
         - ai_impact = Environmental impacts of AI: "What are the environmental impacts of AI", "How does AI affect the environment"
         - search = Searching for any question about climate change, energy, biodiversity, nature, and everything we can find in the IPCC or IPBES reports or scientific papers,
-        - chitchat = Any general question that is not related to the environment or climate change or just conversational, or if you don't think searching the IPCC or IPBES reports would be relevant
+        - chitchat = Any general question that is not related to the environment or climate change or just conversational, or if you don't think searching the IPCC or IPBES reports would be relevant. If it can be interpreted as a climate related question, please use the search intent.
         """,
         # - geo_info = Geolocated info about climate change: Any question where the user wants to know localized impacts of climate change, eg: "What will be the temperature in Marseille in 2050"
         # - esg = Any question about the ESG regulation, frameworks and standards like the CSRD, TCFD, SASB, GRI, CDP, etc.
@@ -57,6 +57,7 @@ def make_intent_categorization_node(llm):
     categorization_chain = make_intent_categorization_chain(llm)
 
     def categorize_message(state):
+        print("Input Message : ", state["user_input"])
        print("---- Categorize_message ----")
 
        output = categorization_chain.invoke({"input": state["user_input"]})
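The sentence added to the chitchat definition biases borderline inputs toward search. Illustrative invocations of the categorization chain (the actual outputs depend on the underlying LLM, so treat them as indicative only):

    chain = make_intent_categorization_chain(llm)

    chain.invoke({"input": "Hi, how are you doing today?"})
    # -> e.g. {"intent": "chitchat", ...}

    chain.invoke({"input": "Is it raining more often than it used to?"})
    # -> e.g. {"intent": "search", ...}  # borderline, but readable as climate-related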
climateqa/engine/chains/prompts.py CHANGED
@@ -36,13 +36,41 @@ You are given a question and extracted passages of the IPCC and/or IPBES reports
 """
 
 
+# answer_prompt_template_old = """
+# You are ClimateQ&A, an AI Assistant created by Ekimetrics. You are given a question and extracted passages of reports. Provide a clear and structured answer based on the passages provided, the context and the guidelines.
+
+# Guidelines:
+# - If the passages have useful facts or numbers, use them in your answer.
+# - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
+# - Do not use the sentence 'Doc i says ...' to say where information came from.
+# - If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]
+# - Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.
+# - If it makes sense, use bullet points and lists to make your answers easier to understand.
+# - You do not need to use every passage. Only use the ones that help answer the question.
+# - If the documents do not have the information needed to answer the question, just say you do not have enough information.
+# - Consider by default that the question is about the past century unless it is specified otherwise.
+# - If the passage is the caption of a picture, you can still use it as part of your answer as any other document.
+
+# -----------------------
+# Passages:
+# {context}
+
+# -----------------------
+# Question: {query} - Explained to {audience}
+# Answer in {language} with the passages citations:
+# """
+
 answer_prompt_template = """
-You are ClimateQ&A, an AI Assistant created by Ekimetrics. You are given a question and extracted passages of the IPCC and/or IPBES reports. Provide a clear and structured answer based on the passages provided, the context and the guidelines.
+You are ClimateQ&A, an AI Assistant created by Ekimetrics. You are given a question and extracted passages of reports. Provide a clear and structured answer based on the passages provided, the context and the guidelines.
 
 Guidelines:
 - If the passages have useful facts or numbers, use them in your answer.
 - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
-- Do not use the sentence 'Doc i says ...' to say where information came from.
+- You will receive passages from different reports, e.g., IPCC and PPCP. Make separate paragraphs and specify the source of the information in your answer, e.g., "According to IPCC, ...".
+- The different sources are IPCC, IPBES, PPCP (for Plan Climat Air Energie Territorial de Paris), PBDP (for Plan Biodiversité de Paris), Acclimaterra (Rapport scientifique de la région Nouvelle Aquitaine en France).
+- If the reports are local (like PPCP, PBDP, Acclimaterra), consider that the information is specific to the region and not global. If the document is about a nearby region (for example, an extract from Acclimaterra for a question about Britain), explicitly state the concerned region.
+- Do not mention that you are using specific extract documents, but mention only the source information. "According to IPCC, ..." rather than "According to the provided document from IPCC ..."
+- Make a clear distinction between information from IPCC, IPBES, Acclimaterra that are scientific reports and PPCP, PBDP that are strategic reports. Strategic reports should not be taken as verified facts, but as political or strategic decisions.
 - If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]
 - Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.
 - If it makes sense, use bullet points and lists to make your answers easier to understand.
@@ -51,6 +79,7 @@ Guidelines:
 - Consider by default that the question is about the past century unless it is specified otherwise.
 - If the passage is the caption of a picture, you can still use it as part of your answer as any other document.
 
+
 -----------------------
 Passages:
 {context}
@@ -60,7 +89,6 @@ Question: {query} - Explained to {audience}
 Answer in {language} with the passages citations:
 """
 
-
 papers_prompt_template = """
 You are ClimateQ&A, an AI Assistant created by Ekimetrics. You are given a question and extracted abstracts of scientific papers. Provide a clear and structured answer based on the abstracts provided, the context and the guidelines.
 
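A sketch of how the updated answer template might be rendered at answer time; the context string, question and import path are illustrative, not taken from the repository:

    from langchain_core.prompts import ChatPromptTemplate

    prompt = ChatPromptTemplate.from_template(answer_prompt_template)
    messages = prompt.format_messages(
        context="Doc 1 - Source : PPCP - Content : Paris vise la neutralité carbone en 2050 ...",
        query="What is Paris doing about climate change?",
        audience="the general public",
        language="English",
    )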
climateqa/engine/chains/query_transformation.py CHANGED
@@ -7,43 +7,7 @@ from langchain.prompts import ChatPromptTemplate
 from langchain_core.utils.function_calling import convert_to_openai_function
 from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
 
-
-ROUTING_INDEX = {
-    "Vector":["IPCC","IPBES","IPOS"],
-    "OpenAlex":["OpenAlex"],
-}
-
-POSSIBLE_SOURCES = [y for values in ROUTING_INDEX.values() for y in values]
-
-# Prompt from the original paper https://arxiv.org/pdf/2305.14283
-# Query Rewriting for Retrieval-Augmented Large Language Models
-class QueryDecomposition(BaseModel):
-    """
-    Decompose the user query into smaller parts to think step by step to answer this question
-    Act as a simple planning agent
-    """
-
-    questions: List[str] = Field(
-        description="""
-        Think step by step to answer this question, and provide one or several search engine questions in English for knowledge that you need.
-        Suppose that the user is looking for information about climate change, energy, biodiversity, nature, and everything we can find the IPCC reports and scientific literature
-        - If it's already a standalone and explicit question, just return the reformulated question for the search engine
-        - If you need to decompose the question, output a list of maximum 2 to 3 questions
-        """
-    )
-
-
-class Location(BaseModel):
-    country:str = Field(...,description="The country if directly mentioned or inferred from the location (cities, regions, adresses), ex: France, USA, ...")
-    location:str = Field(...,description="The specific place if mentioned (cities, regions, addresses), ex: Marseille, New York, Wisconsin, ...")
-
-class QueryAnalysis(BaseModel):
-    """
-    Analyzing the user query to extract topics, sources and date
-    Also do query expansion to get alternative search queries
-    Also provide simple keywords to feed a search engine
-    """
-
+# OLD QUERY ANALYSIS
 # keywords: List[str] = Field(
 #     description="""
 #     Extract the keywords from the user query to feed a search engine as a list
@@ -68,17 +32,10 @@ class QueryAnalysis(BaseModel):
 #     This questions should help you get more context and information about the user query
 #     """
 # )
-
-    sources: List[Literal["IPCC", "IPBES", "IPOS"]] = Field( #,"OpenAlex"]] = Field(
-        ...,
-        description="""
-        Given a user question choose which documents would be most relevant for answering their question,
-        - IPCC is for questions about climate change, energy, impacts, and everything we can find the IPCC reports
-        - IPBES is for questions about biodiversity and nature
-        - IPOS is for questions about the ocean and deep sea mining
-        """,
-        # - OpenAlex is for any other questions that are not in the previous categories but could be found in the scientific litterature
-    )
+# - OpenAlex is for any other questions that are not in the previous categories but could be found in the scientific literature
+#
+
+
 # topics: List[Literal[
 #     "Climate change",
 #     "Biodiversity",
@@ -101,7 +58,82 @@ class QueryAnalysis(BaseModel):
 # location:Location
 
 
+
+ROUTING_INDEX = {
+    "IPx":["IPCC", "IPBES", "IPOS"],
+    "POC": ["AcclimaTerra", "PCAET","Biodiv"],
+    "OpenAlex":["OpenAlex"],
+}
+
+POSSIBLE_SOURCES = [y for values in ROUTING_INDEX.values() for y in values]
+
+# Prompt from the original paper https://arxiv.org/pdf/2305.14283
+# Query Rewriting for Retrieval-Augmented Large Language Models
+class QueryDecomposition(BaseModel):
+    """
+    Decompose the user query into smaller parts to think step by step to answer this question
+    Act as a simple planning agent
+    """
+
+    questions: List[str] = Field(
+        description="""
+        Think step by step to answer this question, and provide one or several search engine questions in the provided language for knowledge that you need.
+        Suppose that the user is looking for information about climate change, energy, biodiversity, nature, and everything we can find in the IPCC reports and scientific literature
+        - If it's already a standalone and explicit question, just return the reformulated question for the search engine
+        - If you need to decompose the question, output a list of maximum 2 to 3 questions
+        """
+    )
+
+
+class Location(BaseModel):
+    country:str = Field(...,description="The country if directly mentioned or inferred from the location (cities, regions, addresses), ex: France, USA, ...")
+    location:str = Field(...,description="The specific place if mentioned (cities, regions, addresses), ex: Marseille, New York, Wisconsin, ...")
+
+class QueryTranslation(BaseModel):
+    """Translate the query into a given language"""
+
+    question : str = Field(
+        description="""
+        Translate the questions into the given language
+        If the question is already in the given language, just return the same question
+        """,
+    )
+
+
+class QueryAnalysis(BaseModel):
+    """
+    Analyze the user query to extract the relevant sources
+
+    Deprecated:
+        Analyzing the user query to extract topics, sources and date
+        Also do query expansion to get alternative search queries
+        Also provide simple keywords to feed a search engine
+    """
+
+    sources: List[Literal["IPCC", "IPBES", "IPOS", "AcclimaTerra", "PCAET","Biodiv"]] = Field( #,"OpenAlex"]] = Field(
+        ...,
+        description="""
+        Given a user question choose which documents would be most relevant for answering their question,
+        - IPCC is for questions about climate change, energy, impacts, and everything we can find in the IPCC reports
+        - IPBES is for questions about biodiversity and nature
+        - IPOS is for questions about the ocean and deep sea mining
+        - AcclimaTerra is for questions about any specific place in, or close to, the French region "Nouvelle-Aquitaine"
+        - PCAET is the Plan Climat Energie Territorial for the city of Paris
+        - Biodiv is the Biodiversity plan for the city of Paris
+        """,
+    )
+
+
+
 def make_query_decomposition_chain(llm):
+    """Chain to decompose a query into smaller parts to think step by step to answer this question
+
+    Args:
+        llm: The language model used to build the decomposition chain.
+
+    Returns:
+        A runnable chain mapping {"input": ...} to a dict with a "questions" list.
+    """
 
     openai_functions = [convert_to_openai_function(QueryDecomposition)]
     llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"QueryDecomposition"})
@@ -115,7 +147,8 @@ def make_query_decomposition_chain(llm):
     return chain
 
 
-def make_query_rewriter_chain(llm):
+def make_query_analysis_chain(llm):
+    """Analyze the user query to extract the relevant sources"""
 
     openai_functions = [convert_to_openai_function(QueryAnalysis)]
     llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"QueryAnalysis"})
@@ -123,7 +156,7 @@ def make_query_rewriter_chain(llm):
 
 
     prompt = ChatPromptTemplate.from_messages([
-        ("system", "You are a helpful assistant, you will analyze, translate and reformulate the user input message using the function provided"),
+        ("system", "You are a helpful assistant, you will analyze the user input message using the function provided"),
         ("user", "input: {input}")
     ])
 
@@ -132,22 +165,63 @@ def make_query_rewriter_chain(llm):
     return chain
 
 
+def make_query_translation_chain(llm):
+    """Translate the user query into a given language"""
+
+    openai_functions = [convert_to_openai_function(QueryTranslation)]
+    llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"QueryTranslation"})
+
+
+
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are a helpful assistant, translate the question into {language}"),
+        ("user", "input: {input}")
+    ])
+
+
+    chain = prompt | llm_with_functions | JsonOutputFunctionsParser()
+    return chain
+
+def group_by_sources_types(sources):
+    sources_types = {}
+    IPx_sources = ["IPCC", "IPBES", "IPOS"]
+    local_sources = ["AcclimaTerra", "PCAET","Biodiv"]
+    if any(source in IPx_sources for source in sources):
+        sources_types["IPx"] = list(set(sources).intersection(IPx_sources))
+    if any(source in local_sources for source in sources):
+        sources_types["POC"] = list(set(sources).intersection(local_sources))
+    return sources_types
+
+
 def make_query_transform_node(llm,k_final=15):
+    """
+    Creates a query transformation node that processes and transforms a given query state.
+    Args:
+        llm: The language model to be used for query decomposition and rewriting.
+        k_final (int, optional): The final number of questions to be generated. Defaults to 15.
+    Returns:
+        function: A function that takes a query state and returns a transformed state.
+    The returned function performs the following steps:
+    1. Checks if the query should be processed in auto mode based on the state.
+    2. Retrieves the input sources from the state or defaults to a predefined routing index.
+    3. Decomposes the query using the decomposition chain.
+    4. Analyzes each decomposed question using the query analysis chain.
+    5. Ensures that the sources returned by the language model are valid.
+    6. Explodes the questions into multiple questions with different sources based on the mode.
+    7. Constructs a new state with the transformed questions and their respective sources.
+    """
+
 
     decomposition_chain = make_query_decomposition_chain(llm)
-    rewriter_chain = make_query_rewriter_chain(llm)
+    query_analysis_chain = make_query_analysis_chain(llm)
+    query_translation_chain = make_query_translation_chain(llm)
 
     def transform_query(state):
         print("---- Transform query ----")
 
-
-        if "sources_auto" not in state or state["sources_auto"] is None or state["sources_auto"] is False:
-            auto_mode = False
-        else:
-            auto_mode = True
-
-        sources_input = state.get("sources_input")
-        if sources_input is None: sources_input = ROUTING_INDEX["Vector"]
+        auto_mode = state.get("sources_auto", True)
+        sources_input = state.get("sources_input", ROUTING_INDEX["IPx"])
+
 
         new_state = {}
 
@@ -155,24 +229,41 @@ def make_query_transform_node(llm,k_final=15):
         decomposition_output = decomposition_chain.invoke({"input":state["query"]})
         new_state.update(decomposition_output)
 
+
         # Query Analysis
         questions = []
         for question in new_state["questions"]:
             question_state = {"question":question}
-            analysis_output = rewriter_chain.invoke({"input":question})
+            query_analysis_output = query_analysis_chain.invoke({"input":question})
 
             # TODO WARNING llm should always return smthg
-            # The case when the llm does not return any sources
-            if not analysis_output["sources"] or not all(source in ["IPCC", "IPBS", "IPOS"] for source in analysis_output["sources"]):
-                analysis_output["sources"] = ["IPCC", "IPBES", "IPOS"]
+            # The case when the llm does not return any sources or a wrong output
+            if not query_analysis_output["sources"] or not all(source in ["IPCC", "IPBES", "IPOS","AcclimaTerra", "PCAET","Biodiv"] for source in query_analysis_output["sources"]):
+                query_analysis_output["sources"] = ["IPCC", "IPBES", "IPOS"]
+
+            sources_types = group_by_sources_types(query_analysis_output["sources"])
+            for source_type,sources in sources_types.items():
+                question_state = {
+                    "question":question,
+                    "sources":sources,
+                    "source_type":source_type
+                }
 
-            question_state.update(analysis_output)
-            questions.append(question_state)
+                questions.append(question_state)
 
+        # Translate question into the document language
+        for q in questions:
+            if q["source_type"]=="IPx":
+                translation_output = query_translation_chain.invoke({"input":q["question"],"language":"English"})
+                q["question"] = translation_output["question"]
+            elif q["source_type"]=="POC":
+                translation_output = query_translation_chain.invoke({"input":q["question"],"language":"French"})
+                q["question"] = translation_output["question"]
 
         # Explode the questions into multiple questions with different sources
         new_questions = []
         for q in questions:
-            question,sources = q["question"],q["sources"]
+            question,sources,source_type = q["question"],q["sources"], q["source_type"]
 
             # If not auto mode we take the configuration
             if not auto_mode:
@@ -181,7 +272,7 @@ def make_query_transform_node(llm,k_final=15):
             for index,index_sources in ROUTING_INDEX.items():
                 selected_sources = list(set(sources).intersection(index_sources))
                 if len(selected_sources) > 0:
-                    new_questions.append({"question":question,"sources":selected_sources,"index":index})
+                    new_questions.append({"question":question,"sources":selected_sources,"index":index, "source_type":source_type})
 
         # # Add the number of questions to search
         # k_by_question = k_final // len(new_questions)
@@ -191,11 +282,19 @@ def make_query_transform_node(llm,k_final=15):
         # new_state["questions"] = new_questions
         # new_state["remaining_questions"] = new_questions
 
+        n_questions = {
+            "total":len(new_questions),
+            "IPx":len([q for q in new_questions if q["index"] == "IPx"]),
+            "POC":len([q for q in new_questions if q["index"] == "POC"]),
+        }
 
         new_state = {
-            "remaining_questions":new_questions,
-            "n_questions":len(new_questions),
+            "questions_list":new_questions,
+            "n_questions":n_questions,
+            "handled_questions_index":[],
        }
+        print("New questions")
+        print(new_questions)
         return new_state
 
     return transform_query
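group_by_sources_types is pure Python, so its behavior follows directly from the code above; the fan-out example after it is only indicative of the questions_list shape, since the questions themselves come from the LLM:

    group_by_sources_types(["IPCC", "PCAET", "Biodiv"])
    # -> {"IPx": ["IPCC"], "POC": ["PCAET", "Biodiv"]}  (order inside the lists may vary: set intersection)

    # A question about Paris would then typically explode into entries such as:
    # [{"question": "Quel est le plan climat de Paris ?", "sources": ["PCAET", "Biodiv"],
    #   "index": "POC", "source_type": "POC"},
    #  {"question": "What are city-level climate mitigation plans?", "sources": ["IPCC"],
    #   "index": "IPx", "source_type": "IPx"}]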
climateqa/engine/chains/retrieve_documents.py CHANGED
@@ -7,7 +7,7 @@ from langchain_core.runnables import chain
 from langchain_core.runnables import RunnableParallel, RunnablePassthrough
 from langchain_core.runnables import RunnableLambda
 
-from ..reranker import rerank_docs
+from ..reranker import rerank_docs, rerank_and_sort_docs
 # from ...knowledge.retriever import ClimateQARetriever
 from ...knowledge.openalex import OpenAlexRetriever
 from .keywords_extraction import make_keywords_extraction_chain
@@ -15,7 +15,9 @@ from ..utils import log_event
 from langchain_core.vectorstores import VectorStore
 from typing import List
 from langchain_core.documents.base import Document
+import asyncio
 
+from typing import Any, Dict, List, Tuple
 
 def divide_into_parts(target, parts):
@@ -87,7 +89,7 @@ def _get_k_images_by_question(n_questions):
     elif n_questions == 2:
         return 5
     elif n_questions == 3:
-        return 2
+        return 3
     else:
         return 1
 
@@ -98,11 +100,77 @@ def _add_metadata_and_score(docs: List) -> Document:
         doc.page_content = doc.page_content.replace("\r\n"," ")
         doc.metadata["similarity_score"] = score
         doc.metadata["content"] = doc.page_content
-        doc.metadata["page_number"] = int(doc.metadata["page_number"]) + 1
+        if doc.metadata["page_number"] != "N/A":
+            doc.metadata["page_number"] = int(doc.metadata["page_number"]) + 1
+        else:
+            doc.metadata["page_number"] = 1
         # doc.page_content = f"""Doc {i+1} - {doc.metadata['short_name']}: {doc.page_content}"""
         docs_with_metadata.append(doc)
     return docs_with_metadata
 
+def remove_duplicates_chunks(docs):
+    # Remove duplicates or almost duplicates
+    docs = sorted(docs,key=lambda x: x[1],reverse=True)
+    seen = set()
+    result = []
+    for doc in docs:
+        if doc[0].page_content not in seen:
+            seen.add(doc[0].page_content)
+            result.append(doc)
+    return result
+
+async def get_POC_relevant_documents(
+    query: str,
+    vectorstore:VectorStore,
+    sources:list = ["Acclimaterra","PCAET","Plan Biodiversite"],
+    search_figures:bool = False,
+    search_only:bool = False,
+    k_documents:int = 10,
+    threshold:float = 0.6,
+    k_images: int = 5,
+    reports:list = [],
+    min_size:int = 200,
+):
+    # Prepare base search kwargs
+    filters = {}
+    docs_question = []
+    docs_images = []
+
+    # TODO add source selection
+    # if len(reports) > 0:
+    #     filters["short_name"] = {"$in":reports}
+    # else:
+    #     filters["source"] = { "$in": sources}
+
+    filters_text = {
+        **filters,
+        "chunk_type":"text",
+        # "report_type": {}, # TODO to be completed to choose the right documents / chapters according to the analysis of the question
+    }
+
+    docs_question = vectorstore.similarity_search_with_score(query=query,filter = filters_text,k = k_documents)
+    # remove duplicates or almost duplicates
+    docs_question = remove_duplicates_chunks(docs_question)
+    docs_question = [x for x in docs_question if x[1] > threshold]
+
+    if search_figures:
+        # Images
+        filters_image = {
+            **filters,
+            "chunk_type":"image"
+        }
+        docs_images = vectorstore.similarity_search_with_score(query=query,filter = filters_image,k = k_images)
+
+    docs_question, docs_images = _add_metadata_and_score(docs_question), _add_metadata_and_score(docs_images)
+
+    docs_question = [x for x in docs_question if len(x.page_content) > min_size]
+
+    return {
+        "docs_question" : docs_question,
+        "docs_images" : docs_images
+    }
+
+
 async def get_IPCC_relevant_documents(
     query: str,
     vectorstore:VectorStore,
@@ -164,8 +232,7 @@ async def get_IPCC_relevant_documents(
         "chunk_type":"text",
         "report_type": { "$nin":["SPM"]},
     }
-    k_full = k_total - len(docs_summaries)
-    docs_full = vectorstore.similarity_search_with_score(query=query,filter = filters_full,k = k_full)
+    docs_full = vectorstore.similarity_search_with_score(query=query,filter = filters_full,k = k_total)
 
     if search_figures:
         # Images
@@ -188,15 +255,45 @@ async def get_IPCC_relevant_documents(
     }
 
 
+
+def concatenate_documents(index, source_type, docs_question_dict, k_by_question, k_summary_by_question, k_images_by_question):
+    # Keep the right number of documents - The k_summary documents from SPM are placed in front
+    if source_type == "IPx":
+        docs_question = docs_question_dict["docs_summaries"][:k_summary_by_question] + docs_question_dict["docs_full"][:(k_by_question - k_summary_by_question)]
+    elif source_type == "POC":
+        docs_question = docs_question_dict["docs_question"][:k_by_question]
+    else:
+        raise ValueError("source_type should be either IPx or POC")
+    # docs_question = [doc for key in docs_question_dict.keys() for doc in docs_question_dict[key]][:(k_by_question)]
+
+    images_question = docs_question_dict["docs_images"][:k_images_by_question]
+
+    return docs_question, images_question
+
 
 # The chain callback is not necessary, but it propagates the langchain callbacks to the astream_events logger to display intermediate results
 # @chain
-async def retrieve_documents(state,config, vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5, k_images=5):
+async def retrieve_documents(
+    current_question: Dict[str, Any],
+    config: Dict[str, Any],
+    source_type: str,
+    vectorstore: VectorStore,
+    reranker: Any,
+    search_figures: bool = False,
+    search_only: bool = False,
+    reports: list = [],
+    rerank_by_question: bool = True,
+    k_images_by_question: int = 5,
+    k_before_reranking: int = 100,
+    k_by_question: int = 5,
+    k_summary_by_question: int = 3
+) -> Tuple[List[Document], List[Document]]:
     """
-    Retrieve and rerank documents based on the current question in the state.
+    Retrieve and rerank the documents corresponding to one question, based on the question and its selected sources
 
     Args:
         state (dict): The current state containing documents, related content, relevant content sources, remaining questions and n_questions.
+        current_question (dict): The current question being processed.
         config (dict): Configuration settings for logging and other purposes.
         vectorstore (object): The vector store used to retrieve relevant documents.
         reranker (object): The reranker used to rerank the retrieved documents.
@@ -209,95 +306,194 @@ async def retrieve_documents(state,config, vectorstore,reranker,llm,rerank_by_qu
     Returns:
         dict: The updated state containing the retrieved and reranked documents, related content, and remaining questions.
     """
-    print("---- Retrieve documents ----")
-
-    # Get the documents from the state
-    if "documents" in state and state["documents"] is not None:
-        docs = state["documents"]
-    else:
-        docs = []
-    # Get the related_content from the state
-    if "related_content" in state and state["related_content"] is not None:
-        related_content = state["related_content"]
-    else:
-        related_content = []
-
-    search_figures = "IPCC figures" in state["relevant_content_sources"]
-    search_only = state["search_only"]
-
-    # Get the current question
-    current_question = state["remaining_questions"][0]
-    remaining_questions = state["remaining_questions"][1:]
-
-    k_by_question = k_final // state["n_questions"]
-    k_summary_by_question = _get_k_summary_by_question(state["n_questions"])
-    k_images_by_question = _get_k_images_by_question(state["n_questions"])
-
     sources = current_question["sources"]
     question = current_question["question"]
     index = current_question["index"]
+    source_type = current_question["source_type"]
 
     print(f"Retrieve documents for question: {question}")
     await log_event({"question":question,"sources":sources,"index":index},"log_retriever",config)
 
+    print(f"""---- Retrieve documents from {current_question["source_type"]}----""")
 
-    if index == "Vector":  # always true for now
+    if source_type == "IPx":
         docs_question_dict = await get_IPCC_relevant_documents(
             query = question,
             vectorstore=vectorstore,
             search_figures = search_figures,
             sources = sources,
             min_size = 200,
-            k_summary = k_summary_by_question,
+            k_summary = k_before_reranking-1,
             k_total = k_before_reranking,
             k_images = k_images_by_question,
             threshold = 0.5,
             search_only = search_only,
+            reports = reports,
         )
 
+    if source_type == "POC":
+        docs_question_dict = await get_POC_relevant_documents(
+            query = question,
+            vectorstore=vectorstore,
+            search_figures = search_figures,
+            sources = sources,
+            threshold = 0.5,
+            search_only = search_only,
+            reports = reports,
+            min_size= 200,
+            k_documents= k_before_reranking,
+            k_images= k_by_question
+        )
+
     # Rerank
-    if reranker is not None:
+    if reranker is not None and rerank_by_question:
         with suppress_output():
-            docs_question_summary_reranked = rerank_docs(reranker,docs_question_dict["docs_summaries"],question)
-            docs_question_fulltext_reranked = rerank_docs(reranker,docs_question_dict["docs_full"],question)
-            docs_question_images_reranked = rerank_docs(reranker,docs_question_dict["docs_images"],question)
-            if rerank_by_question:
-                docs_question_summary_reranked = sorted(docs_question_summary_reranked, key=lambda x: x.metadata["reranking_score"], reverse=True)
-                docs_question_fulltext_reranked = sorted(docs_question_fulltext_reranked, key=lambda x: x.metadata["reranking_score"], reverse=True)
-                docs_question_images_reranked = sorted(docs_question_images_reranked, key=lambda x: x.metadata["reranking_score"], reverse=True)
+            for key in docs_question_dict.keys():
+                docs_question_dict[key] = rerank_and_sort_docs(reranker,docs_question_dict[key],question)
     else:
-        docs_question = docs_question_dict["docs_summaries"] + docs_question_dict["docs_full"]
         # Add a default reranking score
         for doc in docs_question:
             doc.metadata["reranking_score"] = doc.metadata["similarity_score"]
 
-    docs_question = docs_question_summary_reranked + docs_question_fulltext_reranked
-    docs_question = docs_question[:k_by_question]
-    images_question = docs_question_images_reranked[:k_images]
-
+    # Keep the right number of documents
+    docs_question, images_question = concatenate_documents(index, source_type, docs_question_dict, k_by_question, k_summary_by_question, k_images_by_question)
+
+    # Rerank the documents to put the most relevant in front
     if reranker is not None and rerank_by_question:
-        docs_question = sorted(docs_question, key=lambda x: x.metadata["reranking_score"], reverse=True)
-
+        docs_question = rerank_and_sort_docs(reranker, docs_question, question)
+
     # Add sources used in the metadata
     docs_question = _add_sources_used_in_metadata(docs_question,sources,question,index)
     images_question = _add_sources_used_in_metadata(images_question,sources,question,index)
 
-    # Add to the list of docs
-    docs.extend(docs_question)
-    related_content.extend(images_question)
-    new_state = {"documents":docs, "related_contents": related_content,"remaining_questions":remaining_questions}
-    return new_state
+    return docs_question, images_question
+
+
+async def retrieve_documents_for_all_questions(
+    search_figures,
+    search_only,
+    reports,
+    questions_list,
+    n_questions,
+    config,
+    source_type,
+    to_handle_questions_index,
+    vectorstore,
+    reranker,
+    rerank_by_question=True,
+    k_final=15,
+    k_before_reranking=100
+):
+    """
+    Retrieve documents in parallel for all questions.
+    """
+    # to_handle_questions_index = [x for x in state["questions_list"] if x["source_type"] == "IPx"]
+
+    # TODO split the questions by source type in the question state + conditions on the number of questions handled per source type
+    # search_figures = "Figures (IPCC/IPBES)" in state["relevant_content_sources_selection"]
+    # search_only = state["search_only"]
+    # reports = state["reports"]
+    # questions_list = state["questions_list"]
+
+    # k_by_question = k_final // state["n_questions"]["total"]
+    # k_summary_by_question = _get_k_summary_by_question(state["n_questions"]["total"])
+    # k_images_by_question = _get_k_images_by_question(state["n_questions"]["total"])
+    k_by_question = k_final // n_questions
+    k_summary_by_question = _get_k_summary_by_question(n_questions)
+    k_images_by_question = _get_k_images_by_question(n_questions)
+    k_before_reranking=100
+
+    tasks = [
+        retrieve_documents(
+            current_question=question,
+            config=config,
+            source_type=source_type,
+            vectorstore=vectorstore,
+            reranker=reranker,
+            search_figures=search_figures,
+            search_only=search_only,
+            reports=reports,
+            rerank_by_question=rerank_by_question,
+            k_images_by_question=k_images_by_question,
+            k_before_reranking=k_before_reranking,
+            k_by_question=k_by_question,
+            k_summary_by_question=k_summary_by_question
+        )
+        for i, question in enumerate(questions_list) if i in to_handle_questions_index
+    ]
+    results = await asyncio.gather(*tasks)
+    # Combine results
+    new_state = {"documents": [], "related_contents": [], "handled_questions_index": to_handle_questions_index}
+    for docs_question, images_question in results:
+        new_state["documents"].extend(docs_question)
+        new_state["related_contents"].extend(images_question)
+    return new_state
 
+def make_IPx_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5):
 
-def make_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5):
-    @chain
-    async def retrieve_docs(state, config):
-        state = await retrieve_documents(state,config, vectorstore,reranker,llm,rerank_by_question, k_final, k_before_reranking, k_summary)
+    async def retrieve_IPx_docs(state, config):
+        source_type = "IPx"
+        IPx_questions_index = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "IPx"]
+
+        search_figures = "Figures (IPCC/IPBES)" in state["relevant_content_sources_selection"]
+        search_only = state["search_only"]
+        reports = state["reports"]
+        questions_list = state["questions_list"]
+        n_questions = state["n_questions"]["total"]
+
+        state = await retrieve_documents_for_all_questions(
+            search_figures=search_figures,
+            search_only=search_only,
+            reports=reports,
+            questions_list=questions_list,
+            n_questions=n_questions,
+            config=config,
+            source_type=source_type,
+            to_handle_questions_index=IPx_questions_index,
+            vectorstore=vectorstore,
+            reranker=reranker,
+            rerank_by_question=rerank_by_question,
+            k_final=k_final,
+            k_before_reranking=k_before_reranking,
+        )
         return state
 
-    return retrieve_docs
+    return retrieve_IPx_docs
+
+
+def make_POC_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5):
+
+    async def retrieve_POC_docs_node(state, config):
+        if "POC region" not in state["relevant_content_sources_selection"]:
+            return {}
+
+        source_type = "POC"
+        POC_questions_index = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "POC"]
+
+        search_figures = "Figures (IPCC/IPBES)" in state["relevant_content_sources_selection"]
+        search_only = state["search_only"]
+        reports = state["reports"]
+        questions_list = state["questions_list"]
+        n_questions = state["n_questions"]["total"]
+
+        state = await retrieve_documents_for_all_questions(
+            search_figures=search_figures,
+            search_only=search_only,
+            reports=reports,
+            questions_list=questions_list,
+            n_questions=n_questions,
+            config=config,
+            source_type=source_type,
+            to_handle_questions_index=POC_questions_index,
+            vectorstore=vectorstore,
+            reranker=reranker,
+            rerank_by_question=rerank_by_question,
+            k_final=k_final,
+            k_before_reranking=k_before_reranking,
+        )
        return state
 
+    return retrieve_POC_docs_node
 
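retrieve_documents_for_all_questions is a plain asyncio fan-out/fan-in. A self-contained sketch of the same pattern, with a hypothetical fetch function standing in for the vectorstore and reranker calls:

    import asyncio

    async def fetch(question: str) -> list[str]:
        await asyncio.sleep(0.1)  # stand-in for a similarity search + rerank
        return [f"doc for: {question}"]

    async def main():
        questions = ["q1", "q2", "q3"]
        to_handle = [0, 2]  # indices this retriever node is responsible for
        tasks = [fetch(q) for i, q in enumerate(questions) if i in to_handle]
        results = await asyncio.gather(*tasks)  # run the retrievals concurrently
        print([doc for r in results for doc in r])

    asyncio.run(main())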
climateqa/engine/chains/retrieve_papers.py CHANGED
@@ -32,8 +32,8 @@ def generate_keywords(query):
     return keywords
 
 
-async def find_papers(query,after, relevant_content_sources, reranker= reranker):
-    if "OpenAlex" in relevant_content_sources:
+async def find_papers(query,after, relevant_content_sources_selection, reranker= reranker):
+    if "Papers (OpenAlex)" in relevant_content_sources_selection:
         summary = ""
         keywords = generate_keywords(query)
         df_works = oa.search(keywords,after = after)
climateqa/engine/graph.py CHANGED
@@ -9,6 +9,9 @@ from langchain_core.runnables.graph import CurveStyle, MermaidDrawMethod
 from typing_extensions import TypedDict
 from typing import List, Dict
 
+import operator
+from typing import Annotated
+import pandas as pd
 from IPython.display import display, HTML, Image
 
 from .chains.answer_chitchat import make_chitchat_node
@@ -16,10 +19,11 @@ from .chains.answer_ai_impact import make_ai_impact_node
 from .chains.query_transformation import make_query_transform_node
 from .chains.translation import make_translation_node
 from .chains.intent_categorization import make_intent_categorization_node
-from .chains.retrieve_documents import make_retriever_node
+from .chains.retrieve_documents import make_IPx_retriever_node, make_POC_retriever_node
 from .chains.answer_rag import make_rag_node
 from .chains.graph_retriever import make_graph_retriever_node
 from .chains.chitchat_categorization import make_chitchat_intent_categorization_node
+from .chains.drias_retriever import make_drias_retriever_node
 # from .chains.set_defaults import set_defaults
 
 class GraphState(TypedDict):
@@ -31,25 +35,32 @@ class GraphState(TypedDict):
     intent : str
     search_graphs_chitchat : bool
     query: str
-    remaining_questions : List[dict]
+    questions_list : List[dict]
+    handled_questions_index : Annotated[list[int], operator.add]
     n_questions : int
     answer: str
     audience: str = "experts"
-    sources_input: List[str] = ["IPCC","IPBES"]
-    relevant_content_sources: List[str] = ["IPCC figures"]
+    sources_input: List[str] = ["IPCC","IPBES"]  # Deprecated -> used only for graphs, which can only be OWID
+    relevant_content_sources_selection: List[str] = ["Figures (IPCC/IPBES)"]
     sources_auto: bool = True
     min_year: int = 1960
     max_year: int = None
-    documents: List[Document]
-    related_contents : Dict[str,Document]
-    recommended_content : List[Document]
+    documents: Annotated[List[Document], operator.add]
+    related_contents : Annotated[List[Document], operator.add]  # Images
+    recommended_content : List[Document]  # OWID Graphs # TODO merge with related_contents
     search_only : bool = False
+    reports : List[str] = []
+    drias_data: pd.DataFrame
+    drias_sql_query : str
+
+def dummy(state):
+    return
 
 def search(state): #TODO
-    return state
+    return
 
 def answer_search(state): #TODO
-    return state
+    return
 
 def route_intent(state):
     intent = state["intent"]
@@ -59,12 +70,12 @@ def route_intent(state):
     # return "answer_ai_impact"
     else:
         # Search route
-        return "search"
+        return "answer_climate"
 
 def chitchat_route_intent(state):
     intent = state["search_graphs_chitchat"]
     if intent is True:
-        return "retrieve_graphs_chitchat"
+        return END #TODO
     elif intent is False:
         return END
 
@@ -72,27 +83,50 @@ def route_translation(state):
     if state["language"].lower() == "english":
         return "transform_query"
     else:
-        return "translate_query"
+        return "transform_query"
+        # return "translate_query" #TODO : add translation
+
 
 def route_based_on_relevant_docs(state,threshold_docs=0.2):
     docs = [x for x in state["documents"] if x.metadata["reranking_score"] > threshold_docs]
+    print("Route : ", ["answer_rag" if len(docs) > 0 else "answer_rag_no_docs"])
     if len(docs) > 0:
         return "answer_rag"
     else:
         return "answer_rag_no_docs"
 
-def route_retrieve_documents(state):
-    if state["search_only"] :
-        return END
-    elif len(state["remaining_questions"]) > 0:
-        return "retrieve_documents"
+def route_continue_retrieve_documents(state):
+    index_question_ipx = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "IPx"]
+    questions_ipx_finished = all(elem in state["handled_questions_index"] for elem in index_question_ipx)
+    if questions_ipx_finished:
+        return "end_retrieve_IPx_documents"
     else:
-        return "answer_search"
+        return "retrieve_documents"
+
+def route_continue_retrieve_local_documents(state):
+    index_question_poc = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "POC"]
+    questions_poc_finished = all(elem in state["handled_questions_index"] for elem in index_question_poc)
+    # if questions_poc_finished and state["search_only"]:
+    #     return END
+    if questions_poc_finished or ("POC region" not in state["relevant_content_sources_selection"]):
+        return "end_retrieve_local_documents"
+    else:
+        return "retrieve_local_data"
+
+def route_retrieve_documents(state):
+    sources_to_retrieve = []
+
+    if "Graphs (OurWorldInData)" in state["relevant_content_sources_selection"]:
+        sources_to_retrieve.append("retrieve_graphs")
+
+    if sources_to_retrieve == []:
+        return END
+    return sources_to_retrieve
 
 def make_id_dict(values):
     return {k:k for k in values}
 
-def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, threshold_docs=0.2):
+def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_region, reranker, threshold_docs=0.2):
 
     workflow = StateGraph(GraphState)
 
@@ -102,8 +136,9 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     translate_query = make_translation_node(llm)
     answer_chitchat = make_chitchat_node(llm)
     answer_ai_impact = make_ai_impact_node(llm)
-    retrieve_documents = make_retriever_node(vectorstore_ipcc, reranker, llm)
+    retrieve_documents = make_IPx_retriever_node(vectorstore_ipcc, reranker, llm)
     retrieve_graphs = make_graph_retriever_node(vectorstore_graphs, reranker)
+    # retrieve_local_data = make_POC_retriever_node(vectorstore_region, reranker, llm)
     answer_rag = make_rag_node(llm, with_docs=True)
     answer_rag_no_docs = make_rag_node(llm, with_docs=False)
     chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
@@ -111,13 +146,14 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     # Define the nodes
     # workflow.add_node("set_defaults", set_defaults)
     workflow.add_node("categorize_intent", categorize_intent)
-    workflow.add_node("search", search)
+    workflow.add_node("answer_climate", dummy)
     workflow.add_node("answer_search", answer_search)
     workflow.add_node("transform_query", transform_query)
     workflow.add_node("translate_query", translate_query)
     workflow.add_node("answer_chitchat", answer_chitchat)
     workflow.add_node("chitchat_categorize_intent", chitchat_categorize_intent)
    workflow.add_node("retrieve_graphs", retrieve_graphs)
+    # workflow.add_node("retrieve_local_data", retrieve_local_data)
     workflow.add_node("retrieve_graphs_chitchat", retrieve_graphs)
     workflow.add_node("retrieve_documents", retrieve_documents)
     workflow.add_node("answer_rag", answer_rag)
@@ -130,7 +166,7 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     workflow.add_conditional_edges(
         "categorize_intent",
         route_intent,
-        make_id_dict(["answer_chitchat","search"])
+        make_id_dict(["answer_chitchat","answer_climate"])
     )
 
     workflow.add_conditional_edges(
@@ -140,15 +176,98 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     )
 
     workflow.add_conditional_edges(
-        "search",
+        "answer_climate",
         route_translation,
         make_id_dict(["translate_query","transform_query"])
     )
+
     workflow.add_conditional_edges(
-        "retrieve_documents",
-        # lambda state : "retrieve_documents" if len(state["remaining_questions"]) > 0 else "answer_search",
-        route_retrieve_documents,
-        make_id_dict([END,"retrieve_documents","answer_search"])
+        "answer_search",
+        lambda x : route_based_on_relevant_docs(x,threshold_docs=threshold_docs),
+        make_id_dict(["answer_rag","answer_rag_no_docs"])
     )
@@ -158,13 +277,15 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     )
     workflow.add_conditional_edges(
         "transform_query",
-        lambda state : "retrieve_graphs" if "OurWorldInData" in state["relevant_content_sources"] else END,
+        route_retrieve_documents,
         make_id_dict(["retrieve_graphs", END])
     )
 
     # Define the edges
     workflow.add_edge("translate_query", "transform_query")
-    workflow.add_edge("transform_query", "retrieve_documents")
+    workflow.add_edge("transform_query", "retrieve_documents") #TODO put back
+    # workflow.add_edge("transform_query", "retrieve_local_data")
+    # workflow.add_edge("transform_query", END) # TODO remove
 
     workflow.add_edge("retrieve_graphs", END)
     workflow.add_edge("answer_rag", END)
@@ -172,6 +293,12 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     workflow.add_edge("answer_chitchat", "chitchat_categorize_intent")
     workflow.add_edge("retrieve_graphs_chitchat", END)
 
+    # workflow.add_edge("retrieve_local_data", "answer_search")
+    workflow.add_edge("retrieve_documents", "answer_search")
+
     # Compile
     app = workflow.compile()
+    return app
+
+def make_graph_agent_poc(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_region, reranker, threshold_docs=0.2):
+
+    workflow = StateGraph(GraphState)
+
+    # Define the node functions
219
+ categorize_intent = make_intent_categorization_node(llm)
220
+ transform_query = make_query_transform_node(llm)
221
+ translate_query = make_translation_node(llm)
222
+ answer_chitchat = make_chitchat_node(llm)
223
+ answer_ai_impact = make_ai_impact_node(llm)
224
+ retrieve_documents = make_IPx_retriever_node(vectorstore_ipcc, reranker, llm)
225
+ retrieve_graphs = make_graph_retriever_node(vectorstore_graphs, reranker)
226
+ retrieve_local_data = make_POC_retriever_node(vectorstore_region, reranker, llm)
227
+ answer_rag = make_rag_node(llm, with_docs=True)
228
+ answer_rag_no_docs = make_rag_node(llm, with_docs=False)
229
+ chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
230
+ # retrieve_drias_data = make_drias_retriever_node(llm) # WIP
231
+
232
+ # Define the nodes
233
+ # workflow.add_node("set_defaults", set_defaults)
234
+ workflow.add_node("categorize_intent", categorize_intent)
235
+ workflow.add_node("answer_climate", dummy)
236
+ workflow.add_node("answer_search", answer_search)
237
+ # workflow.add_node("end_retrieve_local_documents", dummy)
238
+ # workflow.add_node("end_retrieve_IPx_documents", dummy)
239
+ workflow.add_node("transform_query", transform_query)
240
+ workflow.add_node("translate_query", translate_query)
241
+ workflow.add_node("answer_chitchat", answer_chitchat)
242
+ workflow.add_node("chitchat_categorize_intent", chitchat_categorize_intent)
243
+ workflow.add_node("retrieve_graphs", retrieve_graphs)
244
+ workflow.add_node("retrieve_local_data", retrieve_local_data)
245
+ workflow.add_node("retrieve_graphs_chitchat", retrieve_graphs)
246
+ workflow.add_node("retrieve_documents", retrieve_documents)
247
+ workflow.add_node("answer_rag", answer_rag)
248
+ workflow.add_node("answer_rag_no_docs", answer_rag_no_docs)
249
+ # workflow.add_node("retrieve_drias_data", retrieve_drias_data)# WIP
250
+
251
+ # Entry point
252
+ workflow.set_entry_point("categorize_intent")
253
+
254
+ # CONDITIONAL EDGES
255
+ workflow.add_conditional_edges(
256
+ "categorize_intent",
257
+ route_intent,
258
+ make_id_dict(["answer_chitchat","answer_climate"])
259
+ )
260
+
261
+ workflow.add_conditional_edges(
262
+ "chitchat_categorize_intent",
263
+ chitchat_route_intent,
264
+ make_id_dict(["retrieve_graphs_chitchat", END])
265
+ )
266
+
267
+ workflow.add_conditional_edges(
268
+ "answer_climate",
269
+ route_translation,
270
+ make_id_dict(["translate_query","transform_query"])
271
  )
272
 
273
  workflow.add_conditional_edges(
 
277
  )
278
  workflow.add_conditional_edges(
279
  "transform_query",
280
+ route_retrieve_documents,
281
  make_id_dict(["retrieve_graphs", END])
282
  )
283
 
284
  # Define the edges
285
  workflow.add_edge("translate_query", "transform_query")
286
+ workflow.add_edge("transform_query", "retrieve_documents") #TODO put back
287
+ workflow.add_edge("transform_query", "retrieve_local_data")
288
+ # workflow.add_edge("transform_query", END) # TODO remove
289
 
290
  workflow.add_edge("retrieve_graphs", END)
291
  workflow.add_edge("answer_rag", END)
 
293
  workflow.add_edge("answer_chitchat", "chitchat_categorize_intent")
294
  workflow.add_edge("retrieve_graphs_chitchat", END)
295
 
296
+ workflow.add_edge("retrieve_local_data", "answer_search")
297
+ workflow.add_edge("retrieve_documents", "answer_search")
298
+
299
+ # workflow.add_edge("transform_query", "retrieve_drias_data")
300
+ # workflow.add_edge("retrieve_drias_data", END)
301
+
302
 
303
  # Compile
304
  app = workflow.compile()
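
Note on usage: both factories return a compiled LangGraph app, so callers drive it like any other graph. A minimal sketch, assuming the llm/vectorstore/reranker objects are built as in app.py; the exact GraphState input keys read by the entry node are an assumption here, not part of this diff:

    # Illustrative only: a compiled workflow is invoked with a GraphState-shaped dict.
    agent = make_graph_agent(llm, vectorstore, vectorstore_graphs, vectorstore_region, reranker)
    state = agent.invoke({
        "query": "How will sea level rise affect coastal cities?",  # assumed entry key
        "audience": "experts",
        "relevant_content_sources_selection": ["Figures (IPCC/IPBES)"],
        "search_only": False,
    })
    print(state["answer"])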
climateqa/engine/reranker.py CHANGED
@@ -47,4 +47,9 @@ def rerank_docs(reranker,docs,query):
          doc.metadata["reranking_score"] = result.score
          doc.metadata["query_used_for_retrieval"] = query
          docs_reranked.append(doc)
      return docs_reranked
+
+ def rerank_and_sort_docs(reranker, docs, query):
+     docs_reranked = rerank_docs(reranker, docs, query)
+     docs_reranked = sorted(docs_reranked, key=lambda x: x.metadata["reranking_score"], reverse=True)
+     return docs_reranked
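
For clarity, the new helper just composes the existing rerank_docs with a sort, so callers receive documents in descending relevance order. A sketch, assuming reranker and docs come from the existing get_reranker/retriever setup:

    # Illustrative only: orders the reranked documents by descending reranking_score.
    docs_sorted = rerank_and_sort_docs(reranker, docs, query="heatwaves in Europe")
    for d in docs_sorted[:3]:
        print(d.metadata["reranking_score"], d.metadata["query_used_for_retrieval"])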
climateqa/engine/talk_to_data/main.py ADDED
@@ -0,0 +1,60 @@
+ from climateqa.engine.talk_to_data.myVanna import MyVanna
+ from climateqa.engine.talk_to_data.utils import loc2coords, detect_location_with_openai, detectTable, nearestNeighbourSQL, detect_relevant_tables, replace_coordonates
+ import sqlite3
+ import os
+ import pandas as pd
+ from climateqa.engine.llm import get_llm
+
+ from dotenv import load_dotenv
+ import ast
+
+ load_dotenv()
+
+ OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
+ PC_API_KEY = os.getenv('VANNA_PINECONE_API_KEY')
+ INDEX_NAME = os.getenv('VANNA_INDEX_NAME')
+ VANNA_MODEL = os.getenv('VANNA_MODEL')
+
+
+ # Vanna object
+ vn = MyVanna(config={"temperature": 0, "api_key": OPENAI_API_KEY, 'model': VANNA_MODEL, 'pc_api_key': PC_API_KEY, 'index_name': INDEX_NAME, "top_k": 4})
+ db_vanna_path = os.path.join(os.path.dirname(__file__), "database/drias.db")
+ vn.connect_to_sqlite(db_vanna_path)
+
+ llm = get_llm(provider="openai")
+
+ def ask_llm_to_add_table_names(sql_query, llm):
+     sql_with_table_names = llm.invoke(f"Make the following sql query display the source table in the rows {sql_query}. Just answer the query. The answer should not include ```sql\n").content
+     return sql_with_table_names
+
+ def ask_llm_column_names(sql_query, llm):
+     columns = llm.invoke(f"From the given sql query, list the columns that are being selected. The answer should only be a python list. Just answer the list. The SQL query : {sql_query}").content
+     columns_list = ast.literal_eval(columns.strip("```python\n").strip())
+     return columns_list
+
+ def ask_vanna(query):
+     try:
+         location = detect_location_with_openai(OPENAI_API_KEY, query)
+         if location:
+
+             coords = loc2coords(location)
+             user_input = query.lower().replace(location.lower(), f"lat, long : {coords}")
+
+             relevant_tables = detect_relevant_tables(user_input, llm)
+             coords_tables = [nearestNeighbourSQL(db_vanna_path, coords, relevant_tables[i]) for i in range(len(relevant_tables))]
+             user_input_with_coords = replace_coordonates(coords, user_input, coords_tables)
+
+             sql_query, result_dataframe, figure = vn.ask(user_input_with_coords, print_results=False, allow_llm_to_see_data=True, auto_train=False)
+
+             return sql_query, result_dataframe, figure
+
+         else:
+             empty_df = pd.DataFrame()
+             empty_fig = {}
+             return "", empty_df, empty_fig
+     except Exception as e:
+         print(f"Error: {e}")
+         empty_df = pd.DataFrame()
+         empty_fig = {}
+         return "", empty_df, empty_fig
climateqa/engine/talk_to_data/myVanna.py ADDED
@@ -0,0 +1,13 @@
+ from dotenv import load_dotenv
+ from climateqa.engine.talk_to_data.vanna_class import MyCustomVectorDB
+ from vanna.openai import OpenAI_Chat
+ import os
+
+ load_dotenv()
+
+ OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
+
+ class MyVanna(MyCustomVectorDB, OpenAI_Chat):
+     def __init__(self, config=None):
+         MyCustomVectorDB.__init__(self, config=config)
+         OpenAI_Chat.__init__(self, config=config)
climateqa/engine/talk_to_data/utils.py ADDED
@@ -0,0 +1,98 @@
+ import re
+ import openai
+ import pandas as pd
+ from geopy.geocoders import Nominatim
+ import sqlite3
+ import ast
+
+
+ def detect_location_with_openai(api_key, sentence):
+     """
+     Detects locations in a sentence using OpenAI's API.
+     """
+     openai.api_key = api_key
+
+     prompt = f"""
+     Extract all locations (cities, countries, states, or geographical areas) mentioned in the following sentence.
+     Return the result as a Python list. If no locations are mentioned, return an empty list.
+
+     Sentence: "{sentence}"
+     """
+
+     response = openai.chat.completions.create(
+         model="gpt-4o-mini",
+         messages=[
+             {"role": "system", "content": "You are a helpful assistant skilled in identifying locations in text."},
+             {"role": "user", "content": prompt}
+         ],
+         max_tokens=100,
+         temperature=0
+     )
+
+     return response.choices[0].message.content.split("\n")[1][2:-2]
+
+
+ def detectTable(sql_query):
+     pattern = r'(?i)\bFROM\s+((?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+)(?:\.(?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+))*)'
+     matches = re.findall(pattern, sql_query)
+     return matches
+
+
+ def loc2coords(location : str):
+     geolocator = Nominatim(user_agent="city_to_latlong")
+     location = geolocator.geocode(location)
+     return (location.latitude, location.longitude)
+
+
+ def coords2loc(coords : tuple):
+     geolocator = Nominatim(user_agent="coords_to_city")
+     try:
+         location = geolocator.reverse(coords)
+         return location.address
+     except Exception as e:
+         print(f"Error: {e}")
+         return "Unknown Location"
+
+
+ def nearestNeighbourSQL(db: str, location: tuple, table : str):
+     conn = sqlite3.connect(db)
+     long = round(location[1], 3)
+     lat = round(location[0], 3)
+     cursor = conn.cursor()
+     cursor.execute(f"SELECT lat, lon FROM {table} WHERE lat BETWEEN {lat - 0.3} AND {lat + 0.3} AND lon BETWEEN {long - 0.3} AND {long + 0.3}")
+     results = cursor.fetchall()
+     return results[0]
+
+ def detect_relevant_tables(user_question, llm):
+     table_names_list = [
+         "Frequency_of_rainy_days_index",
+         "Winter_precipitation_total",
+         "Summer_precipitation_total",
+         "Annual_precipitation_total",
+         # "Remarkable_daily_precipitation_total_(Q99)",
+         "Frequency_of_remarkable_daily_precipitation",
+         "Extreme_precipitation_intensity",
+         "Mean_winter_temperature",
+         "Mean_summer_temperature",
+         "Number_of_tropical_nights",
+         "Maximum_summer_temperature",
+         "Number_of_days_with_Tx_above_30C",
+         "Number_of_days_with_Tx_above_35C",
+         "Drought_index"
+     ]
+     prompt = (
+         f"You are helping to build a sql query to retrieve relevant data for a user question. "
+         f"The different tables are {table_names_list}. "
+         f"The user question is {user_question}. Write the relevant tables to use. Answer only a python list of table names."
+     )
+     table_names = ast.literal_eval(llm.invoke(prompt).content.strip("```python\n").strip())
+     return table_names
+
+ def replace_coordonates(coords, query, coords_tables):
+     n = query.count(str(coords[0]))
+
+     for i in range(n):
+         query = query.replace(str(coords[0]), str(coords_tables[i][0]), 1)
+         query = query.replace(str(coords[1]), str(coords_tables[i][1]), 1)
+     return query
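
A small worked example of the coordinate-substitution step (the values are hypothetical):

    # Suppose loc2coords resolved the city to (43.297, 5.381), and nearestNeighbourSQL
    # found (43.25, 5.4) as the closest grid point in the one relevant table.
    coords = (43.297, 5.381)
    user_input = "mean summer temperature at lat, long : (43.297, 5.381)"
    coords_tables = [(43.25, 5.4)]
    print(replace_coordonates(coords, user_input, coords_tables))
    # -> "mean summer temperature at lat, long : (43.25, 5.4)"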
climateqa/engine/talk_to_data/vanna_class.py ADDED
@@ -0,0 +1,325 @@
+ from vanna.base import VannaBase
+ from pinecone import Pinecone
+ from climateqa.engine.embeddings import get_embeddings_function
+ import pandas as pd
+ import hashlib
+
+ class MyCustomVectorDB(VannaBase):
+
+     """
+     VectorDB class for storing and retrieving vectors from Pinecone.
+
+     args:
+         config (dict): Configuration dictionary containing the Pinecone API key and the index name:
+             - pc_api_key (str): Pinecone API key
+             - index_name (str): Pinecone index name
+             - top_k (int): Number of top results to return (default = 2)
+     """
+
+     def __init__(self, config):
+         super().__init__(config=config)
+         try:
+             self.api_key = config.get('pc_api_key')
+             self.index_name = config.get('index_name')
+         except:
+             raise Exception("Please provide the Pinecone API key and the index name")
+
+         self.pc = Pinecone(api_key=self.api_key)
+         self.index = self.pc.Index(self.index_name)
+         self.top_k = config.get('top_k', 2)
+         self.embeddings = get_embeddings_function()
+
+     def check_embedding(self, id, namespace):
+         fetched = self.index.fetch(ids=[id], namespace=namespace)
+         if fetched['vectors'] == {}:
+             return False
+         return True
+
+     def generate_hash_id(self, data: str) -> str:
+         """
+         Generate a unique hash ID for the given data.
+
+         Args:
+             data (str): The input data to hash (e.g., a concatenated string of user attributes).
+
+         Returns:
+             str: A unique hash ID as a hexadecimal string.
+         """
+         data_bytes = data.encode('utf-8')
+         hash_object = hashlib.sha256(data_bytes)
+         hash_id = hash_object.hexdigest()
+         return hash_id
+
+     def add_ddl(self, ddl: str, **kwargs) -> str:
+         id = self.generate_hash_id(ddl) + '_ddl'
+
+         if self.check_embedding(id, 'ddl'):
+             print(f"DDL having id {id} already exists")
+             return id
+
+         self.index.upsert(
+             vectors=[(id, self.embeddings.embed_query(ddl), {'ddl': ddl})],
+             namespace='ddl'
+         )
+
+         return id
+
+     def add_documentation(self, doc: str, **kwargs) -> str:
+         id = self.generate_hash_id(doc) + '_doc'
+
+         if self.check_embedding(id, 'documentation'):
+             print(f"Documentation having id {id} already exists")
+             return id
+
+         self.index.upsert(
+             vectors=[(id, self.embeddings.embed_query(doc), {'doc': doc})],
+             namespace='documentation'
+         )
+
+         return id
+
+     def add_question_sql(self, question: str, sql: str, **kwargs) -> str:
+         id = self.generate_hash_id(question) + '_sql'
+
+         if self.check_embedding(id, 'question_sql'):
+             print(f"Question-SQL pair having id {id} already exists")
+             return id
+
+         self.index.upsert(
+             vectors=[(id, self.embeddings.embed_query(question + sql), {'question': question, 'sql': sql})],
+             namespace='question_sql'
+         )
+
+         return id
+
+     def get_related_ddl(self, question: str, **kwargs) -> list:
+         res = self.index.query(
+             vector=self.embeddings.embed_query(question),
+             top_k=self.top_k,
+             namespace='ddl',
+             include_metadata=True
+         )
+
+         return [match['metadata']['ddl'] for match in res['matches']]
+
+     def get_related_documentation(self, question: str, **kwargs) -> list:
+         res = self.index.query(
+             vector=self.embeddings.embed_query(question),
+             top_k=self.top_k,
+             namespace='documentation',
+             include_metadata=True
+         )
+
+         return [match['metadata']['doc'] for match in res['matches']]
+
+     def get_similar_question_sql(self, question: str, **kwargs) -> list:
+         res = self.index.query(
+             vector=self.embeddings.embed_query(question),
+             top_k=self.top_k,
+             namespace='question_sql',
+             include_metadata=True
+         )
+
+         return [(match['metadata']['question'], match['metadata']['sql']) for match in res['matches']]
+
+     def get_training_data(self, **kwargs) -> pd.DataFrame:
+
+         list_of_data = []
+
+         namespaces = ['ddl', 'documentation', 'question_sql']
+
+         for namespace in namespaces:
+
+             data = self.index.query(
+                 top_k=10000,
+                 namespace=namespace,
+                 include_metadata=True,
+                 include_values=False
+             )
+
+             for match in data['matches']:
+                 list_of_data.append(match['metadata'])
+
+         return pd.DataFrame(list_of_data)
+
+     def remove_training_data(self, id: str, **kwargs) -> bool:
+         # Namespaces match the ones used by the add_* methods above.
+         if id.endswith("_ddl"):
+             self.index.delete(ids=[id], namespace="ddl")
+             return True
+         if id.endswith("_sql"):
+             self.index.delete(ids=[id], namespace="question_sql")
+             return True
+         if id.endswith("_doc"):
+             self.index.delete(ids=[id], namespace="documentation")
+             return True
+
+         return False
+
+     def generate_embedding(self, text, **kwargs):
+         # Implement the method here
+         pass
+
+     def get_sql_prompt(
+         self,
+         initial_prompt : str,
+         question: str,
+         question_sql_list: list,
+         ddl_list: list,
+         doc_list: list,
+         **kwargs,
+     ):
+         """
+         Example:
+         ```python
+         vn.get_sql_prompt(
+             question="What are the top 10 customers by sales?",
+             question_sql_list=[{"question": "What are the top 10 customers by sales?", "sql": "SELECT * FROM customers ORDER BY sales DESC LIMIT 10"}],
+             ddl_list=["CREATE TABLE customers (id INT, name TEXT, sales DECIMAL)"],
+             doc_list=["The customers table contains information about customers and their sales."],
+         )
+         ```
+
+         This method is used to generate a prompt for the LLM to generate SQL.
+
+         Args:
+             question (str): The question to generate SQL for.
+             question_sql_list (list): A list of questions and their corresponding SQL statements.
+             ddl_list (list): A list of DDL statements.
+             doc_list (list): A list of documentation.
+
+         Returns:
+             any: The prompt for the LLM to generate SQL.
+         """
+
+         if initial_prompt is None:
+             initial_prompt = f"You are a {self.dialect} expert. " + \
+                 "Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions. "
+
+         initial_prompt = self.add_ddl_to_prompt(
+             initial_prompt, ddl_list, max_tokens=self.max_tokens
+         )
+
+         if self.static_documentation != "":
+             doc_list.append(self.static_documentation)
+
+         initial_prompt = self.add_documentation_to_prompt(
+             initial_prompt, doc_list, max_tokens=self.max_tokens
+         )
+
+         # initial_prompt = self.add_sql_to_prompt(
+         #     initial_prompt, question_sql_list, max_tokens=self.max_tokens
+         # )
+
+         initial_prompt += (
+             "===Response Guidelines \n"
+             "1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. \n"
+             "2. If the provided context is almost sufficient but requires knowledge of a specific string in a particular column, please generate an intermediate SQL query to find the distinct strings in that column. Prepend the query with a comment saying intermediate_sql \n"
+             "3. If the provided context is insufficient, please give a sql query based on your knowledge and the context provided. \n"
+             "4. Please use the most relevant table(s). \n"
+             "5. If the question has been asked and answered before, please repeat the answer exactly as it was given before. \n"
+             f"6. Ensure that the output SQL is {self.dialect}-compliant and executable, and free of syntax errors. \n"
+             f"7. Add a description of the table in the result of the sql query, if relevant. \n"
+             "8. Make sure to include the relevant KPI in the SQL query. The query should return impactful data. \n"
+             # f"8. If a set of latitude,longitude is provided, make an intermediate query to find the nearest value in the table and replace the coordinates in the sql query. \n"
+             # "7. Add a description of the table in the result of the sql query."
+             # "7. If the question is about a specific latitude, longitude, query an interval of 0.3 and keep only the first set of coordinates. \n"
+             # "7. Table names should be included in the result of the sql query. Use for example Mean_winter_temperature AS table_name in the query \n"
+         )
+
+         message_log = [self.system_message(initial_prompt)]
+
+         for example in question_sql_list:
+             if example is None:
+                 print("example is None")
+             else:
+                 if example is not None and "question" in example and "sql" in example:
+                     message_log.append(self.user_message(example["question"]))
+                     message_log.append(self.assistant_message(example["sql"]))
+
+         message_log.append(self.user_message(question))
+
+         return message_log
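
The add_* methods are idempotent thanks to the hash ids, so training can be re-run safely. An illustrative training sequence follows; the DDL and SQL shown are made-up examples (only the table name comes from the DRIAS table list in utils.py), and `vn` is a MyVanna instance as built in main.py:

    vn.add_ddl("CREATE TABLE Mean_summer_temperature (lat REAL, lon REAL, year INT, value REAL)")  # hypothetical schema
    vn.add_documentation("Temperatures are given in degrees Celsius.")
    vn.add_question_sql(
        question="What is the mean summer temperature near lat 43.25, lon 5.4?",
        sql="SELECT year, value FROM Mean_summer_temperature WHERE lat = 43.25 AND lon = 5.4",
    )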
climateqa/{event_handler.py → handle_stream_events.py} RENAMED
@@ -15,7 +15,14 @@ def init_audience(audience :str) -> str:
          audience_prompt = audience_prompts["experts"]
      return audience_prompt
 
+ def convert_to_docs_to_html(docs: list[dict]) -> str:
+     docs_html = []
+     for i, d in enumerate(docs, 1):
+         if d.metadata["chunk_type"] == "text":
+             docs_html.append(make_html_source(d, i))
+     return "".join(docs_html)
+
- def handle_retrieved_documents(event: StreamEvent, history : list[ChatMessage], used_documents : list[str]) -> tuple[str, list[ChatMessage], list[str]]:
+ def handle_retrieved_documents(event: StreamEvent, history : list[ChatMessage], used_documents : list[str], related_content : list[str]) -> tuple[str, list[ChatMessage], list[str]]:
      """
      Handles the retrieved documents and returns the HTML representation of the documents
 
@@ -27,26 +34,22 @@ def handle_retrieved_documents(event: StreamEvent, history : list[ChatMessage],
      Returns:
          tuple[str, list[ChatMessage], list[str]]: The updated HTML representation of the documents, the updated message history and the updated list of used documents
      """
+     if "documents" not in event["data"]["output"] or event["data"]["output"]["documents"] == []:
+         return history, used_documents, related_content
+
      try:
-         docs = event["data"]["output"]["documents"]
-         docs_html = []
-         textual_docs = [d for d in docs if d.metadata["chunk_type"] == "text"]
-         for i, d in enumerate(textual_docs, 1):
-             if d.metadata["chunk_type"] == "text":
-                 docs_html.append(make_html_source(d, i))
+         docs = event["data"]["output"]["documents"]
 
          used_documents = used_documents + [f"{d.metadata['short_name']} - {d.metadata['name']}" for d in docs]
          if used_documents != []:
              history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
-
-         docs_html = "".join(docs_html)
 
-         related_contents = event["data"]["output"]["related_contents"]
-
+         #TODO do the same for related contents
+
      except Exception as e:
          print(f"Error getting documents: {e}")
          print(event)
-     return docs, docs_html, history, used_documents, related_contents
+     return history, used_documents, related_content
 
  def stream_answer(history: list[ChatMessage], event : StreamEvent, start_streaming : bool, answer_message_content : str) -> tuple[list[ChatMessage], bool, str]:
      """
front/deprecated.py ADDED
@@ -0,0 +1,46 @@
+
+ # Functions to toggle visibility
+ def toggle_summary_visibility():
+     global summary_visible
+     summary_visible = not summary_visible
+     return gr.update(visible=summary_visible)
+
+ def toggle_relevant_visibility():
+     global relevant_visible
+     relevant_visible = not relevant_visible
+     return gr.update(visible=relevant_visible)
+
+ def change_completion_status(current_state):
+     current_state = 1 - current_state
+     return current_state
+
+
+
+ def vote(data: gr.LikeData):
+     if data.liked:
+         print(data.value)
+     else:
+         print(data)
+
+ def save_graph(saved_graphs_state, embedding, category):
+     print(f"\nCategory:\n{saved_graphs_state}\n")
+     if category not in saved_graphs_state:
+         saved_graphs_state[category] = []
+     if embedding not in saved_graphs_state[category]:
+         saved_graphs_state[category].append(embedding)
+     return saved_graphs_state, gr.Button("Graph Saved")
+
+
+ # Function to save feedback
+ def save_feedback(feed: str, user_id):
+     if len(feed) > 1:
+         timestamp = str(datetime.now().timestamp())
+         file = user_id + timestamp + ".json"
+         logs = {
+             "user_id": user_id,
+             "feedback": feed,
+             "time": timestamp,
+         }
+         log_on_azure(file, logs, share_client)
+     return "Feedback submitted, thank you!"
front/event_listeners.py ADDED
File without changes
front/tabs/__init__.py ADDED
@@ -0,0 +1,6 @@
+ from .tab_config import create_config_modal
+ from .tab_examples import create_examples_tab
+ from .tab_papers import create_papers_tab
+ from .tab_figures import create_figures_tab
+ from .chat_interface import create_chat_interface
+ from .tab_about import create_about_tab
front/tabs/chat_interface.py ADDED
@@ -0,0 +1,74 @@
+ import gradio as gr
+ from gradio.components import ChatMessage
+
+ # Initialize prompt and system template
+ init_prompt = """
+ Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports**.
+
+ ❓ How to use
+ - **Language**: You can ask me your questions in any language.
+ - **Audience**: You can specify your audience (children, general public, experts) to get a better-suited answer.
+ - **Sources**: You can choose to search in the IPCC or IPBES reports, or both.
+ - **Relevant content sources**: You can choose to search for figures, papers, or graphs that can be relevant for your question.
+
+ ⚠️ Limitations
+ *Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
+
+ 🛈 Information
+ Please note that we log your questions for meta-analysis purposes, so avoid sharing any sensitive or personal information.
+
+ What do you want to learn?
+ """
+
+ init_prompt_poc = """
+ Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports, the PCAET of Paris, the Plan Biodiversité 2018-2024, and the Acclimaterra reports from la Région Nouvelle-Aquitaine**.
+
+ ❓ How to use
+ - **Language**: You can ask me your questions in any language.
+ - **Audience**: You can specify your audience (children, general public, experts) to get a better-suited answer.
+ - **Sources**: You can choose to search in the IPCC or IPBES reports, and POC sources for local documents (PCAET, Plan Biodiversité, Acclimaterra).
+ - **Relevant content sources**: You can choose to search for figures, papers, or graphs that can be relevant for your question.
+
+ ⚠️ Limitations
+ *Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
+
+ 🛈 Information
+ Please note that we log your questions for meta-analysis purposes, so avoid sharing any sensitive or personal information.
+
+ What do you want to learn?
+ """
+
+
+
+ # UI Layout Components
+ def create_chat_interface(tab):
+     init_prompt_message = init_prompt_poc if tab == "Beta - POC Adapt'Action" else init_prompt
+     chatbot = gr.Chatbot(
+         value=[ChatMessage(role="assistant", content=init_prompt_message)],
+         type="messages",
+         show_copy_button=True,
+         show_label=False,
+         elem_id="chatbot",
+         layout="panel",
+         avatar_images=(None, "https://i.ibb.co/YNyd5W2/logo4.png"),
+         max_height="80vh",
+         height="100vh"
+     )
+
+     with gr.Row(elem_id="input-message"):
+
+         textbox = gr.Textbox(
+             placeholder="Ask me anything here!",
+             show_label=False,
+             scale=12,
+             lines=1,
+             interactive=True,
+             elem_id="input-textbox"
+         )
+
+         config_button = gr.Button("", elem_id="config-button")
+
+     return chatbot, textbox, config_button
front/tabs/main_tab.py ADDED
@@ -0,0 +1,68 @@
+ import gradio as gr
+ from .chat_interface import create_chat_interface
+ from .tab_examples import create_examples_tab
+ from .tab_papers import create_papers_tab
+ from .tab_figures import create_figures_tab
+
+ def cqa_tab(tab_name):
+     # State variables
+     current_graphs = gr.State([])
+     with gr.Tab(tab_name):
+         with gr.Row(elem_id="chatbot-row"):
+             # Left column - Chat interface
+             with gr.Column(scale=2):
+                 chatbot, textbox, config_button = create_chat_interface(tab_name)
+
+             # Right column - Content panels
+             with gr.Column(scale=2, variant="panel", elem_id="right-panel"):
+                 with gr.Tabs(elem_id="right_panel_tab") as tabs:
+                     # Examples tab
+                     with gr.TabItem("Examples", elem_id="tab-examples", id=0):
+                         examples_hidden, dropdown_samples, samples = create_examples_tab()
+
+                     # Sources tab
+                     with gr.Tab("Sources", elem_id="tab-sources", id=1) as tab_sources:
+                         sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
+
+
+                     # Recommended content tab
+                     with gr.Tab("Recommended content", elem_id="tab-recommended_content", id=2) as tab_recommended_content:
+                         with gr.Tabs(elem_id="group-subtabs") as tabs_recommended_content:
+                             # Figures subtab
+                             with gr.Tab("Figures", elem_id="tab-figures", id=3) as tab_figures:
+                                 sources_raw, new_figures, used_figures, gallery_component, figures_cards, figure_modal = create_figures_tab()
+
+                             # Papers subtab
+                             with gr.Tab("Papers", elem_id="tab-citations", id=4) as tab_papers:
+                                 direct_search_textbox, papers_summary, papers_html, citations_network, papers_modal = create_papers_tab()
+
+                             # Graphs subtab
+                             with gr.Tab("Graphs", elem_id="tab-graphs", id=5) as tab_graphs:
+                                 graphs_container = gr.HTML(
+                                     "<h2>There are no graphs to be displayed at the moment. Try asking another question.</h2>",
+                                     elem_id="graphs-container"
+                                 )
+     return {
+         "chatbot": chatbot,
+         "textbox": textbox,
+         "tabs": tabs,
+         "sources_raw": sources_raw,
+         "new_figures": new_figures,
+         "current_graphs": current_graphs,
+         "examples_hidden": examples_hidden,
+         "dropdown_samples": dropdown_samples,
+         "samples": samples,
+         "sources_textbox": sources_textbox,
+         "figures_cards": figures_cards,
+         "gallery_component": gallery_component,
+         "config_button": config_button,
+         "papers_html": papers_html,
+         "citations_network": citations_network,
+         "papers_summary": papers_summary,
+         "tab_recommended_content": tab_recommended_content,
+         "tab_sources": tab_sources,
+         "tab_figures": tab_figures,
+         "tab_graphs": tab_graphs,
+         "tab_papers": tab_papers,
+         "graph_container": graphs_container
+     }
front/tabs/tab_about.py ADDED
@@ -0,0 +1,38 @@
+ import gradio as gr
+
+ # Citation information
+ CITATION_LABEL = "BibTeX citation for ClimateQ&A"
+ CITATION_TEXT = r"""@misc{climateqa,
+     author={Théo Alves Da Costa, Timothée Bohe},
+     title={ClimateQ&A, AI-powered conversational assistant for climate change and biodiversity loss},
+     year={2024},
+     howpublished= {\url{https://climateqa.com}},
+ }
+ @software{climateqa,
+     author = {Théo Alves Da Costa, Timothée Bohe},
+     publisher = {ClimateQ&A},
+     title = {ClimateQ&A, AI-powered conversational assistant for climate change and biodiversity loss},
+ }
+ """
+
+ def create_about_tab():
+     with gr.Tab("About", elem_classes="max-height other-tabs"):
+         with gr.Row():
+             with gr.Column(scale=1):
+                 gr.Markdown(
+                     """
+                     ### More info
+                     - See more info at [https://climateqa.com](https://climateqa.com/docs/intro/)
+                     - Give feedback via this [form](https://forms.office.com/e/1Yzgxm6jbp)
+
+                     ### Citation
+                     """
+                 )
+                 with gr.Accordion(CITATION_LABEL, elem_id="citation", open=False):
+                     gr.Textbox(
+                         value=CITATION_TEXT,
+                         label="",
+                         interactive=False,
+                         show_copy_button=True,
+                         lines=len(CITATION_TEXT.split('\n')),
+                     )
front/tabs/tab_config.py ADDED
@@ -0,0 +1,123 @@
+ import gradio as gr
+ from gradio_modal import Modal
+ from climateqa.constants import POSSIBLE_REPORTS
+ from typing import TypedDict
+
+ class ConfigPanel(TypedDict):
+     config_open: gr.State
+     config_modal: Modal
+     dropdown_sources: gr.CheckboxGroup
+     dropdown_reports: gr.Dropdown
+     dropdown_external_sources: gr.CheckboxGroup
+     search_only: gr.Checkbox
+     dropdown_audience: gr.Dropdown
+     after: gr.Slider
+     output_query: gr.Textbox
+     output_language: gr.Textbox
+
+
+ def create_config_modal():
+     config_open = gr.State(value=True)
+     with Modal(visible=False, elem_id="modal-config") as config_modal:
+         gr.Markdown("Reminder: you can talk in any language, ClimateQ&A is multi-lingual!")
+
+         dropdown_sources = gr.CheckboxGroup(
+             choices=["IPCC", "IPBES", "IPOS"],
+             label="Select source (by default search in all sources)",
+             value=["IPCC"],
+             interactive=True
+         )
+
+         dropdown_reports = gr.Dropdown(
+             choices=POSSIBLE_REPORTS,
+             label="Or select specific reports",
+             multiselect=True,
+             value=None,
+             interactive=True
+         )
+
+         dropdown_external_sources = gr.CheckboxGroup(
+             choices=["Figures (IPCC/IPBES)", "Papers (OpenAlex)", "Graphs (OurWorldInData)", "POC region"],
+             label="Select database to search for relevant content",
+             value=["Figures (IPCC/IPBES)", "POC region"],
+             interactive=True
+         )
+
+         search_only = gr.Checkbox(
+             label="Search only for recommended content without chatting",
+             value=False,
+             interactive=True,
+             elem_id="checkbox-chat"
+         )
+
+         dropdown_audience = gr.Dropdown(
+             choices=["Children", "General public", "Experts"],
+             label="Select audience",
+             value="Experts",
+             interactive=True
+         )
+
+         after = gr.Slider(
+             minimum=1950,
+             maximum=2023,
+             step=1,
+             value=1960,
+             label="Publication date",
+             show_label=True,
+             interactive=True,
+             elem_id="date-papers",
+             visible=False
+         )
+
+         output_query = gr.Textbox(
+             label="Query used for retrieval",
+             show_label=True,
+             elem_id="reformulated-query",
+             lines=2,
+             interactive=False,
+             visible=False
+         )
+
+         output_language = gr.Textbox(
+             label="Language",
+             show_label=True,
+             elem_id="language",
+             lines=1,
+             interactive=False,
+             visible=False
+         )
+
+         dropdown_external_sources.change(
+             lambda x: gr.update(visible="Papers (OpenAlex)" in x),
+             inputs=[dropdown_external_sources],
+             outputs=[after]
+         )
+
+         close_config_modal_button = gr.Button("Validate and Close", elem_id="close-config-modal")
+
+
+     # return ConfigPanel(
+     #     config_open=config_open,
+     #     config_modal=config_modal,
+     #     dropdown_sources=dropdown_sources,
+     #     dropdown_reports=dropdown_reports,
+     #     dropdown_external_sources=dropdown_external_sources,
+     #     search_only=search_only,
+     #     dropdown_audience=dropdown_audience,
+     #     after=after,
+     #     output_query=output_query,
+     #     output_language=output_language
+     # )
+     return {
+         "config_open": config_open,
+         "config_modal": config_modal,
+         "dropdown_sources": dropdown_sources,
+         "dropdown_reports": dropdown_reports,
+         "dropdown_external_sources": dropdown_external_sources,
+         "search_only": search_only,
+         "dropdown_audience": dropdown_audience,
+         "after": after,
+         "output_query": output_query,
+         "output_language": output_language,
+         "close_config_modal_button": close_config_modal_button
+     }
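
create_config_modal only builds the components; opening and closing is expected to be wired by the caller. A sketch following the same Modal(visible=...) pattern used in tab_figures.py (the config_button comes from create_chat_interface; this wiring is an assumption, not part of this diff):

    config = create_config_modal()
    config_button.click(lambda: Modal(visible=True), None, config["config_modal"])
    config["close_config_modal_button"].click(lambda: Modal(visible=False), None, config["config_modal"])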
front/tabs/tab_examples.py ADDED
@@ -0,0 +1,40 @@
+ import gradio as gr
+ from climateqa.sample_questions import QUESTIONS
+
+
+ def create_examples_tab():
+     examples_hidden = gr.Textbox(visible=False, elem_id="examples-hidden")
+     first_key = list(QUESTIONS.keys())[0]
+     dropdown_samples = gr.Dropdown(
+         choices=QUESTIONS.keys(),
+         value=first_key,
+         interactive=True,
+         label="Select a category of sample questions",
+         elem_id="dropdown-samples"
+     )
+
+     samples = []
+     for i, key in enumerate(QUESTIONS.keys()):
+         examples_visible = (i == 0)
+         with gr.Row(visible=examples_visible) as group_examples:
+             examples_questions = gr.Examples(
+                 examples=QUESTIONS[key],
+                 inputs=[examples_hidden],
+                 examples_per_page=8,
+                 run_on_click=False,
+                 elem_id=f"examples{i}",
+                 api_name=f"examples{i}"
+             )
+         samples.append(group_examples)
+
+
+     def change_sample_questions(key):
+         index = list(QUESTIONS.keys()).index(key)
+         visible_bools = [False] * len(samples)
+         visible_bools[index] = True
+         return [gr.update(visible=visible_bools[i]) for i in range(len(samples))]
+
+     # Event listener
+     dropdown_samples.change(change_sample_questions, dropdown_samples, samples)
+
+     # Return all three components so callers (see main_tab.py) can unpack them
+     return examples_hidden, dropdown_samples, samples
front/tabs/tab_figures.py ADDED
@@ -0,0 +1,31 @@
+ import gradio as gr
+ from gradio_modal import Modal
+
+
+ def create_figures_tab():
+     sources_raw = gr.State()
+     new_figures = gr.State([])
+     used_figures = gr.State([])
+
+     with Modal(visible=False, elem_id="modal_figure_galery") as figure_modal:
+         gallery_component = gr.Gallery(
+             object_fit='scale-down',
+             elem_id="gallery-component",
+             height="80vh"
+         )
+
+     show_full_size_figures = gr.Button(
+         "Show figures in full size",
+         elem_id="show-figures",
+         interactive=True
+     )
+     show_full_size_figures.click(
+         lambda: Modal(visible=True),
+         None,
+         figure_modal
+     )
+
+     figures_cards = gr.HTML(show_label=False, elem_id="sources-figures")
+
+     return sources_raw, new_figures, used_figures, gallery_component, figures_cards, figure_modal
front/tabs/tab_papers.py ADDED
@@ -0,0 +1,38 @@
+ import gradio as gr
+ from gradio_modal import Modal
+
+
+ def create_papers_tab():
+     direct_search_textbox = gr.Textbox(label="Direct search for papers", placeholder="What is climate change?", elem_id="papers-search")
+
+     with gr.Accordion(
+         visible=True,
+         elem_id="papers-summary-popup",
+         label="See summary of relevant papers",
+         open=False
+     ) as summary_popup:
+         papers_summary = gr.Markdown("", visible=True, elem_id="papers-summary")
+
+     with gr.Accordion(
+         visible=True,
+         elem_id="papers-relevant-popup",
+         label="See relevant papers",
+         open=False
+     ) as relevant_popup:
+         papers_html = gr.HTML(show_label=False, elem_id="papers-textbox")
+
+     btn_citations_network = gr.Button("Explore papers citations network")
+     with Modal(visible=False) as papers_modal:
+         citations_network = gr.HTML(
+             "<h3>Citations Network Graph</h3>",
+             visible=True,
+             elem_id="papers-citations-network"
+         )
+     btn_citations_network.click(
+         lambda: Modal(visible=True),
+         None,
+         papers_modal
+     )
+
+     return direct_search_textbox, papers_summary, papers_html, citations_network, papers_modal
front/tabs/tab_recommended_content.py ADDED
File without changes
front/utils.py CHANGED
@@ -39,23 +39,33 @@ def parse_output_llm_with_sources(output:str)->str:
      content_parts = "".join(parts)
      return content_parts
 
- def process_figures(docs:list)->tuple:
-     gallery=[]
-     used_figures =[]
+
+
+ def process_figures(docs:list, new_figures:list)->tuple:
+     if new_figures == []:
+         return docs, "", []
+     docs = docs + new_figures
+
      figures = '<div class="figures-container"><p></p> </div>'
+     gallery = []
+     used_figures = []
+
+     if docs == []:
+         return docs, figures, gallery
+
+
      docs_figures = [d for d in docs if d.metadata["chunk_type"] == "image"]
-     for i, doc in enumerate(docs_figures):
-         if doc.metadata["chunk_type"] == "image":
-             if doc.metadata["figure_code"] != "N/A":
-                 title = f"{doc.metadata['figure_code']} - {doc.metadata['short_name']}"
-             else:
-                 title = f"{doc.metadata['short_name']}"
+     for i_doc, doc in enumerate(docs_figures):
+         if doc.metadata["chunk_type"] == "image":
+             path = doc.metadata["image_path"]
 
-             if title not in used_figures:
-                 used_figures.append(title)
+             if path not in used_figures:
+                 used_figures.append(path)
+                 figure_number = len(used_figures)
+
                  try:
-                     key = f"Image {i+1}"
+                     key = f"Image {figure_number}"
 
                      image_path = doc.metadata["image_path"].split("documents/")[1]
                      img = get_image_from_azure_blob_storage(image_path)
@@ -68,12 +78,12 @@ def process_figures(docs:list)->tuple:
 
                      img_str = base64.b64encode(buffered.getvalue()).decode()
 
-                     figures = figures + make_html_figure_sources(doc, i, img_str)
+                     figures = figures + make_html_figure_sources(doc, figure_number, img_str)
                      gallery.append(img)
                  except Exception as e:
-                     print(f"Skipped adding image {i} because of {e}")
+                     print(f"Skipped adding image {figure_number} because of {e}")
 
-     return figures, gallery
+     return docs, figures, gallery
 
 
  def generate_html_graphs(graphs:list)->str:
requirements.txt CHANGED
@@ -4,7 +4,7 @@ azure-storage-blob
  python-dotenv==1.0.0
  langchain==0.2.1
  langchain_openai==0.1.7
- langgraph==0.0.55
+ langgraph==0.2.70
  pinecone-client==4.1.0
  sentence-transformers==2.6.0
  huggingface-hub
@@ -19,3 +19,5 @@ langchain-community==0.2
  msal==1.31
  matplotlib==3.9.2
  gradio-modal==0.0.4
+ vanna==0.7.5
+ geopy==2.4.1
sandbox/20241104 - CQA - StepByStep CQA.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
sandbox/talk_to_data/20250306 - CQA - Drias.ipynb ADDED
@@ -0,0 +1,94 @@
+ {
+  "cells": [
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "## Import the function from main.py"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "import sys\n",
+     "import os\n",
+     "sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))\n",
+     "\n",
+     "%load_ext autoreload\n",
+     "%autoreload 2\n",
+     "\n",
+     "from climateqa.engine.talk_to_data.main import ask_vanna\n"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "## Create a human query"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "# query = \"Compare the winter and summer precipitation in 2050 in Marseille\"\n",
+     "# query = \"What is the impact of climate in Bordeaux?\"\n",
+     "# query = \"what is the number of days where the temperature above 35 in 2050 in Marseille\"\n",
+     "# query = \"Quelle sera la température à Marseille sur les prochaines années ?\"\n",
+     "# query = \"Comment vont évoluer les températures à Marseille ?\"\n",
+     "query = \"Comment vont évoluer les températures à marseille ?\""
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "## Call ask_vanna: it returns a tuple of the SQL query, the result dataframe, and the figure"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "sql_query, df, fig = ask_vanna(query)\n",
+     "print(df.head())\n",
+     "fig.show()"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": []
+   }
+  ],
+  "metadata": {
+   "kernelspec": {
+    "display_name": "climateqa",
+    "language": "python",
+    "name": "python3"
+   },
+   "language_info": {
+    "codemirror_mode": {
+     "name": "ipython",
+     "version": 3
+    },
+    "file_extension": ".py",
+    "mimetype": "text/x-python",
+    "name": "python",
+    "nbconvert_exporter": "python",
+    "pygments_lexer": "ipython3",
+    "version": "3.11.9"
+   }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+ }
sandbox/talk_to_data/20250306 - CQA - Step_by_step_vanna.ipynb ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sys\n",
+ "import os\n",
+ "sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))\n",
+ "\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2\n",
+ "\n",
+ "from climateqa.engine.talk_to_data.main import ask_vanna\n",
+ "\n",
+ "import sqlite3\n",
+ "import os\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Imports"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from climateqa.engine.talk_to_data.myVanna import MyVanna\n",
+ "from climateqa.engine.talk_to_data.utils import loc2coords, detect_location_with_openai, detectTable, nearestNeighbourSQL, detect_relevant_tables, replace_coordonates\n",
+ "\n",
+ "from climateqa.engine.llm import get_llm"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Vanna Ask\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from dotenv import load_dotenv\n",
+ "\n",
+ "load_dotenv()\n",
+ "\n",
+ "llm = get_llm(provider=\"openai\")\n",
+ "\n",
+ "OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')\n",
+ "PC_API_KEY = os.getenv('VANNA_PINECONE_API_KEY')\n",
+ "INDEX_NAME = os.getenv('VANNA_INDEX_NAME')\n",
+ "VANNA_MODEL = os.getenv('VANNA_MODEL')\n",
+ "\n",
+ "ROOT_PATH = os.path.dirname(os.path.dirname(os.getcwd()))\n",
+ "\n",
+ "#Vanna object\n",
+ "vn = MyVanna(config = {\"temperature\": 0, \"api_key\": OPENAI_API_KEY, 'model': VANNA_MODEL, 'pc_api_key': PC_API_KEY, 'index_name': INDEX_NAME, \"top_k\" : 4})\n",
+ "db_vanna_path = ROOT_PATH + \"/data/drias/drias.db\"\n",
+ "vn.connect_to_sqlite(db_vanna_path)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# User query"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# query = \"Quelle sera la température à Marseille sur les prochaines années ?\"\n",
+ "query = \"Comment vont évoluer les températures à marseille ?\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Detect location"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "location = detect_location_with_openai(OPENAI_API_KEY, query)\n",
+ "print(location)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Convert location to longitude, latitude coordinates"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "coords = loc2coords(location)\n",
+ "user_input = query.lower().replace(location.lower(), f\"lat, long : {coords}\")\n",
+ "print(user_input)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Find closest coordinates and replace lat,lon\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "relevant_tables = detect_relevant_tables(user_input, llm) \n",
+ "coords_tables = [nearestNeighbourSQL(db_vanna_path, coords, relevant_tables[i]) for i in range(len(relevant_tables))]\n",
+ "user_input_with_coords = replace_coordonates(coords, user_input, coords_tables)\n",
+ "print(user_input_with_coords)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Ask Vanna with correct coordinates"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sql_query, result_dataframe, figure = vn.ask(user_input_with_coords, print_results=False, allow_llm_to_see_data=True, auto_train=False)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "result_dataframe"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "figure"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "climateqa",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
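
The cells above chain five helpers into a single flow: extract the place name, geocode it, splice the raw coordinates into the question, snap them to the nearest grid point stored in each relevant table, and only then hand the rewritten question to Vanna. Below is a condensed sketch of that flow as one function, reusing the notebook's imports; the helper signatures are inferred from the calls above rather than from a documented API:

```python
# Condensed sketch of the notebook's step-by-step flow. All helper
# signatures are inferred from the cells above (assumptions, not a
# documented API).
from climateqa.engine.talk_to_data.utils import (
    loc2coords, detect_location_with_openai,
    nearestNeighbourSQL, detect_relevant_tables, replace_coordonates,
)

def ask_drias(vn, db_vanna_path, llm, openai_api_key, query):
    # 1. Extract the place name mentioned in the user question
    location = detect_location_with_openai(openai_api_key, query)
    # 2. Geocode it and splice the raw (lat, lon) pair into the question
    coords = loc2coords(location)
    user_input = query.lower().replace(location.lower(), f"lat, long : {coords}")
    # 3. Snap to the nearest coordinates actually present in each relevant table
    tables = detect_relevant_tables(user_input, llm)
    coords_tables = [nearestNeighbourSQL(db_vanna_path, coords, t) for t in tables]
    user_input = replace_coordonates(coords, user_input, coords_tables)
    # 4. Vanna generates and runs the SQL, returning (sql, dataframe, figure)
    return vn.ask(user_input, print_results=False,
                  allow_llm_to_see_data=True, auto_train=False)
```

Packaging the steps as one function changes nothing about the logic; it only makes the coordinate-snapping order explicit and lets the same flow be rerun with a different `query` in one line.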
style.css CHANGED
@@ -1,89 +1,127 @@
-
 /* :root {
 --user-image: url('https://ih1.redbubble.net/image.4776899543.6215/st,small,507x507-pad,600x600,f8f8f8.jpg');
- } */
 
- #tab-recommended_content{
- padding-top: 0px;
- padding-left : 0px;
- padding-right: 0px;
 }
 #group-subtabs {
 /* display: block; */
- width: 100%; /* Ensures the parent uses the full width */
 position : sticky;
 }
 
- #group-subtabs .tab-container {
- display: flex;
- text-align: center;
- width: 100%; /* Ensures the tabs span the full width */
- }
 
- #group-subtabs .tab-container button {
- flex: 1; /* Makes each button take equal width */
 }
 
 
- #papers-summary-popup button span{
- /* make label of accordio in bold, center, and bigger */
- font-size: 16px;
 font-weight: bold;
- text-align: center;
 
 }
 
- #papers-relevant-popup span{
- /* make label of accordio in bold, center, and bigger */
- font-size: 16px;
- font-weight: bold;
- text-align: center;
 }
 
 
 
- #tab-citations .button{
- padding: 12px 16px;
- font-size: 16px;
 font-weight: bold;
- cursor: pointer;
- border: none;
- outline: none;
 text-align: left;
- transition: background-color 0.3s ease;
 }
 
 
- .gradio-container {
- width: 100%!important;
- max-width: 100% !important;
 }
 
- /* fix for huggingface infinite growth*/
- main.flex.flex-1.flex-col {
- max-height: 95vh !important;
 }
 
- button#show-figures{
- /* Base styles */
- background-color: #f5f5f5;
- border: 1px solid #e0e0e0;
- border-radius: 4px;
- color: #333333;
- cursor: pointer;
- width: 100%;
- text-align: center;
 }
 
- .avatar-container.svelte-1x5p6hu:not(.thumbnail-item) img {
- width: 100%;
- height: 100%;
- object-fit: cover;
- border-radius: 50%;
- padding: 0px;
- margin: 0px;
 }
 
 .warning-box {
 background-color: #fff3cd;
 border: 1px solid #ffeeba;
@@ -93,32 +131,20 @@ button#show-figures{
 color: #856404;
 display: inline-block;
 margin-bottom: 15px;
- }
-
 
 .tip-box {
 background-color: #f0f9ff;
 border: 1px solid #80d4fa;
 border-radius: 4px;
- margin-top:20px;
 padding: 15px 20px;
 font-size: 14px;
 display: inline-block;
- margin-bottom: 15px;
 width: auto;
- color:black !important;
- }
-
- body.dark .warning-box * {
- color:black !important;
- }
-
-
- body.dark .tip-box * {
- color:black !important;
 }
 
-
 .tip-box-title {
 font-weight: bold;
 font-size: 14px;
@@ -130,116 +156,128 @@ body.dark .tip-box * {
 margin-right: 5px;
 }
 
- .gr-box {border-color: #d6c37c}
-
- #hidden-message{
- display:none;
 }
 
- .message{
- font-size:14px !important;
-
- }
- .card-content img {
- display: block;
- margin: auto;
- max-width: 100%; /* Ensures the image is responsive */
- height: auto;
 }
 
- a {
- text-decoration: none;
- color: inherit;
 }
 
- .doc-ref sup{
- color:#dc2626!important;
- /* margin-right:1px; */
 }
 
 
- .card {
- background-color: white;
- border-radius: 10px;
- box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
- overflow: hidden;
- display: flex;
- flex-direction: column;
- margin:20px;
 }
 
- .card-content {
- padding: 20px;
 }
 
- .card-content h2 {
- font-size: 14px !important;
- font-weight: bold;
- margin-bottom: 10px;
- margin-top:0px !important;
- color:#dc2626!important;;
 }
 
- .card-content p {
- font-size: 12px;
- margin-bottom: 0;
 }
 
- .card-footer {
- background-color: #f4f4f4;
- font-size: 10px;
 padding: 10px;
 display: flex;
- justify-content: space-between;
 align-items: center;
 }
 
- .card-footer span {
- flex-grow: 1;
- text-align: left;
- color: #999 !important;
 }
 
- .pdf-link {
- display: inline-flex;
- align-items: center;
- margin-left: auto;
- text-decoration: none!important;
- font-size: 14px;
 }
 
-
-
- .message.user{
- /* background-color:#7494b0 !important; */
- border:none;
- /* color:white!important; */
 }
 
- .message.bot{
- /* background-color:#f2f2f7 !important; */
- border:none;
 }
 
-
- label.selected{
- background: #93c5fd !important;
 }
 
- #submit-button{
- padding:0px !important;
 }
 
- #modal-config .block.modal-block.padded {
- padding-top: 25px;
- height: 100vh;
-
- }
- #modal-config .modal-container{
- margin: 0px;
- padding: 0px;
 }
- /* Modal styles */
 #modal-config {
 position: fixed;
 top: 0;
@@ -252,28 +290,23 @@ label.selected{
 padding: 15px;
 transform: none;
 }
- #modal-config .close{
- display: none;
 }
 
- /* Push main content to the right when modal is open */
- /* .modal ~ * {
- margin-left: 300px;
- transition: margin-left 0.3s ease;
- } */
 
- #modal-config .modal .wrap ul{
- position:static;
- top: 100%;
- left: 0;
- /* min-height: 100px; */
- height: 100%;
- /* margin-top: 0; */
- z-index: 9999;
- pointer-events: auto;
- height: 200px;
 }
- #config-button{
 background: none;
 border: none;
 padding: 8px;
@@ -296,155 +329,230 @@ label.selected{
 background-color: rgba(0, 0, 0, 0.1);
 }
 
- #checkbox-config{
- display: block;
- position: absolute;
- background: none;
 border: none;
- padding: 8px;
 cursor: pointer;
- width: 40px;
- height: 40px;
- display: flex;
- align-items: center;
- justify-content: center;
- border-radius: 50%;
- transition: background-color 0.2s;
- font-size: 20px;
 text-align: center;
 }
- #checkbox-config:checked{
- display: block;
 }
 
 
 
- @media screen and (min-width: 1024px) {
- /* Additional style for scrollable tab content */
- /* div#tab-recommended_content {
- overflow-y: auto;
- max-height: 80vh;
- } */
 
- .gradio-container {
- max-height: calc(100vh - 190px) !important;
- overflow: hidden;
- }
- /* div#chatbot{
- height:calc(100vh - 170px) !important;
- max-height:calc(100vh - 170px) !important;
 
- } */
 
 
-
- div#tab-examples{
- height:calc(100vh - 190px) !important;
- overflow-y: scroll !important;
- /* overflow-y: auto; */
- }
 
- div#sources-textbox{
- height:calc(100vh - 190px) !important;
- overflow-y: scroll !important;
- /* overflow-y: auto !important; */
- }
- div#graphs-container{
- height:calc(100vh - 210px) !important;
- overflow-y: scroll !important;
- }
 
- div#sources-figures{
- height:calc(100vh - 300px) !important;
- max-height: 90vh !important;
- overflow-y: scroll !important;
- }
 
- div#graphs-container{
- height:calc(100vh - 300px) !important;
- max-height: 90vh !important;
- overflow-y: scroll !important;
- }
 
- div#tab-citations{
- height:calc(100vh - 300px) !important;
- max-height: 90vh !important;
 overflow-y: scroll !important;
 }
-
- div#tab-config{
- height:calc(100vh - 190px) !important;
 overflow-y: scroll !important;
- /* overflow-y: auto !important; */
 }
 
- /* Force container to respect height limits */
- .main-component{
- contain: size layout;
- overflow: hidden;
 }
 
-
- div#chatbot-row{
- max-height:calc(100vh - 90px) !important;
 }
- /*
-
 
- .max-height{
- height:calc(100vh - 90px) !important;
- max-height:calc(100vh - 90px) !important;
 overflow-y: auto;
 }
- */
-
- }
-
- footer {
- visibility: hidden;
- display:none !important;
 }
 
-
 @media screen and (max-width: 767px) {
- /* Your mobile-specific styles go here */
-
- div#chatbot{
- height:500px !important;
 }
 
- #submit-button{
- padding:0px !important;
 min-width: 80px;
 }
 
- /* This will hide all list items */
 div.tab-nav button {
 display: none !important;
 }
 
- /* This will show only the first list item */
- div.tab-nav button:first-child {
- display: block !important;
- }
-
- /* This will show only the first list item */
 div.tab-nav button:nth-child(2) {
 display: block !important;
 }
-
- #right-panel button{
 display: block !important;
 }
 
- /* ... add other mobile-specific styles ... */
 }
 
 @media (prefers-color-scheme: dark) {
- .card{
 background-color: #374151;
 }
- .card-image > .card-content{
 background-color: rgb(55, 65, 81) !important;
 }
 
@@ -452,251 +560,61 @@ footer {
 background-color: #404652;
 }
 
- .container > .wrap{
 background-color: #374151 !important;
- color:white !important;
 }
- .card-content h2{
- color:#e7754f !important;
- }
- .doc-ref sup{
- color:rgb(235 109 35)!important;
- /* margin-right:1px; */
 }
 .card-footer span {
- color:white !important;
 }
-
- }
-
-
- .doc-ref{
- color:#dc2626!important;
- margin-right:1px;
- }
-
- .tabitem{
- border:none !important;
- }
-
- .other-tabs > div{
- padding-left:40px;
- padding-right:40px;
- padding-top:10px;
- }
-
- .gallery-item > div{
- white-space: normal !important; /* Allow the text to wrap */
- word-break: break-word !important; /* Break words to prevent overflow */
- overflow-wrap: break-word !important; /* Break long words if necessary */
- }
-
- span.chatbot > p > img{
- margin-top:40px !important;
- max-height: none !important;
- max-width: 80% !important;
- border-radius:0px !important;
- }
-
-
- .chatbot-caption{
- font-size:11px;
- font-style:italic;
- color:#508094;
- }
-
- .ai-generated{
- font-size:11px!important;
- font-style:italic;
- color:#73b8d4 !important;
- }
-
- .card-image > .card-content{
- background-color:#f1f7fa;
- }
-
-
-
- .tab-nav > button.selected{
- color:#4b8ec3;
- font-weight:bold;
- border:none;
- }
-
- .tab-nav{
- border:none !important;
- }
-
- #input-textbox > label > textarea{
- border-radius:40px;
- padding-left:30px;
- resize:none;
- }
-
- #input-message > div{
- border:none;
- }
-
- #dropdown-samples{
-
- background:none !important;
-
- }
-
- #dropdown-samples > .container > .wrap{
- background-color:white;
- }
-
 
- #tab-examples > div > .form{
- border:none;
- background:none !important;
- }
 
- .a-doc-ref{
- text-decoration: none !important;
 }
 
-
- .dropdown {
- position: relative;
- display:inline-block;
- margin-bottom: 10px;
- }
-
- .dropdown-toggle {
- background-color: #f2f2f2;
- color: black;
- padding: 10px;
- font-size: 16px;
- cursor: pointer;
- display: block;
- width: 400px; /* Adjust width as needed */
- position: relative;
- display: flex;
- align-items: center; /* Vertically center the contents */
- justify-content: left;
- }
-
- .dropdown-toggle .caret {
- content: "";
- position: absolute;
- right: 10px;
- top: 50%;
- border-left: 5px solid transparent;
- border-right: 5px solid transparent;
- border-top: 5px solid black;
- transform: translateY(-50%);
- }
-
- input[type="checkbox"] {
- display: none !important;
- }
-
- input[type="checkbox"]:checked + .dropdown-content {
 display: block;
- }
-
- #checkbox-chat input[type="checkbox"] {
- display: flex !important;
- }
-
- .dropdown-content {
- display: none;
 position: absolute;
- background-color: #f9f9f9;
- min-width: 300px;
- box-shadow: 0 8px 16px 0 rgba(0,0,0,0.2);
- z-index: 1;
- padding: 12px;
- border: 1px solid #ccc;
- }
-
- input[type="checkbox"]:checked + .dropdown-toggle + .dropdown-content {
- display: block;
- }
-
- input[type="checkbox"]:checked + .dropdown-toggle .caret {
- border-top: 0;
- border-bottom: 5px solid black;
- }
-
- .loader {
- border: 1px solid #d0d0d0 !important; /* Light grey background */
- border-top: 1px solid #db3434 !important; /* Blue color */
- border-right: 1px solid #3498db !important; /* Blue color */
 border-radius: 50%;
- width: 20px;
- height: 20px;
- animation: spin 2s linear infinite;
- display:inline-block;
- margin-right:10px !important;
- }
-
- .checkmark{
- color:green !important;
- font-size:18px;
- margin-right:10px !important;
- }
-
- @keyframes spin {
- 0% { transform: rotate(0deg); }
- 100% { transform: rotate(360deg); }
- }
-
-
- .relevancy-score{
- margin-top:10px !important;
- font-size:10px !important;
- font-style:italic;
- }
-
- .score-green{
- color:green !important;
- }
-
- .score-orange{
- color:orange !important;
- }
-
- .score-red{
- color:red !important;
- }
-
- /* Mobile specific adjustments */
- @media screen and (max-width: 767px) {
- div#tab-recommended_content {
- max-height: 50vh; /* Reduce height for smaller screens */
- overflow-y: auto;
- }
 }
 
- /* Additional style for scrollable tab content */
- div#tab-saved-graphs {
- overflow-y: auto; /* Enable vertical scrolling */
- max-height: 80vh; /* Adjust height as needed */
 }
 
- /* Mobile specific adjustments */
- @media screen and (max-width: 767px) {
- div#tab-saved-graphs {
- max-height: 50vh; /* Reduce height for smaller screens */
- overflow-y: auto;
- }
 }
- .message-buttons-left.panel.message-buttons.with-avatar {
- display: none;
 }
-
-
- /* Specific fixes for Hugging Face Space iframe */
- .h-full {
- height: auto !important;
- min-height: 0 !important;
- }
-
- .space-content {
- height: auto !important;
- max-height: 100vh !important;
- overflow: hidden;
 }
@@ -1,89 +1,127 @@
+ /* Root Variables */
 /* :root {
 --user-image: url('https://ih1.redbubble.net/image.4776899543.6215/st,small,507x507-pad,600x600,f8f8f8.jpg');
+ } */
+
+ /* Layout & Container Styles */
+ .gradio-container {
+ width: 100% !important;
+ max-width: 100% !important;
+ }
+
+ main.flex.flex-1.flex-col {
+ max-height: 95vh !important;
+ }
+
+ .main-component {
+ contain: size layout;
+ overflow: hidden;
+ }
 
+ /* Tab Styles */
+ #tab-recommended_content {
+ padding: 0;
 }
+
 #group-subtabs {
 /* display: block; */
 position : sticky;
 }
 
 
 }
 
+ .tab-nav {
+ border: none !important;
+ }
 
+ .tab-nav > button.selected {
+ color: #4b8ec3;
 font-weight: bold;
+ border: none;
+ }
 
+ .tabitem {
+ border: none !important;
 }
 
+ .other-tabs > div {
+ padding: 40px 40px 10px;
 }
 
+ /* Card Styles */
+ .card {
+ background-color: white;
+ border-radius: 10px;
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+ overflow: hidden;
+ display: flex;
+ flex-direction: column;
+ margin: 20px;
+ }
 
+ .card-content {
+ padding: 20px;
+ }
 
+ .card-content h2 {
+ font-size: 14px !important;
 font-weight: bold;
+ margin: 0 0 10px !important;
+ color: #dc2626 !important;
+ }
+
+ .card-content p {
+ font-size: 12px;
+ margin-bottom: 0;
+ }
+
+ .card-content img {
+ display: block;
+ margin: auto;
+ max-width: 100%;
+ height: auto;
+ }
+
+ .card-footer {
+ background-color: #f4f4f4;
+ font-size: 10px;
+ padding: 10px;
+ display: flex;
+ justify-content: space-between;
+ align-items: center;
+ }
+
+ .card-footer span {
+ flex-grow: 1;
 text-align: left;
+ color: #999 !important;
 }
 
+ .card-image > .card-content {
+ background-color: #f1f7fa;
+ }
 
+ /* Message & Chat Styles */
+ .message {
+ font-size: 14px !important;
 }
 
+ .message.user, .message.bot {
+ border: none;
 }
 
+ #input-textbox > label > textarea {
+ border-radius: 40px;
+ padding-left: 30px;
+ resize: none;
 }
 
+ #input-message > div {
+ border: none;
 }
 
+ /* Alert Boxes */
 .warning-box {
 background-color: #fff3cd;
 border: 1px solid #ffeeba;
@@ -93,32 +131,20 @@ button#show-figures{
 color: #856404;
 display: inline-block;
 margin-bottom: 15px;
+ }
 
 .tip-box {
 background-color: #f0f9ff;
 border: 1px solid #80d4fa;
 border-radius: 4px;
+ margin: 20px 0 15px;
 padding: 15px 20px;
 font-size: 14px;
 display: inline-block;
 width: auto;
+ color: black !important;
 }
 
 .tip-box-title {
 font-weight: bold;
 font-size: 14px;
@@ -130,116 +156,128 @@ body.dark .tip-box * {
 margin-right: 5px;
 }
 
+ /* Loader Animation */
+ .loader {
+ border: 1px solid #d0d0d0 !important;
+ border-top: 1px solid #db3434 !important;
+ border-right: 1px solid #3498db !important;
+ border-radius: 50%;
+ width: 20px;
+ height: 20px;
+ animation: spin 2s linear infinite;
+ display: inline-block;
+ margin-right: 10px !important;
 }
 
+ @keyframes spin {
+ 0% { transform: rotate(0deg); }
+ 100% { transform: rotate(360deg); }
 }
 
+ /* PDF Link Styles */
+ .pdf-link {
+ display: inline-flex;
+ align-items: center;
+ margin-left: auto;
+ text-decoration: none!important;
+ font-size: 14px;
 }
 
+ /* Document Reference Styles */
+ .doc-ref sup {
+ color: #dc2626!important;
 }
 
+ .doc-ref {
+ color: #dc2626!important;
+ margin-right: 1px;
+ }
 
+ /* Chatbot & Image Styles */
+ span.chatbot > p > img {
+ margin-top: 40px !important;
+ max-height: none !important;
+ max-width: 80% !important;
+ border-radius: 0px !important;
 }
 
+ .chatbot-caption {
+ font-size: 11px;
+ font-style: italic;
+ color: #508094;
 }
 
+ .ai-generated {
+ font-size: 11px!important;
+ font-style: italic;
+ color: #73b8d4 !important;
 }
 
+ /* Dropdown Styles */
+ .dropdown {
+ position: relative;
+ display: inline-block;
+ margin-bottom: 10px;
 }
 
+ .dropdown-toggle {
+ background-color: #f2f2f2;
+ color: black;
 padding: 10px;
+ font-size: 16px;
+ cursor: pointer;
 display: flex;
+ width: 400px;
 align-items: center;
+ justify-content: left;
+ position: relative;
 }
 
+ .dropdown-toggle .caret {
+ content: "";
+ position: absolute;
+ right: 10px;
+ top: 50%;
+ border-left: 5px solid transparent;
+ border-right: 5px solid transparent;
+ border-top: 5px solid black;
+ transform: translateY(-50%);
 }
 
+ .dropdown-content {
+ display: none;
+ position: absolute;
+ background-color: #f9f9f9;
+ min-width: 300px;
+ box-shadow: 0 8px 16px 0 rgba(0,0,0,0.2);
+ z-index: 1;
+ padding: 12px;
+ border: 1px solid #ccc;
 }
 
+ /* Checkbox Styles */
+ input[type="checkbox"] {
+ display: none !important;
 }
 
+ #checkbox-chat input[type="checkbox"] {
+ display: flex !important;
 }
 
+ input[type="checkbox"]:checked + .dropdown-content {
+ display: block;
 }
 
+ input[type="checkbox"]:checked + .dropdown-toggle + .dropdown-content {
+ display: block;
 }
 
+ input[type="checkbox"]:checked + .dropdown-toggle .caret {
+ border-top: 0;
+ border-bottom: 5px solid black;
 }
+
+ /* Modal Styles */
 #modal-config {
 position: fixed;
 top: 0;
@@ -252,28 +290,23 @@ label.selected{
 padding: 15px;
 transform: none;
 }
+
+ #modal-config .block.modal-block.padded {
+ padding-top: 25px;
+ height: 100vh;
 }
 
+ #modal-config .modal-container {
+ margin: 0px;
+ padding: 0px;
+ }
 
+ #modal-config .close {
+ display: none;
 }
+
+ /* Config Button Styles */
+ #config-button {
 background: none;
 border: none;
 padding: 8px;
@@ -296,155 +329,230 @@ label.selected{
 background-color: rgba(0, 0, 0, 0.1);
 }
 
+ /* Relevancy Score Styles */
+ .relevancy-score {
+ margin-top: 10px !important;
+ font-size: 10px !important;
+ font-style: italic;
+ }
+
+ .score-green {
+ color: green !important;
+ }
+
+ .score-orange {
+ color: orange !important;
+ }
+
+ .score-red {
+ color: red !important;
+ }
+
+ /* Gallery Styles */
+ .gallery-item > div {
+ white-space: normal !important;
+ word-break: break-word !important;
+ overflow-wrap: break-word !important;
+ }
+
+ /* Avatar Styles */
+ .avatar-container.svelte-1x5p6hu:not(.thumbnail-item) img {
+ width: 100%;
+ height: 100%;
+ object-fit: cover;
+ border-radius: 50%;
+ padding: 0px;
+ margin: 0px;
+ }
+
+ /* Message Button Styles */
+ .message-buttons-left.panel.message-buttons.with-avatar {
+ display: none;
+ }
+
+ /* Checkmark Styles */
+ .checkmark {
+ color: green !important;
+ font-size: 18px;
+ margin-right: 10px !important;
+ }
+
+ /* Papers Summary & Relevant Popup Styles */
+ #papers-summary-popup button span,
+ #papers-relevant-popup span {
+ font-size: 16px;
+ font-weight: bold;
+ text-align: center;
+ }
+
+ /* Citations Tab Button Style */
+ #tab-citations .button {
+ padding: 12px 16px;
+ font-size: 16px;
+ font-weight: bold;
+ cursor: pointer;
 border: none;
+ outline: none;
+ text-align: left;
+ transition: background-color 0.3s ease;
+ }
+
+ /* Show Figures Button Style */
+ button#show-figures {
+ background-color: #f5f5f5;
+ border: 1px solid #e0e0e0;
+ border-radius: 4px;
+ color: #333333;
 cursor: pointer;
+ width: 100%;
 text-align: center;
 }
+
+ /* Gradio Box Style */
+ .gr-box {
+ border-color: #d6c37c;
 }
 
+ /* Hidden Message Style */
+ #hidden-message {
+ display: none;
+ }
 
+ /* Label Selected Style */
+ label.selected {
+ background: #93c5fd !important;
+ }
 
+ /* Submit Button Style */
+ #submit-button {
+ padding: 0px !important;
+ }
 
+ /* Hugging Face Space Fixes */
+ .h-full {
+ height: auto !important;
+ min-height: 0 !important;
+ }
 
+ .space-content {
+ height: auto !important;
+ max-height: 100vh !important;
+ overflow: hidden;
+ }
 
+ /* Dropdown Samples Style */
+ #dropdown-samples {
+ background: none !important;
+ }
 
+ #dropdown-samples > .container > .wrap {
+ background-color: white;
+ }
 
+ /* Tab Examples Form Style */
+ #tab-examples > div > .form {
+ border: none;
+ background: none !important;
+ }
 
+ /* Utility Classes */
+ .hidden {
+ display: none !important;
+ }
 
+ footer {
+ display: none !important;
+ visibility: hidden;
+ }
 
+ a {
+ text-decoration: none;
+ color: inherit;
+ }
+
+ .a-doc-ref {
+ text-decoration: none !important;
+ }
+
+ /* Media Queries */
+ /* Desktop Media Query */
+ @media screen and (min-width: 1024px) {
+ .gradio-container {
+ max-height: calc(100vh - 190px) !important;
+ overflow: hidden;
+ }
+ div#tab-examples,
+ div#sources-textbox,
+ div#tab-config {
+ height: calc(100vh - 190px) !important;
 overflow-y: scroll !important;
 }
+ div#tab-vanna,
+ div#sources-figures,
+ div#graphs-container,
+ div#tab-citations {
+ height: calc(100vh - 300px) !important;
+ max-height: 90vh !important;
 overflow-y: scroll !important;
 }
 
+ div#chatbot-row {
+ max-height: calc(100vh - 90px) !important;
 }
 
+ div#graphs-container {
+ height: calc(100vh - 210px) !important;
+ overflow-y: scroll !important;
 }
 
+ div#tab-saved-graphs {
 overflow-y: auto;
+ max-height: 80vh;
 }
 }
 
+ /* Mobile Media Query */
 @media screen and (max-width: 767px) {
+ div#chatbot {
+ height: 500px !important;
 }
 
+ #submit-button {
+ padding: 0 !important;
 min-width: 80px;
 }
 
 div.tab-nav button {
 display: none !important;
 }
 
+ div.tab-nav button:first-child,
 div.tab-nav button:nth-child(2) {
 display: block !important;
 }
+
+ #right-panel button {
 display: block !important;
 }
 
+ div#tab-recommended_content {
+ max-height: 50vh;
+ overflow-y: auto;
+ }
+
+ div#tab-saved-graphs {
+ max-height: 50vh;
+ overflow-y: auto;
+ }
 }
 
+ /* Dark Mode */
 @media (prefers-color-scheme: dark) {
+ .card {
 background-color: #374151;
 }
+
+ .card-image > .card-content {
 background-color: rgb(55, 65, 81) !important;
 }
 
@@ -452,251 +560,61 @@ footer {
 background-color: #404652;
 }
 
+ .container > .wrap {
 background-color: #374151 !important;
+ color: white !important;
 }
+
+ .card-content h2 {
+ color: #e7754f !important;
 }
+
 .card-footer span {
+ color: white !important;
 }
 
+ body.dark .warning-box *,
+ body.dark .tip-box * {
+ color: black !important;
+ }
 
+ .doc-ref sup {
+ color: rgb(235 109 35)!important;
+ }
 }
 
+ /* Checkbox Config Style */
+ #checkbox-config {
 display: block;
 position: absolute;
+ background: none;
+ border: none;
+ padding: 8px;
+ cursor: pointer;
+ width: 40px;
+ height: 40px;
+ display: flex;
+ align-items: center;
+ justify-content: center;
 border-radius: 50%;
+ transition: background-color 0.2s;
+ font-size: 20px;
+ text-align: center;
 }
 
+ #checkbox-config:checked {
+ display: block;
 }
 
+ #vanna-display {
+ max-height: 300px;
+ /* overflow-y: scroll; */
 }
+ #sql-query{
+ max-height: 100px;
+ overflow-y:scroll;
 }
+ #vanna-details{
+ max-height: 500px;
+ overflow-y:scroll;
 }