testPR
#13
by
TheoLvs
- opened
- .gitignore +0 -6
- README.md +1 -1
- app.py +310 -123
- climateqa/engine/chains/__init__.py +0 -0
- climateqa/engine/chains/answer_ai_impact.py +0 -46
- climateqa/engine/chains/answer_chitchat.py +0 -52
- climateqa/engine/chains/answer_rag.py +0 -99
- climateqa/engine/chains/intent_categorization.py +0 -86
- climateqa/engine/chains/keywords_extraction.py +0 -40
- climateqa/engine/chains/query_transformation.py +0 -193
- climateqa/engine/chains/retrieve_documents.py +0 -159
- climateqa/engine/chains/sample_router.py +0 -66
- climateqa/engine/chains/translation.py +0 -41
- climateqa/engine/embeddings.py +3 -6
- climateqa/engine/graph.py +0 -149
- climateqa/engine/llm/__init__.py +0 -3
- climateqa/engine/llm/ollama.py +0 -6
- climateqa/engine/{chains/prompts.py → prompts.py} +2 -2
- climateqa/engine/rag.py +134 -0
- climateqa/engine/{chains/reformulation.py → reformulation.py} +1 -1
- climateqa/engine/reranker.py +0 -40
- climateqa/{knowledge → engine}/retriever.py +83 -1
- climateqa/engine/utils.py +0 -17
- climateqa/knowledge/__init__.py +0 -0
- climateqa/papers/__init__.py +43 -0
- climateqa/{knowledge → papers}/openalex.py +12 -61
- front/__init__.py +0 -0
- front/callbacks.py +0 -0
- front/utils.py +0 -142
- requirements.txt +6 -13
- sandbox/20240310 - CQA - Semantic Routing 1.ipynb +0 -0
- style.css +0 -118
- test.json +0 -0
.gitignore
CHANGED
@@ -5,9 +5,3 @@ __pycache__/utils.cpython-38.pyc
|
|
5 |
|
6 |
notebooks/
|
7 |
*.pyc
|
8 |
-
|
9 |
-
**/.ipynb_checkpoints/
|
10 |
-
**/.flashrank_cache/
|
11 |
-
|
12 |
-
data/
|
13 |
-
sandbox/
|
|
|
5 |
|
6 |
notebooks/
|
7 |
*.pyc
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 🌍
|
|
4 |
colorFrom: blue
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
fullWidth: true
|
10 |
pinned: false
|
|
|
4 |
colorFrom: blue
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.19.1
|
8 |
app_file: app.py
|
9 |
fullWidth: true
|
10 |
pinned: false
|
app.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
from climateqa.engine.embeddings import get_embeddings_function
|
2 |
embeddings_function = get_embeddings_function()
|
3 |
|
4 |
-
from climateqa.
|
5 |
from sentence_transformers import CrossEncoder
|
6 |
|
7 |
-
|
8 |
oa = OpenAlex()
|
9 |
|
10 |
import gradio as gr
|
@@ -15,8 +15,6 @@ import time
|
|
15 |
import re
|
16 |
import json
|
17 |
|
18 |
-
from gradio import ChatMessage
|
19 |
-
|
20 |
# from gradio_modal import Modal
|
21 |
|
22 |
from io import BytesIO
|
@@ -31,19 +29,16 @@ from utils import create_user_id
|
|
31 |
|
32 |
# ClimateQ&A imports
|
33 |
from climateqa.engine.llm import get_llm
|
|
|
34 |
from climateqa.engine.vectorstore import get_pinecone_vectorstore
|
35 |
-
from climateqa.
|
36 |
-
from climateqa.engine.reranker import get_reranker
|
37 |
from climateqa.engine.embeddings import get_embeddings_function
|
38 |
-
from climateqa.engine.
|
39 |
from climateqa.sample_questions import QUESTIONS
|
40 |
from climateqa.constants import POSSIBLE_REPORTS
|
41 |
from climateqa.utils import get_image_from_azure_blob_storage
|
42 |
from climateqa.engine.keywords import make_keywords_chain
|
43 |
-
|
44 |
-
from climateqa.engine.graph import make_graph_agent,display_graph
|
45 |
-
|
46 |
-
from front.utils import make_html_source,parse_output_llm_with_sources,serialize_docs,make_toolbox
|
47 |
|
48 |
# Load environment variables in local mode
|
49 |
try:
|
@@ -86,21 +81,48 @@ user_id = create_user_id()
|
|
86 |
|
87 |
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
# Create vectorstore and retriever
|
90 |
vectorstore = get_pinecone_vectorstore(embeddings_function)
|
91 |
llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
|
92 |
-
reranker = get_reranker("nano")
|
93 |
-
agent = make_graph_agent(llm,vectorstore,reranker)
|
94 |
|
95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
|
98 |
async def chat(query,history,audience,sources,reports):
|
99 |
"""taking a query and a message history, use a pipeline (reformulation, retriever, answering) to yield a tuple of:
|
100 |
(messages in gradio format, messages in langchain format, source documents)"""
|
101 |
|
102 |
-
|
103 |
-
print(f">> NEW QUESTION ({date_now}) : {query}")
|
104 |
|
105 |
if audience == "Children":
|
106 |
audience_prompt = audience_prompts["children"]
|
@@ -115,79 +137,77 @@ async def chat(query,history,audience,sources,reports):
|
|
115 |
if len(sources) == 0:
|
116 |
sources = ["IPCC"]
|
117 |
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
|
124 |
-
|
125 |
-
|
126 |
-
#
|
127 |
-
|
|
|
|
|
|
|
|
|
128 |
|
129 |
-
docs = []
|
130 |
docs_html = ""
|
131 |
output_query = ""
|
132 |
output_language = ""
|
133 |
output_keywords = ""
|
134 |
gallery = []
|
135 |
-
start_streaming = False
|
136 |
|
137 |
-
steps_display = {
|
138 |
-
"categorize_intent":("🔄️ Analyzing user message",True),
|
139 |
-
"transform_query":("🔄️ Thinking step by step to answer the question",True),
|
140 |
-
"retrieve_documents":("🔄️ Searching in the knowledge base",False),
|
141 |
-
}
|
142 |
-
|
143 |
-
used_documents = []
|
144 |
-
answer_message_content = ""
|
145 |
try:
|
146 |
-
async for
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
print(f"Error getting documents: {e}")
|
164 |
-
print(event)
|
165 |
-
|
166 |
-
elif event["name"] in steps_display.keys() and event["event"] == "on_chain_start": #display steps
|
167 |
-
event_description,display_output = steps_display[node]
|
168 |
-
if not hasattr(history[-1], 'metadata') or history[-1].metadata["title"] != event_description: # if a new step begins
|
169 |
-
history.append(ChatMessage(role="assistant", content = "", metadata={'title' :event_description}))
|
170 |
-
|
171 |
-
elif event["name"] != "transform_query" and event["event"] == "on_chat_model_stream" and node in ["answer_rag", "answer_search"]:# if streaming answer
|
172 |
-
if start_streaming == False:
|
173 |
-
start_streaming = True
|
174 |
-
history.append(ChatMessage(role="assistant", content = ""))
|
175 |
-
answer_message_content += event["data"]["chunk"].content
|
176 |
-
answer_message_content = parse_output_llm_with_sources(answer_message_content)
|
177 |
-
history[-1] = ChatMessage(role="assistant", content = answer_message_content)
|
178 |
-
# history.append(ChatMessage(role="assistant", content = new_message_content))
|
179 |
-
|
180 |
-
if event["name"] == "transform_query" and event["event"] =="on_chain_end":
|
181 |
-
if hasattr(history[-1],"content"):
|
182 |
-
history[-1].content += "Decompose question into sub-questions: \n\n - " + "\n - ".join([q["question"] for q in event["data"]["output"]["remaining_questions"]])
|
183 |
-
|
184 |
-
if event["name"] == "categorize_intent" and event["event"] == "on_chain_start":
|
185 |
-
print("X")
|
186 |
|
187 |
-
|
188 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
except Exception as e:
|
190 |
-
print(event, "has failed")
|
191 |
raise gr.Error(f"{e}")
|
192 |
|
193 |
|
@@ -196,7 +216,7 @@ async def chat(query,history,audience,sources,reports):
|
|
196 |
if os.getenv("GRADIO_ENV") != "local":
|
197 |
timestamp = str(datetime.now().timestamp())
|
198 |
file = timestamp + ".json"
|
199 |
-
prompt = history[1][
|
200 |
logs = {
|
201 |
"user_id": str(user_id),
|
202 |
"prompt": prompt,
|
@@ -204,7 +224,7 @@ async def chat(query,history,audience,sources,reports):
|
|
204 |
"question":output_query,
|
205 |
"sources":sources,
|
206 |
"docs":serialize_docs(docs),
|
207 |
-
"answer": history[-1]
|
208 |
"time": timestamp,
|
209 |
}
|
210 |
log_on_azure(file, logs, share_client)
|
@@ -232,24 +252,99 @@ async def chat(query,history,audience,sources,reports):
|
|
232 |
except Exception as e:
|
233 |
print(f"Skipped adding image {i} because of {e}")
|
234 |
|
235 |
-
|
236 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
237 |
|
238 |
-
# gallery = [x["img"] for x in list(image_dict.values())]
|
239 |
-
# img = list(image_dict.values())[0]
|
240 |
-
# img_md = img["md"]
|
241 |
-
# img_caption = img["caption"]
|
242 |
-
# img_code = img["figure_code"]
|
243 |
-
# if img_code != "N/A":
|
244 |
-
# img_name = f"{img['key']} - {img['figure_code']}"
|
245 |
-
# else:
|
246 |
-
# img_name = f"{img['key']}"
|
247 |
|
248 |
-
# answer_yet = history[-1][1] + f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"
|
249 |
-
# history[-1] = (history[-1][0],answer_yet)
|
250 |
-
# history = [tuple(x) for x in history]
|
251 |
|
252 |
-
|
|
|
|
|
|
|
|
|
|
|
253 |
|
254 |
|
255 |
def save_feedback(feed: str, user_id):
|
@@ -295,6 +390,56 @@ papers_cols_widths = {
|
|
295 |
papers_cols = list(papers_cols_widths.keys())
|
296 |
papers_cols_widths = list(papers_cols_widths.values())
|
297 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
298 |
|
299 |
# --------------------------------------------------------------------
|
300 |
# Gradio
|
@@ -324,21 +469,19 @@ def vote(data: gr.LikeData):
|
|
324 |
|
325 |
|
326 |
|
327 |
-
with gr.Blocks(title="Climate Q&A",
|
|
|
328 |
|
329 |
with gr.Tab("ClimateQ&A"):
|
330 |
|
331 |
with gr.Row(elem_id="chatbot-row"):
|
332 |
with gr.Column(scale=2):
|
|
|
333 |
chatbot = gr.Chatbot(
|
334 |
-
value
|
335 |
-
|
336 |
-
show_copy_button=True,
|
337 |
-
show_label = False,
|
338 |
-
elem_id="chatbot",
|
339 |
-
layout = "panel",
|
340 |
avatar_images = (None,"https://i.ibb.co/YNyd5W2/logo4.png"),
|
341 |
-
)
|
342 |
|
343 |
# bot.like(vote,None,None)
|
344 |
|
@@ -346,7 +489,8 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
346 |
|
347 |
with gr.Row(elem_id = "input-message"):
|
348 |
textbox=gr.Textbox(placeholder="Ask me anything here!",show_label=False,scale=7,lines = 1,interactive = True,elem_id="input-textbox")
|
349 |
-
|
|
|
350 |
|
351 |
with gr.Column(scale=1, variant="panel",elem_id = "right-panel"):
|
352 |
|
@@ -416,6 +560,9 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
416 |
|
417 |
|
418 |
|
|
|
|
|
|
|
419 |
#---------------------------------------------------------------------------------------
|
420 |
# OTHER TABS
|
421 |
#---------------------------------------------------------------------------------------
|
@@ -424,25 +571,25 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
424 |
with gr.Tab("Figures",elem_id = "tab-images",elem_classes = "max-height other-tabs"):
|
425 |
gallery_component = gr.Gallery()
|
426 |
|
427 |
-
|
428 |
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
|
436 |
-
|
437 |
|
438 |
-
|
439 |
-
|
440 |
|
441 |
-
|
442 |
-
|
443 |
|
444 |
-
|
445 |
-
|
446 |
|
447 |
|
448 |
|
@@ -453,9 +600,8 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
453 |
|
454 |
|
455 |
def start_chat(query,history):
|
456 |
-
|
457 |
-
|
458 |
-
history = history + [ChatMessage(role="user", content=query)]
|
459 |
return (gr.update(interactive = False),gr.update(selected=1),history)
|
460 |
|
461 |
def finish_chat():
|
@@ -463,13 +609,13 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
463 |
|
464 |
(textbox
|
465 |
.submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
|
466 |
-
.then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component],concurrency_limit = 8,api_name = "chat_textbox")
|
467 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_textbox")
|
468 |
)
|
469 |
|
470 |
(examples_hidden
|
471 |
.change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
|
472 |
-
.then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component],concurrency_limit = 8,api_name = "chat_examples")
|
473 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
|
474 |
)
|
475 |
|
@@ -484,7 +630,48 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
484 |
|
485 |
dropdown_samples.change(change_sample_questions,dropdown_samples,samples)
|
486 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
487 |
|
488 |
demo.queue()
|
489 |
|
490 |
-
demo.launch(
|
|
|
1 |
from climateqa.engine.embeddings import get_embeddings_function
|
2 |
embeddings_function = get_embeddings_function()
|
3 |
|
4 |
+
from climateqa.papers.openalex import OpenAlex
|
5 |
from sentence_transformers import CrossEncoder
|
6 |
|
7 |
+
reranker = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")
|
8 |
oa = OpenAlex()
|
9 |
|
10 |
import gradio as gr
|
|
|
15 |
import re
|
16 |
import json
|
17 |
|
|
|
|
|
18 |
# from gradio_modal import Modal
|
19 |
|
20 |
from io import BytesIO
|
|
|
29 |
|
30 |
# ClimateQ&A imports
|
31 |
from climateqa.engine.llm import get_llm
|
32 |
+
from climateqa.engine.rag import make_rag_chain
|
33 |
from climateqa.engine.vectorstore import get_pinecone_vectorstore
|
34 |
+
from climateqa.engine.retriever import ClimateQARetriever
|
|
|
35 |
from climateqa.engine.embeddings import get_embeddings_function
|
36 |
+
from climateqa.engine.prompts import audience_prompts
|
37 |
from climateqa.sample_questions import QUESTIONS
|
38 |
from climateqa.constants import POSSIBLE_REPORTS
|
39 |
from climateqa.utils import get_image_from_azure_blob_storage
|
40 |
from climateqa.engine.keywords import make_keywords_chain
|
41 |
+
from climateqa.engine.rag import make_rag_papers_chain
|
|
|
|
|
|
|
42 |
|
43 |
# Load environment variables in local mode
|
44 |
try:
|
|
|
81 |
|
82 |
|
83 |
|
84 |
+
def parse_output_llm_with_sources(output):
|
85 |
+
# Split the content into a list of text and "[Doc X]" references
|
86 |
+
content_parts = re.split(r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]', output)
|
87 |
+
parts = []
|
88 |
+
for part in content_parts:
|
89 |
+
if part.startswith("Doc"):
|
90 |
+
subparts = part.split(",")
|
91 |
+
subparts = [subpart.lower().replace("doc","").strip() for subpart in subparts]
|
92 |
+
subparts = [f"""<a href="#doc{subpart}" class="a-doc-ref" target="_self"><span class='doc-ref'><sup>{subpart}</sup></span></a>""" for subpart in subparts]
|
93 |
+
parts.append("".join(subparts))
|
94 |
+
else:
|
95 |
+
parts.append(part)
|
96 |
+
content_parts = "".join(parts)
|
97 |
+
return content_parts
|
98 |
+
|
99 |
+
|
100 |
# Create vectorstore and retriever
|
101 |
vectorstore = get_pinecone_vectorstore(embeddings_function)
|
102 |
llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
|
|
|
|
|
103 |
|
104 |
|
105 |
+
def make_pairs(lst):
|
106 |
+
"""from a list of even lenght, make tupple pairs"""
|
107 |
+
return [(lst[i], lst[i + 1]) for i in range(0, len(lst), 2)]
|
108 |
+
|
109 |
+
|
110 |
+
def serialize_docs(docs):
|
111 |
+
new_docs = []
|
112 |
+
for doc in docs:
|
113 |
+
new_doc = {}
|
114 |
+
new_doc["page_content"] = doc.page_content
|
115 |
+
new_doc["metadata"] = doc.metadata
|
116 |
+
new_docs.append(new_doc)
|
117 |
+
return new_docs
|
118 |
+
|
119 |
|
120 |
|
121 |
async def chat(query,history,audience,sources,reports):
|
122 |
"""taking a query and a message history, use a pipeline (reformulation, retriever, answering) to yield a tuple of:
|
123 |
(messages in gradio format, messages in langchain format, source documents)"""
|
124 |
|
125 |
+
print(f">> NEW QUESTION : {query}")
|
|
|
126 |
|
127 |
if audience == "Children":
|
128 |
audience_prompt = audience_prompts["children"]
|
|
|
137 |
if len(sources) == 0:
|
138 |
sources = ["IPCC"]
|
139 |
|
140 |
+
if len(reports) == 0:
|
141 |
+
reports = []
|
142 |
+
|
143 |
+
retriever = ClimateQARetriever(vectorstore=vectorstore,sources = sources,min_size = 200,reports = reports,k_summary = 3,k_total = 15,threshold=0.5)
|
144 |
+
rag_chain = make_rag_chain(retriever,llm)
|
145 |
|
146 |
+
inputs = {"query": query,"audience": audience_prompt}
|
147 |
+
result = rag_chain.astream_log(inputs) #{"callbacks":[MyCustomAsyncHandler()]})
|
148 |
+
# result = rag_chain.stream(inputs)
|
149 |
+
|
150 |
+
path_reformulation = "/logs/reformulation/final_output"
|
151 |
+
path_keywords = "/logs/keywords/final_output"
|
152 |
+
path_retriever = "/logs/find_documents/final_output"
|
153 |
+
path_answer = "/logs/answer/streamed_output_str/-"
|
154 |
|
|
|
155 |
docs_html = ""
|
156 |
output_query = ""
|
157 |
output_language = ""
|
158 |
output_keywords = ""
|
159 |
gallery = []
|
|
|
160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
try:
|
162 |
+
async for op in result:
|
163 |
+
|
164 |
+
op = op.ops[0]
|
165 |
+
|
166 |
+
if op['path'] == path_reformulation: # reforulated question
|
167 |
+
try:
|
168 |
+
output_language = op['value']["language"] # str
|
169 |
+
output_query = op["value"]["question"]
|
170 |
+
except Exception as e:
|
171 |
+
raise gr.Error(f"ClimateQ&A Error: {e} - The error has been noted, try another question and if the error remains, you can contact us :)")
|
172 |
+
|
173 |
+
if op["path"] == path_keywords:
|
174 |
+
try:
|
175 |
+
output_keywords = op['value']["keywords"] # str
|
176 |
+
output_keywords = " AND ".join(output_keywords)
|
177 |
+
except Exception as e:
|
178 |
+
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
|
180 |
+
|
181 |
+
elif op['path'] == path_retriever: # documents
|
182 |
+
try:
|
183 |
+
docs = op['value']['docs'] # List[Document]
|
184 |
+
docs_html = []
|
185 |
+
for i, d in enumerate(docs, 1):
|
186 |
+
docs_html.append(make_html_source(d, i))
|
187 |
+
docs_html = "".join(docs_html)
|
188 |
+
except TypeError:
|
189 |
+
print("No documents found")
|
190 |
+
print("op: ",op)
|
191 |
+
continue
|
192 |
+
|
193 |
+
elif op['path'] == path_answer: # final answer
|
194 |
+
new_token = op['value'] # str
|
195 |
+
# time.sleep(0.01)
|
196 |
+
previous_answer = history[-1][1]
|
197 |
+
previous_answer = previous_answer if previous_answer is not None else ""
|
198 |
+
answer_yet = previous_answer + new_token
|
199 |
+
answer_yet = parse_output_llm_with_sources(answer_yet)
|
200 |
+
history[-1] = (query,answer_yet)
|
201 |
+
|
202 |
+
|
203 |
+
|
204 |
+
else:
|
205 |
+
continue
|
206 |
+
|
207 |
+
history = [tuple(x) for x in history]
|
208 |
+
yield history,docs_html,output_query,output_language,gallery,output_query,output_keywords
|
209 |
+
|
210 |
except Exception as e:
|
|
|
211 |
raise gr.Error(f"{e}")
|
212 |
|
213 |
|
|
|
216 |
if os.getenv("GRADIO_ENV") != "local":
|
217 |
timestamp = str(datetime.now().timestamp())
|
218 |
file = timestamp + ".json"
|
219 |
+
prompt = history[-1][0]
|
220 |
logs = {
|
221 |
"user_id": str(user_id),
|
222 |
"prompt": prompt,
|
|
|
224 |
"question":output_query,
|
225 |
"sources":sources,
|
226 |
"docs":serialize_docs(docs),
|
227 |
+
"answer": history[-1][1],
|
228 |
"time": timestamp,
|
229 |
}
|
230 |
log_on_azure(file, logs, share_client)
|
|
|
252 |
except Exception as e:
|
253 |
print(f"Skipped adding image {i} because of {e}")
|
254 |
|
255 |
+
if len(image_dict) > 0:
|
256 |
+
|
257 |
+
gallery = [x["img"] for x in list(image_dict.values())]
|
258 |
+
img = list(image_dict.values())[0]
|
259 |
+
img_md = img["md"]
|
260 |
+
img_caption = img["caption"]
|
261 |
+
img_code = img["figure_code"]
|
262 |
+
if img_code != "N/A":
|
263 |
+
img_name = f"{img['key']} - {img['figure_code']}"
|
264 |
+
else:
|
265 |
+
img_name = f"{img['key']}"
|
266 |
+
|
267 |
+
answer_yet = history[-1][1] + f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"
|
268 |
+
history[-1] = (history[-1][0],answer_yet)
|
269 |
+
history = [tuple(x) for x in history]
|
270 |
+
|
271 |
+
# gallery = [x.metadata["image_path"] for x in docs if (len(x.metadata["image_path"]) > 0 and "IAS" in x.metadata["image_path"])]
|
272 |
+
# if len(gallery) > 0:
|
273 |
+
# gallery = list(set("|".join(gallery).split("|")))
|
274 |
+
# gallery = [get_image_from_azure_blob_storage(x) for x in gallery]
|
275 |
+
|
276 |
+
yield history,docs_html,output_query,output_language,gallery,output_query,output_keywords
|
277 |
+
|
278 |
+
|
279 |
+
def make_html_source(source,i):
|
280 |
+
meta = source.metadata
|
281 |
+
# content = source.page_content.split(":",1)[1].strip()
|
282 |
+
content = source.page_content.strip()
|
283 |
+
|
284 |
+
toc_levels = []
|
285 |
+
for j in range(2):
|
286 |
+
level = meta[f"toc_level{j}"]
|
287 |
+
if level != "N/A":
|
288 |
+
toc_levels.append(level)
|
289 |
+
else:
|
290 |
+
break
|
291 |
+
toc_levels = " > ".join(toc_levels)
|
292 |
+
|
293 |
+
if len(toc_levels) > 0:
|
294 |
+
name = f"<b>{toc_levels}</b><br/>{meta['name']}"
|
295 |
+
else:
|
296 |
+
name = meta['name']
|
297 |
+
|
298 |
+
if meta["chunk_type"] == "text":
|
299 |
+
|
300 |
+
card = f"""
|
301 |
+
<div class="card" id="doc{i}">
|
302 |
+
<div class="card-content">
|
303 |
+
<h2>Doc {i} - {meta['short_name']} - Page {int(meta['page_number'])}</h2>
|
304 |
+
<p>{content}</p>
|
305 |
+
</div>
|
306 |
+
<div class="card-footer">
|
307 |
+
<span>{name}</span>
|
308 |
+
<a href="{meta['url']}#page={int(meta['page_number'])}" target="_blank" class="pdf-link">
|
309 |
+
<span role="img" aria-label="Open PDF">🔗</span>
|
310 |
+
</a>
|
311 |
+
</div>
|
312 |
+
</div>
|
313 |
+
"""
|
314 |
+
|
315 |
+
else:
|
316 |
+
|
317 |
+
if meta["figure_code"] != "N/A":
|
318 |
+
title = f"{meta['figure_code']} - {meta['short_name']}"
|
319 |
+
else:
|
320 |
+
title = f"{meta['short_name']}"
|
321 |
+
|
322 |
+
card = f"""
|
323 |
+
<div class="card card-image">
|
324 |
+
<div class="card-content">
|
325 |
+
<h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
|
326 |
+
<p>{content}</p>
|
327 |
+
<p class='ai-generated'>AI-generated description</p>
|
328 |
+
</div>
|
329 |
+
<div class="card-footer">
|
330 |
+
<span>{name}</span>
|
331 |
+
<a href="{meta['url']}#page={int(meta['page_number'])}" target="_blank" class="pdf-link">
|
332 |
+
<span role="img" aria-label="Open PDF">🔗</span>
|
333 |
+
</a>
|
334 |
+
</div>
|
335 |
+
</div>
|
336 |
+
"""
|
337 |
+
|
338 |
+
return card
|
339 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
340 |
|
|
|
|
|
|
|
341 |
|
342 |
+
# else:
|
343 |
+
# docs_string = "No relevant passages found in the climate science reports (IPCC and IPBES)"
|
344 |
+
# complete_response = "**No relevant passages found in the climate science reports (IPCC and IPBES), you may want to ask a more specific question (specifying your question on climate issues).**"
|
345 |
+
# messages.append({"role": "assistant", "content": complete_response})
|
346 |
+
# gradio_format = make_pairs([a["content"] for a in messages[1:]])
|
347 |
+
# yield gradio_format, messages, docs_string
|
348 |
|
349 |
|
350 |
def save_feedback(feed: str, user_id):
|
|
|
390 |
papers_cols = list(papers_cols_widths.keys())
|
391 |
papers_cols_widths = list(papers_cols_widths.values())
|
392 |
|
393 |
+
async def find_papers(query, keywords,after):
|
394 |
+
|
395 |
+
summary = ""
|
396 |
+
|
397 |
+
df_works = oa.search(keywords,after = after)
|
398 |
+
df_works = df_works.dropna(subset=["abstract"])
|
399 |
+
df_works = oa.rerank(query,df_works,reranker)
|
400 |
+
df_works = df_works.sort_values("rerank_score",ascending=False)
|
401 |
+
G = oa.make_network(df_works)
|
402 |
+
|
403 |
+
height = "750px"
|
404 |
+
network = oa.show_network(G,color_by = "rerank_score",notebook=False,height = height)
|
405 |
+
network_html = network.generate_html()
|
406 |
+
|
407 |
+
network_html = network_html.replace("'", "\"")
|
408 |
+
css_to_inject = "<style>#mynetwork { border: none !important; } .card { border: none !important; }</style>"
|
409 |
+
network_html = network_html + css_to_inject
|
410 |
+
|
411 |
+
|
412 |
+
network_html = f"""<iframe style="width: 100%; height: {height};margin:0 auto" name="result" allow="midi; geolocation; microphone; camera;
|
413 |
+
display-capture; encrypted-media;" sandbox="allow-modals allow-forms
|
414 |
+
allow-scripts allow-same-origin allow-popups
|
415 |
+
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
|
416 |
+
allowpaymentrequest="" frameborder="0" srcdoc='{network_html}'></iframe>"""
|
417 |
+
|
418 |
+
|
419 |
+
docs = df_works["content"].head(15).tolist()
|
420 |
+
|
421 |
+
df_works = df_works.reset_index(drop = True).reset_index().rename(columns = {"index":"doc"})
|
422 |
+
df_works["doc"] = df_works["doc"] + 1
|
423 |
+
df_works = df_works[papers_cols]
|
424 |
+
|
425 |
+
yield df_works,network_html,summary
|
426 |
+
|
427 |
+
chain = make_rag_papers_chain(llm)
|
428 |
+
result = chain.astream_log({"question": query,"docs": docs,"language":"English"})
|
429 |
+
path_answer = "/logs/StrOutputParser/streamed_output/-"
|
430 |
+
|
431 |
+
async for op in result:
|
432 |
+
|
433 |
+
op = op.ops[0]
|
434 |
+
|
435 |
+
if op['path'] == path_answer: # reforulated question
|
436 |
+
new_token = op['value'] # str
|
437 |
+
summary += new_token
|
438 |
+
else:
|
439 |
+
continue
|
440 |
+
yield df_works,network_html,summary
|
441 |
+
|
442 |
+
|
443 |
|
444 |
# --------------------------------------------------------------------
|
445 |
# Gradio
|
|
|
469 |
|
470 |
|
471 |
|
472 |
+
with gr.Blocks(title="Climate Q&A", css="style.css", theme=theme,elem_id = "main-component") as demo:
|
473 |
+
# user_id_state = gr.State([user_id])
|
474 |
|
475 |
with gr.Tab("ClimateQ&A"):
|
476 |
|
477 |
with gr.Row(elem_id="chatbot-row"):
|
478 |
with gr.Column(scale=2):
|
479 |
+
# state = gr.State([system_template])
|
480 |
chatbot = gr.Chatbot(
|
481 |
+
value=[(None,init_prompt)],
|
482 |
+
show_copy_button=True,show_label = False,elem_id="chatbot",layout = "panel",
|
|
|
|
|
|
|
|
|
483 |
avatar_images = (None,"https://i.ibb.co/YNyd5W2/logo4.png"),
|
484 |
+
)#,avatar_images = ("assets/logo4.png",None))
|
485 |
|
486 |
# bot.like(vote,None,None)
|
487 |
|
|
|
489 |
|
490 |
with gr.Row(elem_id = "input-message"):
|
491 |
textbox=gr.Textbox(placeholder="Ask me anything here!",show_label=False,scale=7,lines = 1,interactive = True,elem_id="input-textbox")
|
492 |
+
# submit = gr.Button("",elem_id = "submit-button",scale = 1,interactive = True,icon = "https://static-00.iconduck.com/assets.00/settings-icon-2048x2046-cw28eevx.png")
|
493 |
+
|
494 |
|
495 |
with gr.Column(scale=1, variant="panel",elem_id = "right-panel"):
|
496 |
|
|
|
560 |
|
561 |
|
562 |
|
563 |
+
|
564 |
+
|
565 |
+
|
566 |
#---------------------------------------------------------------------------------------
|
567 |
# OTHER TABS
|
568 |
#---------------------------------------------------------------------------------------
|
|
|
571 |
with gr.Tab("Figures",elem_id = "tab-images",elem_classes = "max-height other-tabs"):
|
572 |
gallery_component = gr.Gallery()
|
573 |
|
574 |
+
with gr.Tab("Papers (beta)",elem_id = "tab-papers",elem_classes = "max-height other-tabs"):
|
575 |
|
576 |
+
with gr.Row():
|
577 |
+
with gr.Column(scale=1):
|
578 |
+
query_papers = gr.Textbox(placeholder="Question",show_label=False,lines = 1,interactive = True,elem_id="query-papers")
|
579 |
+
keywords_papers = gr.Textbox(placeholder="Keywords",show_label=False,lines = 1,interactive = True,elem_id="keywords-papers")
|
580 |
+
after = gr.Slider(minimum=1950,maximum=2023,step=1,value=1960,label="Publication date",show_label=True,interactive=True,elem_id="date-papers")
|
581 |
+
search_papers = gr.Button("Search",elem_id="search-papers",interactive=True)
|
582 |
|
583 |
+
with gr.Column(scale=7):
|
584 |
|
585 |
+
with gr.Tab("Summary",elem_id="papers-summary-tab"):
|
586 |
+
papers_summary = gr.Markdown(visible=True,elem_id="papers-summary")
|
587 |
|
588 |
+
with gr.Tab("Relevant papers",elem_id="papers-results-tab"):
|
589 |
+
papers_dataframe = gr.Dataframe(visible=True,elem_id="papers-table",headers = papers_cols)
|
590 |
|
591 |
+
with gr.Tab("Citations network",elem_id="papers-network-tab"):
|
592 |
+
citations_network = gr.HTML(visible=True,elem_id="papers-citations-network")
|
593 |
|
594 |
|
595 |
|
|
|
600 |
|
601 |
|
602 |
def start_chat(query,history):
|
603 |
+
history = history + [(query,None)]
|
604 |
+
history = [tuple(x) for x in history]
|
|
|
605 |
return (gr.update(interactive = False),gr.update(selected=1),history)
|
606 |
|
607 |
def finish_chat():
|
|
|
609 |
|
610 |
(textbox
|
611 |
.submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
|
612 |
+
.then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component,query_papers,keywords_papers],concurrency_limit = 8,api_name = "chat_textbox")
|
613 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_textbox")
|
614 |
)
|
615 |
|
616 |
(examples_hidden
|
617 |
.change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
|
618 |
+
.then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component,query_papers,keywords_papers],concurrency_limit = 8,api_name = "chat_examples")
|
619 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
|
620 |
)
|
621 |
|
|
|
630 |
|
631 |
dropdown_samples.change(change_sample_questions,dropdown_samples,samples)
|
632 |
|
633 |
+
query_papers.submit(generate_keywords,[query_papers], [keywords_papers])
|
634 |
+
search_papers.click(find_papers,[query_papers,keywords_papers,after], [papers_dataframe,citations_network,papers_summary])
|
635 |
+
|
636 |
+
# # textbox.submit(predict_climateqa,[textbox,bot],[None,bot,sources_textbox])
|
637 |
+
# (textbox
|
638 |
+
# .submit(answer_user, [textbox,examples_hidden, bot], [textbox, bot],queue = False)
|
639 |
+
# .success(change_tab,None,tabs)
|
640 |
+
# .success(fetch_sources,[textbox,dropdown_sources], [textbox,sources_textbox,docs_textbox,output_query,output_language])
|
641 |
+
# .success(answer_bot, [textbox,bot,docs_textbox,output_query,output_language,dropdown_audience], [textbox,bot],queue = True)
|
642 |
+
# .success(lambda x : textbox,[textbox],[textbox])
|
643 |
+
# )
|
644 |
+
|
645 |
+
# (examples_hidden
|
646 |
+
# .change(answer_user_example, [textbox,examples_hidden, bot], [textbox, bot],queue = False)
|
647 |
+
# .success(change_tab,None,tabs)
|
648 |
+
# .success(fetch_sources,[textbox,dropdown_sources], [textbox,sources_textbox,docs_textbox,output_query,output_language])
|
649 |
+
# .success(answer_bot, [textbox,bot,docs_textbox,output_query,output_language,dropdown_audience], [textbox,bot],queue=True)
|
650 |
+
# .success(lambda x : textbox,[textbox],[textbox])
|
651 |
+
# )
|
652 |
+
# submit_button.click(answer_user, [textbox, bot], [textbox, bot], queue=True).then(
|
653 |
+
# answer_bot, [textbox,bot,dropdown_audience,dropdown_sources], [textbox,bot,sources_textbox]
|
654 |
+
# )
|
655 |
+
|
656 |
+
|
657 |
+
# with Modal(visible=True) as first_modal:
|
658 |
+
# gr.Markdown("# Welcome to ClimateQ&A !")
|
659 |
+
|
660 |
+
# gr.Markdown("### Examples")
|
661 |
+
|
662 |
+
# examples = gr.Examples(
|
663 |
+
# ["Yo ça roule","ça boume"],
|
664 |
+
# [examples_hidden],
|
665 |
+
# examples_per_page=8,
|
666 |
+
# run_on_click=False,
|
667 |
+
# elem_id="examples",
|
668 |
+
# api_name="examples",
|
669 |
+
# )
|
670 |
+
|
671 |
+
|
672 |
+
# submit.click(lambda: Modal(visible=True), None, config_modal)
|
673 |
+
|
674 |
|
675 |
demo.queue()
|
676 |
|
677 |
+
demo.launch()
|
climateqa/engine/chains/__init__.py
DELETED
File without changes
|
climateqa/engine/chains/answer_ai_impact.py
DELETED
@@ -1,46 +0,0 @@
|
|
1 |
-
from langchain.prompts import ChatPromptTemplate
|
2 |
-
from langchain_core.output_parsers import StrOutputParser
|
3 |
-
|
4 |
-
|
5 |
-
prompt_template = """
|
6 |
-
You are ClimateQ&A, an helpful AI Assistant specialized in answering climate-related questions using info from the IPCC and/or IPBES reports.
|
7 |
-
Always stay true to climate and nature science and do not make up information.
|
8 |
-
If you do not know the answer, just say you do not know.
|
9 |
-
|
10 |
-
## Guidelines
|
11 |
-
- Explain that the environmental impact of AI is not covered by the IPCC or IPBES reports, but you can recommend info based on the sources below
|
12 |
-
- Answer the question in the original language of the question
|
13 |
-
|
14 |
-
## Sources
|
15 |
-
- You can propose to visit this page https://climateqa.com/docs/carbon-footprint/ to learn more about ClimateQ&A's own carbon footprint
|
16 |
-
- You can recommend to look at the work of the AI & climate expert scientist Sasha Luccioni with in in particular those papers
|
17 |
-
- Power Hungry Processing: Watts Driving the Cost of AI Deployment? - https://arxiv.org/abs/2311.16863 - about the carbon footprint at the inference stage of AI models
|
18 |
-
- Counting Carbon: A Survey of Factors Influencing the Emissions of Machine Learning - https://arxiv.org/abs/2302.08476
|
19 |
-
- Estimating the Carbon Footprint of BLOOM, a 176B Parameter Language Model - https://arxiv.org/abs/2211.02001 - about the carbon footprint of training a large language model
|
20 |
-
- You can also recommend the following tools to calculate the carbon footprint of AI models
|
21 |
-
- CodeCarbon - https://github.com/mlco2/codecarbon to measure the carbon footprint of your code
|
22 |
-
- Ecologits - https://ecologits.ai/ to measure the carbon footprint of using LLMs APIs such
|
23 |
-
"""
|
24 |
-
|
25 |
-
|
26 |
-
def make_ai_impact_chain(llm):
    """Build the runnable that answers questions about the environmental impact of AI.

    The pipeline is: a two-message chat prompt (``prompt_template`` as the
    system message, the caller's ``{question}`` as the user message), then
    ``llm``, then a string output parser.

    Args:
        llm: Chat model runnable placed in the middle of the pipeline.

    Returns:
        The composed runnable, configured with run name ``"ai_impact_chain"``.
    """
    messages = [
        ("system", prompt_template),
        ("user", "{question}"),
    ]
    pipeline = ChatPromptTemplate.from_messages(messages) | llm | StrOutputParser()
    return pipeline.with_config({"run_name":"ai_impact_chain"})
|
37 |
-
|
38 |
-
def make_ai_impact_node(llm):
    """Wrap the AI-impact chain in an async ``(state, config)`` callable.

    Args:
        llm: Chat model forwarded to ``make_ai_impact_chain``.

    Returns:
        A coroutine function that reads ``state["user_input"]``, runs the chain
        with the given ``config`` and returns ``{"answer": <chain output>}``.
    """
    chain = make_ai_impact_chain(llm)

    async def answer_ai_impact(state,config):
        payload = {"question":state["user_input"]}
        return {"answer":await chain.ainvoke(payload,config)}

    return answer_ai_impact
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
climateqa/engine/chains/answer_chitchat.py
DELETED
@@ -1,52 +0,0 @@
|
|
1 |
-
from langchain.prompts import ChatPromptTemplate
|
2 |
-
from langchain_core.output_parsers import StrOutputParser
|
3 |
-
|
4 |
-
|
5 |
-
chitchat_prompt_template = """
|
6 |
-
You are ClimateQ&A, an helpful AI Assistant specialized in answering climate-related questions using info from the IPCC and/or IPBES reports.
|
7 |
-
Always stay true to climate and nature science and do not make up information.
|
8 |
-
If you do not know the answer, just say you do not know.
|
9 |
-
|
10 |
-
## Guidelines
|
11 |
-
- If it's a conversational question, you can normally chat with the user
|
12 |
-
- If the question is not related to any topic about the environment, refuse to answer and politely ask the user to ask another question about the environment
|
13 |
-
- If the user ask if you speak any language, you can say you speak all languages :)
|
14 |
-
- If the user ask about the bot itself "ClimateQ&A", you can explain that you are an AI assistant specialized in answering climate-related questions using info from the IPCC and/or IPBES reports and propose to visit the website here https://climateqa.com/docs/intro/ for more information
|
15 |
-
- If the question is about ESG regulations, standards, or frameworks like the CSRD, TCFD, SASB, GRI, CDP, etc., you can explain that this is not a topic covered by the IPCC or IPBES reports.
|
16 |
-
- Precise that you are specialized in finding trustworthy information from the scientific reports of the IPCC and IPBES and other scientific litterature
|
17 |
-
- If relevant you can propose up to 3 example of questions they could ask from the IPCC or IPBES reports from the examples below
|
18 |
-
- Always answer in the original language of the question
|
19 |
-
|
20 |
-
## Examples of questions you can suggest (in the original language of the question)
|
21 |
-
"What evidence do we have of climate change?",
|
22 |
-
"Are human activities causing global warming?",
|
23 |
-
"What are the impacts of climate change?",
|
24 |
-
"Can climate change be reversed?",
|
25 |
-
"What is the difference between climate change and global warming?",
|
26 |
-
"""
|
27 |
-
|
28 |
-
|
29 |
-
def make_chitchat_chain(llm):
    """Build the runnable used for conversational / off-topic messages.

    The pipeline is: a two-message chat prompt (``chitchat_prompt_template``
    as the system message, the caller's ``{question}`` as the user message),
    then ``llm``, then a string output parser.

    Args:
        llm: Chat model runnable placed in the middle of the pipeline.

    Returns:
        The composed runnable, configured with run name ``"chitchat_chain"``.
    """
    messages = [
        ("system", chitchat_prompt_template),
        ("user", "{question}"),
    ]
    pipeline = ChatPromptTemplate.from_messages(messages) | llm | StrOutputParser()
    return pipeline.with_config({"run_name":"chitchat_chain"})
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
def make_chitchat_node(llm):
    """Wrap the chitchat chain in an async ``(state, config)`` callable.

    Args:
        llm: Chat model forwarded to ``make_chitchat_chain``.

    Returns:
        A coroutine function that reads ``state["user_input"]``, runs the chain
        with the given ``config`` and returns ``{"answer": <chain output>}``.
    """
    chain = make_chitchat_chain(llm)

    async def answer_chitchat(state,config):
        payload = {"question":state["user_input"]}
        return {"answer":await chain.ainvoke(payload,config)}

    return answer_chitchat
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
climateqa/engine/chains/answer_rag.py
DELETED
@@ -1,99 +0,0 @@
|
|
1 |
-
from operator import itemgetter
|
2 |
-
|
3 |
-
from langchain_core.prompts import ChatPromptTemplate
|
4 |
-
from langchain_core.output_parsers import StrOutputParser
|
5 |
-
from langchain_core.prompts.prompt import PromptTemplate
|
6 |
-
from langchain_core.prompts.base import format_document
|
7 |
-
|
8 |
-
from climateqa.engine.chains.prompts import answer_prompt_template,answer_prompt_without_docs_template,answer_prompt_images_template
|
9 |
-
from climateqa.engine.chains.prompts import papers_prompt_template
|
10 |
-
|
11 |
-
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")


def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, sep="\n\n"
):
    """Render ``docs`` as numbered single-line entries joined by ``sep``.

    Args:
        docs: Iterable of documents. Plain strings are used verbatim; anything
            else is rendered with ``format_document`` and ``document_prompt``.
        document_prompt: Prompt used to render non-string documents.
        sep: Separator inserted between rendered entries.

    Returns:
        One string in which entry ``n`` (1-based) reads ``"Doc n: <text>"``
        with every newline of the entry replaced by a space.
    """

    def _render(position, doc):
        body = doc if isinstance(doc, str) else format_document(doc, document_prompt)
        # Every entry is labelled "Doc"; no text/image distinction is made here.
        return (f"Doc {position}: " + body).replace("\n"," ")

    return sep.join(_render(number, doc) for number, doc in enumerate(docs, start=1))
|
31 |
-
|
32 |
-
|
33 |
-
def get_text_docs(x):
    """Return the documents of ``x`` whose ``metadata["chunk_type"]`` is ``"text"``.

    Input order is preserved. A document whose metadata has no
    ``"chunk_type"`` key raises ``KeyError``.
    """

    def _is_text(doc):
        return doc.metadata["chunk_type"] == "text"

    return list(filter(_is_text, x))
|
35 |
-
|
36 |
-
def get_image_docs(x):
    """Return the documents of ``x`` whose ``metadata["chunk_type"]`` is ``"image"``.

    Input order is preserved. A document whose metadata has no
    ``"chunk_type"`` key raises ``KeyError``.
    """

    def _is_image(doc):
        return doc.metadata["chunk_type"] == "image"

    return list(filter(_is_image, x))
|
38 |
-
|
39 |
-
def make_rag_chain(llm):
    """Build the answer chain that uses retrieved documents as context.

    The input mapping exposes four prompt variables: ``context`` (the entries
    of ``x["documents"]`` rendered by ``_combine_documents``) and ``query``,
    ``language`` and ``audience`` copied from the input under the same keys.

    Args:
        llm: Chat model runnable placed after the prompt.

    Returns:
        Runnable ``mapping | prompt | llm | StrOutputParser()``.
    """
    passthrough_keys = ("query", "language", "audience")
    prompt_inputs = {
        "context":lambda x : _combine_documents(x["documents"]),
        **{key: itemgetter(key) for key in passthrough_keys},
    }
    answer_prompt = ChatPromptTemplate.from_template(answer_prompt_template)
    return prompt_inputs | answer_prompt | llm | StrOutputParser()
|
48 |
-
|
49 |
-
def make_rag_chain_without_docs(llm):
    """Build the answer chain used when no documents are supplied as context.

    Args:
        llm: Chat model runnable placed after the prompt.

    Returns:
        Runnable made of the ``answer_prompt_without_docs_template`` prompt,
        ``llm`` and a string output parser.
    """
    no_docs_prompt = ChatPromptTemplate.from_template(answer_prompt_without_docs_template)
    return no_docs_prompt | llm | StrOutputParser()
|
53 |
-
|
54 |
-
|
55 |
-
def make_rag_node(llm,with_docs = True):
    """Wrap one of the two answer chains in an async ``(state, config)`` callable.

    Args:
        llm: Chat model forwarded to the chain factory.
        with_docs: When truthy, use ``make_rag_chain``; otherwise use
            ``make_rag_chain_without_docs``.

    Returns:
        A coroutine function that passes the whole ``state`` to the chain and
        returns ``{"answer": <chain output>}``.
    """
    build_chain = make_rag_chain if with_docs else make_rag_chain_without_docs
    chain = build_chain(llm)

    async def answer_rag(state,config):
        return {"answer":await chain.ainvoke(state,config)}

    return answer_rag
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
# def make_rag_papers_chain(llm):
|
72 |
-
|
73 |
-
# prompt = ChatPromptTemplate.from_template(papers_prompt_template)
|
74 |
-
# input_documents = {
|
75 |
-
# "context":lambda x : _combine_documents(x["docs"]),
|
76 |
-
# **pass_values(["question","language"])
|
77 |
-
# }
|
78 |
-
|
79 |
-
# chain = input_documents | prompt | llm | StrOutputParser()
|
80 |
-
# chain = rename_chain(chain,"answer")
|
81 |
-
|
82 |
-
# return chain
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
# def make_illustration_chain(llm):
|
90 |
-
|
91 |
-
# prompt_with_images = ChatPromptTemplate.from_template(answer_prompt_images_template)
|
92 |
-
|
93 |
-
# input_description_images = {
|
94 |
-
# "images":lambda x : _combine_documents(get_image_docs(x["docs"])),
|
95 |
-
# **pass_values(["question","audience","language","answer"]),
|
96 |
-
# }
|
97 |
-
|
98 |
-
# illustration_chain = input_description_images | prompt_with_images | llm | StrOutputParser()
|
99 |
-
# return illustration_chain
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
climateqa/engine/chains/intent_categorization.py
DELETED
@@ -1,86 +0,0 @@
|
|
1 |
-
|
2 |
-
from langchain_core.pydantic_v1 import BaseModel, Field
|
3 |
-
from typing import List
|
4 |
-
from typing import Literal
|
5 |
-
from langchain.prompts import ChatPromptTemplate
|
6 |
-
from langchain_core.utils.function_calling import convert_to_openai_function
|
7 |
-
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
|
8 |
-
|
9 |
-
|
10 |
-
# Schema converted to an OpenAI function by make_intent_categorization_chain.
# NOTE(review): the class docstring and the Field descriptions are presumably
# sent to the model as part of the function schema — treat them as runtime text
# rather than documentation, and confirm before rewording them.
class IntentCategorizer(BaseModel):
    """Analyzing the user message input"""

    # Language of the message as a full word; "English" when not provided.
    language: str = Field(
        description="Find the language of the message input in full words (ex: French, English, Spanish, ...), defaults to English",
        default="English",
    )
    # Category of the message, restricted to the five values listed in `enum`;
    # the description gives the model a definition/example for each one.
    intent: str = Field(
        enum=[
            "ai_impact",
            "geo_info",
            "esg",
            "search",
            "chitchat",
        ],
        description="""
        Categorize the user input in one of the following category
        Any question

        Examples:
        - ai_impact = Environmental impacts of AI: "What are the environmental impacts of AI", "How does AI affect the environment"
        - geo_info = Geolocated info about climate change: Any question where the user wants to know localized impacts of climate change, eg: "What will be the temperature in Marseille in 2050"
        - esg = Any question about the ESG regulation, frameworks and standards like the CSRD, TCFD, SASB, GRI, CDP, etc.
        - search = Searching for any quesiton about climate change, energy, biodiversity, nature, and everything we can find the IPCC or IPBES reports or scientific papers,
        - chitchat = Any general question that is not related to the environment or climate change or just conversational, or if you don't think searching the IPCC or IPBES reports would be relevant
        """,
    )
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
def make_intent_categorization_chain(llm):
    """Build the chain that classifies a user message with ``IntentCategorizer``.

    ``llm`` is bound to the ``IntentCategorizer`` function schema with
    ``function_call`` naming that function, and its output is parsed with
    ``JsonOutputFunctionsParser``.

    Args:
        llm: Chat model supporting ``.bind(functions=..., function_call=...)``.

    Returns:
        Runnable taking ``{"input": <message>}``.
    """
    function_schema = convert_to_openai_function(IntentCategorizer)
    constrained_llm = llm.bind(
        functions = [function_schema],
        function_call={"name":"IntentCategorizer"},
    )
    messages = [
        ("system", "You are a helpful assistant, you will analyze, translate and reformulate the user input message using the function provided"),
        ("user", "input: {input}"),
    ]
    return ChatPromptTemplate.from_messages(messages) | constrained_llm | JsonOutputFunctionsParser()
|
52 |
-
|
53 |
-
|
54 |
-
def make_intent_categorization_node(llm):
    """Wrap the intent-categorization chain in a synchronous ``state`` callable.

    Args:
        llm: Chat model forwarded to ``make_intent_categorization_chain``.

    Returns:
        A function that classifies ``state["user_input"]`` and returns the
        chain output with ``"language"`` defaulted to ``"English"`` when absent
        and ``"query"`` set to the original user input.
    """
    chain = make_intent_categorization_chain(llm)

    def categorize_message(state):
        message = state["user_input"]
        result = chain.invoke({"input":message})
        # The model may omit the language; fall back to English.
        result.setdefault("language", "English")
        result["query"] = message
        return result

    return categorize_message
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
# SAMPLE_QUESTIONS = [
|
70 |
-
# "Est-ce que l'IA a un impact sur l'environnement ?",
|
71 |
-
# "Que dit le GIEC sur l'impact de l'IA",
|
72 |
-
# "Qui sont les membres du GIEC",
|
73 |
-
# "What is the impact of El Nino ?",
|
74 |
-
# "Yo",
|
75 |
-
# "Hello ça va bien ?",
|
76 |
-
# "Par qui as tu été créé ?",
|
77 |
-
# "What role do cloud formations play in modulating the Earth's radiative balance, and how are they represented in current climate models?",
|
78 |
-
# "Which industries have the highest GHG emissions?",
|
79 |
-
# "What are invasive alien species and how do they threaten biodiversity and ecosystems?",
|
80 |
-
# "Are human activities causing global warming?",
|
81 |
-
# "What is the motivation behind mining the deep seabed?",
|
82 |
-
# "Tu peux m'écrire un poème sur le changement climatique ?",
|
83 |
-
# "Tu peux m'écrire un poème sur les bonbons ?",
|
84 |
-
# "What will be the temperature in 2100 in Strasbourg?",
|
85 |
-
# "C'est quoi le lien entre biodiversity and changement climatique ?",
|
86 |
-
# ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
climateqa/engine/chains/keywords_extraction.py
DELETED
@@ -1,40 +0,0 @@
|
|
1 |
-
|
2 |
-
from langchain_core.pydantic_v1 import BaseModel, Field
|
3 |
-
from typing import List
|
4 |
-
from typing import Literal
|
5 |
-
from langchain.prompts import ChatPromptTemplate
|
6 |
-
from langchain_core.utils.function_calling import convert_to_openai_function
|
7 |
-
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
|
8 |
-
|
9 |
-
|
10 |
-
# Schema converted to an OpenAI function by make_keywords_extraction_chain.
# NOTE(review): the docstring and Field description are presumably sent to the
# model as part of the function schema — confirm before rewording them.
class KeywordExtraction(BaseModel):
    """
    Analyzing the user query to extract keywords to feed a search engine
    """

    # Search-engine keywords for the query; the description asks the model for
    # at most three, favouring general terms.
    keywords: List[str] = Field(
        description="""
        Extract the keywords from the user query to feed a search engine as a list
        Avoid adding super specific keywords to prefer general keywords
        Maximum 3 keywords

        Examples:
        - "What is the impact of deep sea mining ?" -> ["deep sea mining"]
        - "How will El Nino be impacted by climate change" -> ["el nino","climate change"]
        - "Is climate change a hoax" -> ["climate change","hoax"]
        """
    )
|
27 |
-
|
28 |
-
|
29 |
-
def make_keywords_extraction_chain(llm):
    """Build the chain that extracts search keywords with ``KeywordExtraction``.

    ``llm`` is bound to the ``KeywordExtraction`` function schema with
    ``function_call`` naming that function, and its output is parsed with
    ``JsonOutputFunctionsParser``.

    Args:
        llm: Chat model supporting ``.bind(functions=..., function_call=...)``.

    Returns:
        Runnable whose prompt has a single ``{input}`` variable.
    """
    function_schema = convert_to_openai_function(KeywordExtraction)
    constrained_llm = llm.bind(
        functions = [function_schema],
        function_call={"name":"KeywordExtraction"},
    )
    messages = [
        ("system", "You are a helpful assistant"),
        ("user", "input: {input}"),
    ]
    return ChatPromptTemplate.from_messages(messages) | constrained_llm | JsonOutputFunctionsParser()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
climateqa/engine/chains/query_transformation.py
DELETED
@@ -1,193 +0,0 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
from langchain_core.pydantic_v1 import BaseModel, Field
|
4 |
-
from typing import List
|
5 |
-
from typing import Literal
|
6 |
-
from langchain.prompts import ChatPromptTemplate
|
7 |
-
from langchain_core.utils.function_calling import convert_to_openai_function
|
8 |
-
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
|
9 |
-
|
10 |
-
|
11 |
-
# Maps each retrieval index name to the source labels served by that index.
ROUTING_INDEX = {
    "Vector":["IPCC","IPBES","IPOS"],
    "OpenAlex":["OpenAlex"],
}

# Flat list of every source label across all indexes, in ROUTING_INDEX order.
POSSIBLE_SOURCES = [y for values in ROUTING_INDEX.values() for y in values]
|
17 |
-
|
18 |
-
# Prompt from the original paper https://arxiv.org/pdf/2305.14283
|
19 |
-
# Query Rewriting for Retrieval-Augmented Large Language Models
|
20 |
-
# Schema converted to an OpenAI function by make_query_decomposition_chain.
# NOTE(review): the docstring and Field description are presumably sent to the
# model as part of the function schema — confirm before rewording them.
class QueryDecomposition(BaseModel):
    """
    Decompose the user query into smaller parts to think step by step to answer this question
    Act as a simple planning agent
    """

    # Search-engine questions derived from the user query; the description asks
    # for English questions and at most 2 to 3 of them.
    questions: List[str] = Field(
        description="""
        Think step by step to answer this question, and provide one or several search engine questions in English for knowledge that you need.
        Suppose that the user is looking for information about climate change, energy, biodiversity, nature, and everything we can find the IPCC reports and scientific literature
        - If it's already a standalone and explicit question, just return the reformulated question for the search engine
        - If you need to decompose the question, output a list of maximum 2 to 3 questions
        """
    )
|
34 |
-
|
35 |
-
|
36 |
-
# Geographic reference extracted from a query. Both fields are required
# (declared with `...`). Within the visible code this model is only referenced
# from a commented-out field of QueryAnalysis.
class Location(BaseModel):
    # Country, either stated directly or inferred from the place mentioned.
    country:str = Field(...,description="The country if directly mentioned or inferred from the location (cities, regions, adresses), ex: France, USA, ...")
    # The specific place mentioned in the query.
    location:str = Field(...,description="The specific place if mentioned (cities, regions, addresses), ex: Marseille, New York, Wisconsin, ...")
|
39 |
-
|
40 |
-
# Schema converted to an OpenAI function by make_query_rewriter_chain.
# Only `sources` is an active field; every other field below is commented out,
# although the docstring still mentions topics, dates, query expansion and keywords.
# NOTE(review): the docstring and Field description are presumably sent to the
# model as part of the function schema — confirm before rewording them.
class QueryAnalysis(BaseModel):
    """
    Analyzing the user query to extract topics, sources and date
    Also do query expansion to get alternative search queries
    Also provide simple keywords to feed a search engine
    """

    # --- Disabled fields, kept for reference ---
    # keywords: List[str] = Field(
    # description="""
    # Extract the keywords from the user query to feed a search engine as a list
    # Maximum 3 keywords

    # Examples:
    # - "What is the impact of deep sea mining ?" -> deep sea mining
    # - "How will El Nino be impacted by climate change" -> el nino;climate change
    # - "Is climate change a hoax" -> climate change;hoax
    # """
    # )

    # alternative_queries: List[str] = Field(
    # description="""
    # Generate alternative search questions from the user query to feed a search engine
    # """
    # )

    # step_back_question: str = Field(
    # description="""
    # You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer.
    # This questions should help you get more context and information about the user query
    # """
    # )

    # Source labels the model considers relevant for the question; values are
    # limited to the four literals below, and the field is required (`...`).
    sources: List[Literal["IPCC", "IPBES", "IPOS","OpenAlex"]] = Field(
        ...,
        description="""
        Given a user question choose which documents would be most relevant for answering their question,
        - IPCC is for questions about climate change, energy, impacts, and everything we can find the IPCC reports
        - IPBES is for questions about biodiversity and nature
        - IPOS is for questions about the ocean and deep sea mining
        - OpenAlex is for any other questions that are not in the previous categories but could be found in the scientific litterature
        """,
    )
    # --- Disabled fields, kept for reference ---
    # topics: List[Literal[
    # "Climate change",
    # "Biodiversity",
    # "Energy",
    # "Decarbonization",
    # "Climate science",
    # "Nature",
    # "Climate policy and justice",
    # "Oceans",
    # "Deep sea mining",
    # "ESG and regulations",
    # "CSRD",
    # ]] = Field(
    # ...,
    # description = """
    # Choose the topics that are most relevant to the user query, ex: Climate change, Energy, Biodiversity, ...
    # """,
    # )
    # date: str = Field(description="The date or period mentioned, ex: 2050, between 2020 and 2050")
    # location:Location
|
102 |
-
|
103 |
-
|
104 |
-
def make_query_decomposition_chain(llm):
    """Build the chain that splits a user query with ``QueryDecomposition``.

    ``llm`` is bound to the ``QueryDecomposition`` function schema with
    ``function_call`` naming that function, and its output is parsed with
    ``JsonOutputFunctionsParser``.

    Args:
        llm: Chat model supporting ``.bind(functions=..., function_call=...)``.

    Returns:
        Runnable taking ``{"input": <query>}``.
    """
    function_schema = convert_to_openai_function(QueryDecomposition)
    constrained_llm = llm.bind(
        functions = [function_schema],
        function_call={"name":"QueryDecomposition"},
    )
    messages = [
        ("system", "You are a helpful assistant, you will analyze, translate and reformulate the user input message using the function provided"),
        ("user", "input: {input}"),
    ]
    return ChatPromptTemplate.from_messages(messages) | constrained_llm | JsonOutputFunctionsParser()
|
116 |
-
|
117 |
-
|
118 |
-
def make_query_rewriter_chain(llm):
    """Build the chain that analyses one question with ``QueryAnalysis``.

    ``llm`` is bound to the ``QueryAnalysis`` function schema with
    ``function_call`` naming that function, and its output is parsed with
    ``JsonOutputFunctionsParser``.

    Args:
        llm: Chat model supporting ``.bind(functions=..., function_call=...)``.

    Returns:
        Runnable taking ``{"input": <question>}``.
    """
    function_schema = convert_to_openai_function(QueryAnalysis)
    constrained_llm = llm.bind(
        functions = [function_schema],
        function_call={"name":"QueryAnalysis"},
    )
    messages = [
        ("system", "You are a helpful assistant, you will analyze, translate and reformulate the user input message using the function provided"),
        ("user", "input: {input}"),
    ]
    return ChatPromptTemplate.from_messages(messages) | constrained_llm | JsonOutputFunctionsParser()
|
133 |
-
|
134 |
-
|
135 |
-
def make_query_transform_node(llm,k_final=15):
    """Create the step that turns the user query into routed sub-questions.

    Args:
        llm: Chat model used to build the decomposition and source-analysis chains.
        k_final: Not used by this function; kept so existing callers keep working.

    Returns:
        A function ``transform_query(state)`` returning
        ``{"remaining_questions": [...], "n_questions": <int>}``. Each entry of
        ``remaining_questions`` is ``{"question", "sources", "index"}``: one
        entry per (sub-question, index of ``ROUTING_INDEX``) pair for which at
        least one requested source belongs to that index.
    """
    decomposition_chain = make_query_decomposition_chain(llm)
    rewriter_chain = make_query_rewriter_chain(llm)

    def transform_query(state):
        # Auto mode is on for any "sources_auto" value other than missing, None or False.
        sources_auto = state.get("sources_auto")
        auto_mode = sources_auto is not None and sources_auto is not False

        sources_input = state.get("sources_input")
        if sources_input is None:
            sources_input = ROUTING_INDEX["Vector"]

        # Decomposition of the user query into standalone search questions
        questions = decomposition_chain.invoke({"input":state["query"]})["questions"]

        new_questions = []
        for question in questions:
            if auto_mode:
                # Let the model choose the sources for this sub-question.
                sources = rewriter_chain.invoke({"input":question})["sources"]
            else:
                # Manual mode: the configured sources always win, so the
                # analysis call (whose result would be discarded) is skipped.
                sources = sources_input

            # Explode the question into one entry per index that serves it.
            # Filtering in ROUTING_INDEX order keeps the result deterministic;
            # iterating a set of strings gives an order that varies between runs.
            requested = set(sources)
            for index,index_sources in ROUTING_INDEX.items():
                selected_sources = [source for source in index_sources if source in requested]
                if selected_sources:
                    new_questions.append({"question":question,"sources":selected_sources,"index":index})

        return {
            "remaining_questions":new_questions,
            "n_questions":len(new_questions),
        }

    return transform_query
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
climateqa/engine/chains/retrieve_documents.py
DELETED
@@ -1,159 +0,0 @@
|
|
1 |
-
import sys
|
2 |
-
import os
|
3 |
-
from contextlib import contextmanager
|
4 |
-
|
5 |
-
from langchain_core.tools import tool
|
6 |
-
from langchain_core.runnables import chain
|
7 |
-
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
|
8 |
-
from langchain_core.runnables import RunnableLambda
|
9 |
-
|
10 |
-
from ..reranker import rerank_docs
|
11 |
-
from ...knowledge.retriever import ClimateQARetriever
|
12 |
-
from ...knowledge.openalex import OpenAlexRetriever
|
13 |
-
from .keywords_extraction import make_keywords_extraction_chain
|
14 |
-
from ..utils import log_event
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
def divide_into_parts(target, parts):
    """Split ``target`` into ``parts`` values that differ by at most one.

    Every entry gets ``target // parts``; the first ``target % parts`` entries
    get one extra unit, so the entries add up to ``target``.

    Args:
        target: Total amount to distribute.
        parts: Number of entries to produce.

    Returns:
        A list of ``parts`` values, largest first (empty when ``parts`` is
        negative).

    Raises:
        ZeroDivisionError: If ``parts`` is 0.
    """
    base, remainder = divmod(target, parts)
    return [base + 1 if position < remainder else base for position in range(parts)]
|
35 |
-
|
36 |
-
|
37 |
-
@contextmanager
def suppress_output():
    """Silence ``sys.stdout`` and ``sys.stderr`` inside the ``with`` block.

    Both stream objects are replaced by a handle on ``os.devnull`` and put
    back when the block exits, including when the body raises. Only the
    ``sys.stdout`` / ``sys.stderr`` objects are swapped.
    """
    # Local import: the surrounding module only imports ``contextmanager``.
    from contextlib import redirect_stderr, redirect_stdout

    # The stdlib redirectors do the save / swap / restore that was previously
    # hand-written; the null device is closed last, after both are restored.
    with open(os.devnull, 'w') as devnull, redirect_stdout(devnull), redirect_stderr(devnull):
        yield
|
53 |
-
|
54 |
-
|
55 |
-
# Placeholder tool: returns the question it receives, unchanged.
# NOTE(review): with @tool the docstring below is presumably used as the tool
# description, so it is left untouched — confirm before rewording it.
@tool
def query_retriever(question):
    """Just a dummy tool to simulate the retriever query"""
    return question
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
def make_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5):
    """Create the async step that retrieves documents for the next pending question.

    Args:
        vectorstore: Store handed to ``ClimateQARetriever`` for the "Vector" index.
        reranker: Reranker passed to ``rerank_docs``; when ``None`` each
            document's ``similarity_score`` is copied to ``reranking_score``.
        llm: Chat model used to build the keyword-extraction chain (OpenAlex queries).
        rerank_by_question: When truthy, keep only the first
            ``k_final // state["n_questions"]`` documents for the question.
        k_final: Total number of documents targeted across all questions.
        k_before_reranking: Number of documents requested from the retriever.
        k_summary: Forwarded to ``ClimateQARetriever`` as ``k_summary``.

    Returns:
        A runnable (``@chain``-decorated coroutine) taking ``(state, config)``
        and returning ``{"documents": [...], "remaining_questions": [...]}``:
        the documents found for ``state["remaining_questions"][0]``, sorted by
        descending ``reranking_score``, and the questions still to process.

    Raises:
        Exception: If the question's ``index`` is neither ``"Vector"`` nor ``"OpenAlex"``.
    """
    # Built once per node rather than on every call: it only depends on ``llm``.
    keywords_extraction = make_keywords_extraction_chain(llm)

    # The chain callback is not necessary, but it propagates the langchain callbacks to the astream_events logger to display intermediate results
    @chain
    async def retrieve_documents(state,config):

        current_question = state["remaining_questions"][0]
        remaining_questions = state["remaining_questions"][1:]

        docs = []
        k_by_question = k_final // state["n_questions"]

        sources = current_question["sources"]
        question = current_question["question"]
        index = current_question["index"]

        await log_event({"question":question,"sources":sources,"index":index},"log_retriever",config)

        if index == "Vector":

            # Search the document store using the retriever
            # Configure high top k for further reranking step
            retriever = ClimateQARetriever(
                vectorstore=vectorstore,
                sources = sources,
                min_size = 200,
                k_summary = k_summary,
                k_total = k_before_reranking,
                threshold = 0.5,
            )
            docs_question = await retriever.ainvoke(question,config)

        elif index == "OpenAlex":

            # Awaited so the LLM round-trip does not block the event loop
            # (this coroutine previously used the synchronous ``invoke``).
            keywords_output = await keywords_extraction.ainvoke(question)
            keywords = keywords_output["keywords"]
            openalex_query = " AND ".join(keywords)

            print(f"... OpenAlex query: {openalex_query}")

            retriever_openalex = OpenAlexRetriever(
                min_year = state.get("min_year",1960),
                max_year = state.get("max_year",None),
                k = k_before_reranking
            )
            docs_question = await retriever_openalex.ainvoke(openalex_query,config)

        else:
            raise Exception(f"Index {index} not found in the routing index")

        # Rerank
        if reranker is not None:
            with suppress_output():
                docs_question = rerank_docs(reranker,docs_question,question)
        else:
            # Add a default reranking score
            for doc in docs_question:
                doc.metadata["reranking_score"] = doc.metadata["similarity_score"]

        # If rerank by question we select the top documents for each question
        if rerank_by_question:
            docs_question = docs_question[:k_by_question]

        # Add sources used in the metadata
        for doc in docs_question:
            doc.metadata["sources_used"] = sources
            doc.metadata["question_used"] = question
            doc.metadata["index_used"] = index

        # Add to the list of docs
        docs.extend(docs_question)

        # Sorting the list in descending order by rerank_score
        docs = sorted(docs, key=lambda x: x.metadata["reranking_score"], reverse=True)
        new_state = {"documents":docs,"remaining_questions":remaining_questions}
        return new_state

    return retrieve_documents
|
159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
climateqa/engine/chains/sample_router.py
DELETED
@@ -1,66 +0,0 @@
|
|
1 |
-
|
2 |
-
# from typing import List
|
3 |
-
# from typing import Literal
|
4 |
-
# from langchain.prompts import ChatPromptTemplate
|
5 |
-
# from langchain_core.utils.function_calling import convert_to_openai_function
|
6 |
-
# from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
|
7 |
-
|
8 |
-
# # https://livingdatalab.com/posts/2023-11-05-openai-function-calling-with-langchain.html
|
9 |
-
|
10 |
-
# class Location(BaseModel):
|
11 |
-
# country:str = Field(...,description="The country if directly mentioned or inferred from the location (cities, regions, adresses), ex: France, USA, ...")
|
12 |
-
# location:str = Field(...,description="The specific place if mentioned (cities, regions, addresses), ex: Marseille, New York, Wisconsin, ...")
|
13 |
-
|
14 |
-
# class QueryAnalysis(BaseModel):
|
15 |
-
# """Analyzing the user query"""
|
16 |
-
|
17 |
-
# language: str = Field(
|
18 |
-
# description="Find the language of the query in full words (ex: French, English, Spanish, ...), defaults to English"
|
19 |
-
# )
|
20 |
-
# intent: str = Field(
|
21 |
-
# enum=[
|
22 |
-
# "Environmental impacts of AI",
|
23 |
-
# "Geolocated info about climate change",
|
24 |
-
# "Climate change",
|
25 |
-
# "Biodiversity",
|
26 |
-
# "Deep sea mining",
|
27 |
-
# "Chitchat",
|
28 |
-
# ],
|
29 |
-
# description="""
|
30 |
-
# Categorize the user query in one of the following category,
|
31 |
-
|
32 |
-
# Examples:
|
33 |
-
# - Geolocated info about climate change: "What will be the temperature in Marseille in 2050"
|
34 |
-
# - Climate change: "What is radiative forcing", "How much will
|
35 |
-
# """,
|
36 |
-
# )
|
37 |
-
# sources: List[Literal["IPCC", "IPBES", "IPOS"]] = Field(
|
38 |
-
# ...,
|
39 |
-
# description="""
|
40 |
-
# Given a user question choose which documents would be most relevant for answering their question,
|
41 |
-
# - IPCC is for questions about climate change, energy, impacts, and everything we can find the IPCC reports
|
42 |
-
# - IPBES is for questions about biodiversity and nature
|
43 |
-
# - IPOS is for questions about the ocean and deep sea mining
|
44 |
-
|
45 |
-
# """,
|
46 |
-
# )
|
47 |
-
# date: str = Field(description="The date or period mentioned, ex: 2050, between 2020 and 2050")
|
48 |
-
# location:Location
|
49 |
-
# # query: str = Field(
|
50 |
-
# # description = """
|
51 |
-
# # Translate to english and reformulate the following user message to be a short standalone question, in the context of an educational discussion about climate change.
|
52 |
-
# # The reformulated question will used in a search engine
|
53 |
-
# # By default, assume that the user is asking information about the last century,
|
54 |
-
# # Use the following examples
|
55 |
-
|
56 |
-
# # ### Examples:
|
57 |
-
# # La technologie nous sauvera-t-elle ? -> Can technology help humanity mitigate the effects of climate change?
|
58 |
-
# # what are our reserves in fossil fuel? -> What are the current reserves of fossil fuels and how long will they last?
|
59 |
-
# # what are the main causes of climate change? -> What are the main causes of climate change in the last century?
|
60 |
-
|
61 |
-
# # Question in English:
|
62 |
-
# # """
|
63 |
-
# # )
|
64 |
-
|
65 |
-
# openai_functions = [convert_to_openai_function(QueryAnalysis)]
|
66 |
-
# llm2 = llm.bind(functions = openai_functions,function_call={"name":"QueryAnalysis"})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
climateqa/engine/chains/translation.py
DELETED
@@ -1,41 +0,0 @@
|
|
1 |
-
|
2 |
-
from langchain_core.pydantic_v1 import BaseModel, Field
|
3 |
-
from typing import List
|
4 |
-
from typing import Literal
|
5 |
-
from langchain.prompts import ChatPromptTemplate
|
6 |
-
from langchain_core.utils.function_calling import convert_to_openai_function
|
7 |
-
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
|
8 |
-
|
9 |
-
|
10 |
-
class Translation(BaseModel):
|
11 |
-
"""Analyzing the user message input"""
|
12 |
-
|
13 |
-
translation: str = Field(
|
14 |
-
description="Translate the message input to English",
|
15 |
-
)
|
16 |
-
|
17 |
-
|
18 |
-
def make_translation_chain(llm):
|
19 |
-
|
20 |
-
openai_functions = [convert_to_openai_function(Translation)]
|
21 |
-
llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"Translation"})
|
22 |
-
|
23 |
-
prompt = ChatPromptTemplate.from_messages([
|
24 |
-
("system", "You are a helpful assistant, you will translate the user input message to English using the function provided"),
|
25 |
-
("user", "input: {input}")
|
26 |
-
])
|
27 |
-
|
28 |
-
chain = prompt | llm_with_functions | JsonOutputFunctionsParser()
|
29 |
-
return chain
|
30 |
-
|
31 |
-
|
32 |
-
def make_translation_node(llm):
|
33 |
-
|
34 |
-
translation_chain = make_translation_chain(llm)
|
35 |
-
|
36 |
-
def translate_query(state):
|
37 |
-
user_input = state["user_input"]
|
38 |
-
translation = translation_chain.invoke({"input":user_input})
|
39 |
-
return {"query":translation["translation"]}
|
40 |
-
|
41 |
-
return translate_query
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
climateqa/engine/embeddings.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
3 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
4 |
|
5 |
-
def get_embeddings_function(version = "v1.2"
|
6 |
|
7 |
if version == "v1.2":
|
8 |
|
@@ -10,12 +10,12 @@ def get_embeddings_function(version = "v1.2",query_instruction = "Represent this
|
|
10 |
# Best embedding model at a reasonable size at the moment (2023-11-22)
|
11 |
|
12 |
model_name = "BAAI/bge-base-en-v1.5"
|
13 |
-
encode_kwargs = {'normalize_embeddings': True
|
14 |
print("Loading embeddings model: ", model_name)
|
15 |
embeddings_function = HuggingFaceBgeEmbeddings(
|
16 |
model_name=model_name,
|
17 |
encode_kwargs=encode_kwargs,
|
18 |
-
query_instruction=
|
19 |
)
|
20 |
|
21 |
else:
|
@@ -23,6 +23,3 @@ def get_embeddings_function(version = "v1.2",query_instruction = "Represent this
|
|
23 |
embeddings_function = HuggingFaceEmbeddings(model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1")
|
24 |
|
25 |
return embeddings_function
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
2 |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
3 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
4 |
|
5 |
+
def get_embeddings_function(version = "v1.2"):
|
6 |
|
7 |
if version == "v1.2":
|
8 |
|
|
|
10 |
# Best embedding model at a reasonable size at the moment (2023-11-22)
|
11 |
|
12 |
model_name = "BAAI/bge-base-en-v1.5"
|
13 |
+
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity
|
14 |
print("Loading embeddings model: ", model_name)
|
15 |
embeddings_function = HuggingFaceBgeEmbeddings(
|
16 |
model_name=model_name,
|
17 |
encode_kwargs=encode_kwargs,
|
18 |
+
query_instruction="Represent this sentence for searching relevant passages: "
|
19 |
)
|
20 |
|
21 |
else:
|
|
|
23 |
embeddings_function = HuggingFaceEmbeddings(model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1")
|
24 |
|
25 |
return embeddings_function
|
|
|
|
|
|
climateqa/engine/graph.py
DELETED
@@ -1,149 +0,0 @@
|
|
1 |
-
import sys
|
2 |
-
import os
|
3 |
-
from contextlib import contextmanager
|
4 |
-
|
5 |
-
from langchain.schema import Document
|
6 |
-
from langgraph.graph import END, StateGraph
|
7 |
-
from langchain_core.runnables.graph import CurveStyle, MermaidDrawMethod
|
8 |
-
|
9 |
-
from typing_extensions import TypedDict
|
10 |
-
from typing import List
|
11 |
-
|
12 |
-
from IPython.display import display, HTML, Image
|
13 |
-
|
14 |
-
from .chains.answer_chitchat import make_chitchat_node
|
15 |
-
from .chains.answer_ai_impact import make_ai_impact_node
|
16 |
-
from .chains.query_transformation import make_query_transform_node
|
17 |
-
from .chains.translation import make_translation_node
|
18 |
-
from .chains.intent_categorization import make_intent_categorization_node
|
19 |
-
from .chains.retrieve_documents import make_retriever_node
|
20 |
-
from .chains.answer_rag import make_rag_node
|
21 |
-
|
22 |
-
class GraphState(TypedDict):
|
23 |
-
"""
|
24 |
-
Represents the state of our graph.
|
25 |
-
"""
|
26 |
-
user_input : str
|
27 |
-
language : str
|
28 |
-
intent : str
|
29 |
-
query: str
|
30 |
-
remaining_questions : List[dict]
|
31 |
-
n_questions : int
|
32 |
-
answer: str
|
33 |
-
audience: str = "experts"
|
34 |
-
sources_input: List[str] = ["IPCC","IPBES"]
|
35 |
-
sources_auto: bool = True
|
36 |
-
min_year: int = 1960
|
37 |
-
max_year: int = None
|
38 |
-
documents: List[Document]
|
39 |
-
|
40 |
-
def search(state): #TODO
|
41 |
-
return state
|
42 |
-
|
43 |
-
def answer_search(state):#TODO
|
44 |
-
return state
|
45 |
-
|
46 |
-
def route_intent(state):
|
47 |
-
intent = state["intent"]
|
48 |
-
if intent in ["chitchat","esg"]:
|
49 |
-
return "answer_chitchat"
|
50 |
-
# elif intent == "ai_impact":
|
51 |
-
# return "answer_ai_impact"
|
52 |
-
else:
|
53 |
-
# Search route
|
54 |
-
return "search"
|
55 |
-
|
56 |
-
def route_translation(state):
|
57 |
-
if state["language"].lower() == "english":
|
58 |
-
return "transform_query"
|
59 |
-
else:
|
60 |
-
return "translate_query"
|
61 |
-
|
62 |
-
def route_based_on_relevant_docs(state,threshold_docs=0.2):
|
63 |
-
docs = [x for x in state["documents"] if x.metadata["reranking_score"] > threshold_docs]
|
64 |
-
if len(docs) > 0:
|
65 |
-
return "answer_rag"
|
66 |
-
else:
|
67 |
-
return "answer_rag_no_docs"
|
68 |
-
|
69 |
-
|
70 |
-
def make_id_dict(values):
|
71 |
-
return {k:k for k in values}
|
72 |
-
|
73 |
-
def make_graph_agent(llm,vectorstore,reranker,threshold_docs = 0.2):
|
74 |
-
|
75 |
-
workflow = StateGraph(GraphState)
|
76 |
-
|
77 |
-
# Define the node functions
|
78 |
-
categorize_intent = make_intent_categorization_node(llm)
|
79 |
-
transform_query = make_query_transform_node(llm)
|
80 |
-
translate_query = make_translation_node(llm)
|
81 |
-
answer_chitchat = make_chitchat_node(llm)
|
82 |
-
answer_ai_impact = make_ai_impact_node(llm)
|
83 |
-
retrieve_documents = make_retriever_node(vectorstore,reranker,llm)
|
84 |
-
answer_rag = make_rag_node(llm,with_docs=True)
|
85 |
-
answer_rag_no_docs = make_rag_node(llm,with_docs=False)
|
86 |
-
|
87 |
-
# Define the nodes
|
88 |
-
workflow.add_node("categorize_intent", categorize_intent)
|
89 |
-
workflow.add_node("search", search)
|
90 |
-
workflow.add_node("answer_search", answer_search)
|
91 |
-
workflow.add_node("transform_query", transform_query)
|
92 |
-
workflow.add_node("translate_query", translate_query)
|
93 |
-
workflow.add_node("answer_chitchat", answer_chitchat)
|
94 |
-
# workflow.add_node("answer_ai_impact", answer_ai_impact)
|
95 |
-
workflow.add_node("retrieve_documents",retrieve_documents)
|
96 |
-
workflow.add_node("answer_rag",answer_rag)
|
97 |
-
workflow.add_node("answer_rag_no_docs",answer_rag_no_docs)
|
98 |
-
|
99 |
-
# Entry point
|
100 |
-
workflow.set_entry_point("categorize_intent")
|
101 |
-
|
102 |
-
# CONDITIONAL EDGES
|
103 |
-
workflow.add_conditional_edges(
|
104 |
-
"categorize_intent",
|
105 |
-
route_intent,
|
106 |
-
make_id_dict(["answer_chitchat","search"])
|
107 |
-
)
|
108 |
-
|
109 |
-
workflow.add_conditional_edges(
|
110 |
-
"search",
|
111 |
-
route_translation,
|
112 |
-
make_id_dict(["translate_query","transform_query"])
|
113 |
-
)
|
114 |
-
workflow.add_conditional_edges(
|
115 |
-
"retrieve_documents",
|
116 |
-
lambda state : "retrieve_documents" if len(state["remaining_questions"]) > 0 else "answer_search",
|
117 |
-
make_id_dict(["retrieve_documents","answer_search"])
|
118 |
-
)
|
119 |
-
|
120 |
-
workflow.add_conditional_edges(
|
121 |
-
"answer_search",
|
122 |
-
lambda x : route_based_on_relevant_docs(x,threshold_docs=threshold_docs),
|
123 |
-
make_id_dict(["answer_rag","answer_rag_no_docs"])
|
124 |
-
)
|
125 |
-
|
126 |
-
# Define the edges
|
127 |
-
workflow.add_edge("translate_query", "transform_query")
|
128 |
-
workflow.add_edge("transform_query", "retrieve_documents")
|
129 |
-
workflow.add_edge("answer_rag", END)
|
130 |
-
workflow.add_edge("answer_rag_no_docs", END)
|
131 |
-
workflow.add_edge("answer_chitchat", END)
|
132 |
-
# workflow.add_edge("answer_ai_impact", END)
|
133 |
-
|
134 |
-
# Compile
|
135 |
-
app = workflow.compile()
|
136 |
-
return app
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
def display_graph(app):
|
142 |
-
|
143 |
-
display(
|
144 |
-
Image(
|
145 |
-
app.get_graph(xray = True).draw_mermaid_png(
|
146 |
-
draw_method=MermaidDrawMethod.API,
|
147 |
-
)
|
148 |
-
)
|
149 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
climateqa/engine/llm/__init__.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
from climateqa.engine.llm.openai import get_llm as get_openai_llm
|
2 |
from climateqa.engine.llm.azure import get_llm as get_azure_llm
|
3 |
-
from climateqa.engine.llm.ollama import get_llm as get_ollama_llm
|
4 |
|
5 |
|
6 |
def get_llm(provider="openai",**kwargs):
|
@@ -9,8 +8,6 @@ def get_llm(provider="openai",**kwargs):
|
|
9 |
return get_openai_llm(**kwargs)
|
10 |
elif provider == "azure":
|
11 |
return get_azure_llm(**kwargs)
|
12 |
-
elif provider == "ollama":
|
13 |
-
return get_ollama_llm(**kwargs)
|
14 |
else:
|
15 |
raise ValueError(f"Unknown provider: {provider}")
|
16 |
|
|
|
1 |
from climateqa.engine.llm.openai import get_llm as get_openai_llm
|
2 |
from climateqa.engine.llm.azure import get_llm as get_azure_llm
|
|
|
3 |
|
4 |
|
5 |
def get_llm(provider="openai",**kwargs):
|
|
|
8 |
return get_openai_llm(**kwargs)
|
9 |
elif provider == "azure":
|
10 |
return get_azure_llm(**kwargs)
|
|
|
|
|
11 |
else:
|
12 |
raise ValueError(f"Unknown provider: {provider}")
|
13 |
|
climateqa/engine/llm/ollama.py
DELETED
@@ -1,6 +0,0 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
from langchain_community.llms import Ollama
|
4 |
-
|
5 |
-
def get_llm(model="llama3", **kwargs):
|
6 |
-
return Ollama(model=model, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
climateqa/engine/{chains/prompts.py → prompts.py}
RENAMED
@@ -56,7 +56,7 @@ Passages:
|
|
56 |
{context}
|
57 |
|
58 |
-----------------------
|
59 |
-
Question: {
|
60 |
Answer in {language} with the passages citations:
|
61 |
"""
|
62 |
|
@@ -137,7 +137,7 @@ Guidelines:
|
|
137 |
- If the question is not related to environmental issues, never never answer it. Say it's not your role.
|
138 |
- Make paragraphs by starting new lines to make your answers more readable.
|
139 |
|
140 |
-
Question: {
|
141 |
Answer in {language}:
|
142 |
"""
|
143 |
|
|
|
56 |
{context}
|
57 |
|
58 |
-----------------------
|
59 |
+
Question: {question} - Explained to {audience}
|
60 |
Answer in {language} with the passages citations:
|
61 |
"""
|
62 |
|
|
|
137 |
- If the question is not related to environmental issues, never never answer it. Say it's not your role.
|
138 |
- Make paragraphs by starting new lines to make your answers more readable.
|
139 |
|
140 |
+
Question: {question}
|
141 |
Answer in {language}:
|
142 |
"""
|
143 |
|
climateqa/engine/rag.py
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from operator import itemgetter
|
2 |
+
|
3 |
+
from langchain_core.prompts import ChatPromptTemplate
|
4 |
+
from langchain_core.output_parsers import StrOutputParser
|
5 |
+
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableBranch
|
6 |
+
from langchain_core.prompts.prompt import PromptTemplate
|
7 |
+
from langchain_core.prompts.base import format_document
|
8 |
+
|
9 |
+
from climateqa.engine.reformulation import make_reformulation_chain
|
10 |
+
from climateqa.engine.prompts import answer_prompt_template,answer_prompt_without_docs_template,answer_prompt_images_template
|
11 |
+
from climateqa.engine.prompts import papers_prompt_template
|
12 |
+
from climateqa.engine.utils import pass_values, flatten_dict,prepare_chain,rename_chain
|
13 |
+
from climateqa.engine.keywords import make_keywords_chain
|
14 |
+
|
15 |
+
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
|
16 |
+
|
17 |
+
def _combine_documents(
|
18 |
+
docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, sep="\n\n"
|
19 |
+
):
|
20 |
+
|
21 |
+
doc_strings = []
|
22 |
+
|
23 |
+
for i,doc in enumerate(docs):
|
24 |
+
# chunk_type = "Doc" if doc.metadata["chunk_type"] == "text" else "Image"
|
25 |
+
chunk_type = "Doc"
|
26 |
+
if isinstance(doc,str):
|
27 |
+
doc_formatted = doc
|
28 |
+
else:
|
29 |
+
doc_formatted = format_document(doc, document_prompt)
|
30 |
+
doc_string = f"{chunk_type} {i+1}: " + doc_formatted
|
31 |
+
doc_string = doc_string.replace("\n"," ")
|
32 |
+
doc_strings.append(doc_string)
|
33 |
+
|
34 |
+
return sep.join(doc_strings)
|
35 |
+
|
36 |
+
|
37 |
+
def get_text_docs(x):
|
38 |
+
return [doc for doc in x if doc.metadata["chunk_type"] == "text"]
|
39 |
+
|
40 |
+
def get_image_docs(x):
|
41 |
+
return [doc for doc in x if doc.metadata["chunk_type"] == "image"]
|
42 |
+
|
43 |
+
|
44 |
+
def make_rag_chain(retriever,llm):
|
45 |
+
|
46 |
+
# Construct the prompt
|
47 |
+
prompt = ChatPromptTemplate.from_template(answer_prompt_template)
|
48 |
+
prompt_without_docs = ChatPromptTemplate.from_template(answer_prompt_without_docs_template)
|
49 |
+
|
50 |
+
# ------- CHAIN 0 - Reformulation
|
51 |
+
reformulation = make_reformulation_chain(llm)
|
52 |
+
reformulation = prepare_chain(reformulation,"reformulation")
|
53 |
+
|
54 |
+
# ------- Find all keywords from the reformulated query
|
55 |
+
keywords = make_keywords_chain(llm)
|
56 |
+
keywords = {"keywords":itemgetter("question") | keywords}
|
57 |
+
keywords = prepare_chain(keywords,"keywords")
|
58 |
+
|
59 |
+
# ------- CHAIN 1
|
60 |
+
# Retrieved documents
|
61 |
+
find_documents = {"docs": itemgetter("question") | retriever} | RunnablePassthrough()
|
62 |
+
find_documents = prepare_chain(find_documents,"find_documents")
|
63 |
+
|
64 |
+
# ------- CHAIN 2
|
65 |
+
# Construct inputs for the llm
|
66 |
+
input_documents = {
|
67 |
+
"context":lambda x : _combine_documents(x["docs"]),
|
68 |
+
**pass_values(["question","audience","language","keywords"])
|
69 |
+
}
|
70 |
+
|
71 |
+
# ------- CHAIN 3
|
72 |
+
# Bot answer
|
73 |
+
llm_final = rename_chain(llm,"answer")
|
74 |
+
|
75 |
+
answer_with_docs = {
|
76 |
+
"answer": input_documents | prompt | llm_final | StrOutputParser(),
|
77 |
+
**pass_values(["question","audience","language","query","docs","keywords"]),
|
78 |
+
}
|
79 |
+
|
80 |
+
answer_without_docs = {
|
81 |
+
"answer": prompt_without_docs | llm_final | StrOutputParser(),
|
82 |
+
**pass_values(["question","audience","language","query","docs","keywords"]),
|
83 |
+
}
|
84 |
+
|
85 |
+
# def has_images(x):
|
86 |
+
# image_docs = [doc for doc in x["docs"] if doc.metadata["chunk_type"]=="image"]
|
87 |
+
# return len(image_docs) > 0
|
88 |
+
|
89 |
+
def has_docs(x):
|
90 |
+
return len(x["docs"]) > 0
|
91 |
+
|
92 |
+
answer = RunnableBranch(
|
93 |
+
(lambda x: has_docs(x), answer_with_docs),
|
94 |
+
answer_without_docs,
|
95 |
+
)
|
96 |
+
|
97 |
+
|
98 |
+
# ------- FINAL CHAIN
|
99 |
+
# Build the final chain
|
100 |
+
rag_chain = reformulation | keywords | find_documents | answer
|
101 |
+
|
102 |
+
return rag_chain
|
103 |
+
|
104 |
+
|
105 |
+
def make_rag_papers_chain(llm):
|
106 |
+
|
107 |
+
prompt = ChatPromptTemplate.from_template(papers_prompt_template)
|
108 |
+
|
109 |
+
input_documents = {
|
110 |
+
"context":lambda x : _combine_documents(x["docs"]),
|
111 |
+
**pass_values(["question","language"])
|
112 |
+
}
|
113 |
+
|
114 |
+
chain = input_documents | prompt | llm | StrOutputParser()
|
115 |
+
chain = rename_chain(chain,"answer")
|
116 |
+
|
117 |
+
return chain
|
118 |
+
|
119 |
+
|
120 |
+
|
121 |
+
|
122 |
+
|
123 |
+
|
124 |
+
def make_illustration_chain(llm):
|
125 |
+
|
126 |
+
prompt_with_images = ChatPromptTemplate.from_template(answer_prompt_images_template)
|
127 |
+
|
128 |
+
input_description_images = {
|
129 |
+
"images":lambda x : _combine_documents(get_image_docs(x["docs"])),
|
130 |
+
**pass_values(["question","audience","language","answer"]),
|
131 |
+
}
|
132 |
+
|
133 |
+
illustration_chain = input_description_images | prompt_with_images | llm | StrOutputParser()
|
134 |
+
return illustration_chain
|
climateqa/engine/{chains/reformulation.py → reformulation.py}
RENAMED
@@ -3,7 +3,7 @@ from langchain.output_parsers.structured import StructuredOutputParser, Response
|
|
3 |
from langchain_core.prompts import PromptTemplate
|
4 |
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableBranch
|
5 |
|
6 |
-
from climateqa.engine.
|
7 |
from climateqa.engine.utils import pass_values, flatten_dict
|
8 |
|
9 |
|
|
|
3 |
from langchain_core.prompts import PromptTemplate
|
4 |
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableBranch
|
5 |
|
6 |
+
from climateqa.engine.prompts import reformulation_prompt_template
|
7 |
from climateqa.engine.utils import pass_values, flatten_dict
|
8 |
|
9 |
|
climateqa/engine/reranker.py
DELETED
@@ -1,40 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
from scipy.special import expit, logit
|
3 |
-
from rerankers import Reranker
|
4 |
-
|
5 |
-
|
6 |
-
def get_reranker(model = "nano",cohere_api_key = None):
|
7 |
-
|
8 |
-
assert model in ["nano","tiny","small","large"]
|
9 |
-
|
10 |
-
if model == "nano":
|
11 |
-
reranker = Reranker('ms-marco-TinyBERT-L-2-v2', model_type='flashrank')
|
12 |
-
elif model == "tiny":
|
13 |
-
reranker = Reranker('ms-marco-MiniLM-L-12-v2', model_type='flashrank')
|
14 |
-
elif model == "small":
|
15 |
-
reranker = Reranker("mixedbread-ai/mxbai-rerank-xsmall-v1", model_type='cross-encoder')
|
16 |
-
elif model == "large":
|
17 |
-
if cohere_api_key is None:
|
18 |
-
cohere_api_key = os.environ["COHERE_API_KEY"]
|
19 |
-
reranker = Reranker("cohere", lang='en', api_key = cohere_api_key)
|
20 |
-
return reranker
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
def rerank_docs(reranker,docs,query):
|
25 |
-
|
26 |
-
# Get a list of texts from langchain docs
|
27 |
-
input_docs = [x.page_content for x in docs]
|
28 |
-
|
29 |
-
# Rerank using rerankers library
|
30 |
-
results = reranker.rank(query=query, docs=input_docs)
|
31 |
-
|
32 |
-
# Prepare langchain list of docs
|
33 |
-
docs_reranked = []
|
34 |
-
for result in results.results:
|
35 |
-
doc_id = result.document.doc_id
|
36 |
-
doc = docs[doc_id]
|
37 |
-
doc.metadata["reranking_score"] = result.score
|
38 |
-
doc.metadata["query_used_for_retrieval"] = query
|
39 |
-
docs_reranked.append(doc)
|
40 |
-
return docs_reranked
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
climateqa/{knowledge → engine}/retriever.py
RENAMED
@@ -66,7 +66,6 @@ class ClimateQARetriever(BaseRetriever):
|
|
66 |
# Add score to metadata
|
67 |
results = []
|
68 |
for i,(doc,score) in enumerate(docs):
|
69 |
-
doc.page_content = doc.page_content.replace("\r\n"," ")
|
70 |
doc.metadata["similarity_score"] = score
|
71 |
doc.metadata["content"] = doc.page_content
|
72 |
doc.metadata["page_number"] = int(doc.metadata["page_number"]) + 1
|
@@ -79,3 +78,86 @@ class ClimateQARetriever(BaseRetriever):
|
|
79 |
return results
|
80 |
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
# Add score to metadata
|
67 |
results = []
|
68 |
for i,(doc,score) in enumerate(docs):
|
|
|
69 |
doc.metadata["similarity_score"] = score
|
70 |
doc.metadata["content"] = doc.page_content
|
71 |
doc.metadata["page_number"] = int(doc.metadata["page_number"]) + 1
|
|
|
78 |
return results
|
79 |
|
80 |
|
81 |
+
|
82 |
+
|
83 |
+
# def filter_summaries(df,k_summary = 3,k_total = 10):
|
84 |
+
# # assert source in ["IPCC","IPBES","ALL"], "source arg should be in (IPCC,IPBES,ALL)"
|
85 |
+
|
86 |
+
# # # Filter by source
|
87 |
+
# # if source == "IPCC":
|
88 |
+
# # df = df.loc[df["source"]=="IPCC"]
|
89 |
+
# # elif source == "IPBES":
|
90 |
+
# # df = df.loc[df["source"]=="IPBES"]
|
91 |
+
# # else:
|
92 |
+
# # pass
|
93 |
+
|
94 |
+
# # Separate summaries and full reports
|
95 |
+
# df_summaries = df.loc[df["report_type"].isin(["SPM","TS"])]
|
96 |
+
# df_full = df.loc[~df["report_type"].isin(["SPM","TS"])]
|
97 |
+
|
98 |
+
# # Find passages from summaries dataset
|
99 |
+
# passages_summaries = df_summaries.head(k_summary)
|
100 |
+
|
101 |
+
# # Find passages from full reports dataset
|
102 |
+
# passages_fullreports = df_full.head(k_total - len(passages_summaries))
|
103 |
+
|
104 |
+
# # Concatenate passages
|
105 |
+
# passages = pd.concat([passages_summaries,passages_fullreports],axis = 0,ignore_index = True)
|
106 |
+
# return passages
|
107 |
+
|
108 |
+
|
109 |
+
|
110 |
+
|
111 |
+
# def retrieve_with_summaries(query,retriever,k_summary = 3,k_total = 10,sources = ["IPCC","IPBES"],max_k = 100,threshold = 0.555,as_dict = True,min_length = 300):
|
112 |
+
# assert max_k > k_total
|
113 |
+
|
114 |
+
# validated_sources = ["IPCC","IPBES"]
|
115 |
+
# sources = [x for x in sources if x in validated_sources]
|
116 |
+
# filters = {
|
117 |
+
# "source": { "$in": sources },
|
118 |
+
# }
|
119 |
+
# print(filters)
|
120 |
+
|
121 |
+
# # Retrieve documents
|
122 |
+
# docs = retriever.retrieve(query,top_k = max_k,filters = filters)
|
123 |
+
|
124 |
+
# # Filter by score
|
125 |
+
# docs = [{**x.meta,"score":x.score,"content":x.content} for x in docs if x.score > threshold]
|
126 |
+
|
127 |
+
# if len(docs) == 0:
|
128 |
+
# return []
|
129 |
+
# res = pd.DataFrame(docs)
|
130 |
+
# passages_df = filter_summaries(res,k_summary,k_total)
|
131 |
+
# if as_dict:
|
132 |
+
# contents = passages_df["content"].tolist()
|
133 |
+
# meta = passages_df.drop(columns = ["content"]).to_dict(orient = "records")
|
134 |
+
# passages = []
|
135 |
+
# for i in range(len(contents)):
|
136 |
+
# passages.append({"content":contents[i],"meta":meta[i]})
|
137 |
+
# return passages
|
138 |
+
# else:
|
139 |
+
# return passages_df
|
140 |
+
|
141 |
+
|
142 |
+
|
143 |
+
# def retrieve(query,sources = ["IPCC"],threshold = 0.555,k = 10):
|
144 |
+
|
145 |
+
|
146 |
+
# print("hellooooo")
|
147 |
+
|
148 |
+
# # Reformulate queries
|
149 |
+
# reformulated_query,language = reformulate(query)
|
150 |
+
|
151 |
+
# print(reformulated_query)
|
152 |
+
|
153 |
+
# # Retrieve documents
|
154 |
+
# passages = retrieve_with_summaries(reformulated_query,retriever,k_total = k,k_summary = 3,as_dict = True,sources = sources,threshold = threshold)
|
155 |
+
# response = {
|
156 |
+
# "query":query,
|
157 |
+
# "reformulated_query":reformulated_query,
|
158 |
+
# "language":language,
|
159 |
+
# "sources":passages,
|
160 |
+
# "prompts":{"init_prompt":init_prompt,"sources_prompt":sources_prompt},
|
161 |
+
# }
|
162 |
+
# return response
|
163 |
+
|
climateqa/engine/utils.py
CHANGED
@@ -1,15 +1,8 @@
|
|
1 |
from operator import itemgetter
|
2 |
from typing import Any, Dict, Iterable, Tuple
|
3 |
-
import tiktoken
|
4 |
from langchain_core.runnables import RunnablePassthrough
|
5 |
|
6 |
|
7 |
-
def num_tokens_from_string(string: str, encoding_name: str = "cl100k_base") -> int:
|
8 |
-
encoding = tiktoken.get_encoding(encoding_name)
|
9 |
-
num_tokens = len(encoding.encode(string))
|
10 |
-
return num_tokens
|
11 |
-
|
12 |
-
|
13 |
def pass_values(x):
|
14 |
if not isinstance(x, list):
|
15 |
x = [x]
|
@@ -74,13 +67,3 @@ def flatten_dict(
|
|
74 |
"""
|
75 |
flat_dict = {k: v for k, v in _flatten_dict(nested_dict, parent_key, sep)}
|
76 |
return flat_dict
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
async def log_event(info,name,config):
|
81 |
-
"""Helper function that will run a dummy chain with the given info
|
82 |
-
The astream_event function will catch this chain and stream the dict info to the logger
|
83 |
-
"""
|
84 |
-
|
85 |
-
chain = RunnablePassthrough().with_config(run_name=name)
|
86 |
-
_ = await chain.ainvoke(info,config)
|
|
|
1 |
from operator import itemgetter
|
2 |
from typing import Any, Dict, Iterable, Tuple
|
|
|
3 |
from langchain_core.runnables import RunnablePassthrough
|
4 |
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
def pass_values(x):
|
7 |
if not isinstance(x, list):
|
8 |
x = [x]
|
|
|
67 |
"""
|
68 |
flat_dict = {k: v for k, v in _flatten_dict(nested_dict, parent_key, sep)}
|
69 |
return flat_dict
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
climateqa/knowledge/__init__.py
DELETED
File without changes
|
climateqa/papers/__init__.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
from pyalex import Works, Authors, Sources, Institutions, Concepts, Publishers, Funders
|
4 |
+
import pyalex
|
5 |
+
|
6 |
+
pyalex.config.email = "[email protected]"
|
7 |
+
|
8 |
+
class OpenAlex():
|
9 |
+
def __init__(self):
|
10 |
+
pass
|
11 |
+
|
12 |
+
|
13 |
+
|
14 |
+
def search(self,keywords,n_results = 100,after = None,before = None):
|
15 |
+
works = Works().search(keywords).get()
|
16 |
+
|
17 |
+
for page in works.paginate(per_page=n_results):
|
18 |
+
break
|
19 |
+
|
20 |
+
df_works = pd.DataFrame(page)
|
21 |
+
|
22 |
+
return works
|
23 |
+
|
24 |
+
|
25 |
+
def make_network(self):
|
26 |
+
pass
|
27 |
+
|
28 |
+
|
29 |
+
def get_abstract_from_inverted_index(self,index):
|
30 |
+
|
31 |
+
# Determine the maximum index to know the length of the reconstructed array
|
32 |
+
max_index = max([max(positions) for positions in index.values()])
|
33 |
+
|
34 |
+
# Initialize a list with placeholders for all positions
|
35 |
+
reconstructed = [''] * (max_index + 1)
|
36 |
+
|
37 |
+
# Iterate through the inverted index and place each token at its respective position(s)
|
38 |
+
for token, positions in index.items():
|
39 |
+
for position in positions:
|
40 |
+
reconstructed[position] = token
|
41 |
+
|
42 |
+
# Join the tokens to form the reconstructed sentence(s)
|
43 |
+
return ' '.join(reconstructed)
|
climateqa/{knowledge → papers}/openalex.py
RENAMED
@@ -3,32 +3,18 @@ import networkx as nx
|
|
3 |
import matplotlib.pyplot as plt
|
4 |
from pyvis.network import Network
|
5 |
|
6 |
-
from langchain_core.retrievers import BaseRetriever
|
7 |
-
from langchain_core.vectorstores import VectorStoreRetriever
|
8 |
-
from langchain_core.documents.base import Document
|
9 |
-
from langchain_core.vectorstores import VectorStore
|
10 |
-
from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun
|
11 |
-
|
12 |
-
from ..engine.utils import num_tokens_from_string
|
13 |
-
|
14 |
-
from typing import List
|
15 |
-
from pydantic import Field
|
16 |
-
|
17 |
from pyalex import Works, Authors, Sources, Institutions, Concepts, Publishers, Funders
|
18 |
import pyalex
|
19 |
|
20 |
pyalex.config.email = "[email protected]"
|
21 |
|
22 |
-
|
23 |
-
def replace_nan_with_empty_dict(x):
    """Return ``x`` unchanged, or an empty dict when pandas reports it as missing."""
    if pd.notna(x):
        return x
    return {}
|
25 |
-
|
26 |
class OpenAlex():
|
27 |
def __init__(self):
|
28 |
pass
|
29 |
|
30 |
|
31 |
-
|
|
|
32 |
|
33 |
if isinstance(keywords,str):
|
34 |
works = Works().search(keywords)
|
@@ -41,21 +27,18 @@ class OpenAlex():
|
|
41 |
break
|
42 |
|
43 |
df_works = pd.DataFrame(page)
|
44 |
-
df_works = df_works.
|
45 |
-
df_works["primary_location"] = df_works["primary_location"].map(replace_nan_with_empty_dict)
|
46 |
-
df_works["abstract"] = df_works["abstract_inverted_index"].apply(lambda x: self.get_abstract_from_inverted_index(x)).fillna("")
|
47 |
df_works["is_oa"] = df_works["open_access"].map(lambda x : x.get("is_oa",False))
|
48 |
df_works["pdf_url"] = df_works["primary_location"].map(lambda x : x.get("pdf_url",None))
|
49 |
-
df_works["
|
50 |
-
|
51 |
-
df_works["num_tokens"] = df_works["content"].map(lambda x : num_tokens_from_string(x))
|
52 |
-
|
53 |
-
df_works = df_works.drop(columns = ["abstract_inverted_index"])
|
54 |
-
# df_works["subtitle"] = df_works["title"] + " - " + df_works["primary_location"]["source"]["display_name"] + " - " + df_works["publication_year"]
|
55 |
-
|
56 |
-
return df_works
|
57 |
else:
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
|
61 |
def rerank(self,query,df,reranker):
|
@@ -156,36 +139,4 @@ class OpenAlex():
|
|
156 |
reconstructed[position] = token
|
157 |
|
158 |
# Join the tokens to form the reconstructed sentence(s)
|
159 |
-
return ' '.join(reconstructed)
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
class OpenAlexRetriever(BaseRetriever):
    """Retriever that searches OpenAlex and wraps the matching rows as documents."""
    # Bounds forwarded to ``OpenAlex.search`` as ``after`` / ``before``
    min_year:int = 1960
    max_year:int = None
    # Forwarded to ``OpenAlex.search`` as ``n_results``
    k:int = 100

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search OpenAlex for ``query`` and return one ``Document`` per kept row.

        Rows whose ``num_tokens`` is below 50 or above 1000 are skipped. Each
        document uses the row's ``content`` as page content and the whole row
        (as a dict) as metadata.
        """
        # Search for documents
        results = OpenAlex().search(
            query,
            n_results=self.k,
            after=self.min_year,
            before=self.max_year,
        )

        kept = []
        for _, record in results.iterrows():
            token_count = record["num_tokens"]
            if token_count < 50 or token_count > 1000:
                continue
            kept.append(
                Document(
                    page_content=record["content"],
                    metadata=record.to_dict(),
                )
            )
        return kept
|
190 |
-
|
191 |
-
|
|
|
3 |
import matplotlib.pyplot as plt
|
4 |
from pyvis.network import Network
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
from pyalex import Works, Authors, Sources, Institutions, Concepts, Publishers, Funders
|
7 |
import pyalex
|
8 |
|
9 |
pyalex.config.email = "[email protected]"
|
10 |
|
|
|
|
|
|
|
|
|
11 |
class OpenAlex():
|
12 |
def __init__(self):
|
13 |
pass
|
14 |
|
15 |
|
16 |
+
|
17 |
+
def search(self,keywords,n_results = 100,after = None,before = None):
|
18 |
|
19 |
if isinstance(keywords,str):
|
20 |
works = Works().search(keywords)
|
|
|
27 |
break
|
28 |
|
29 |
df_works = pd.DataFrame(page)
|
30 |
+
df_works["abstract"] = df_works["abstract_inverted_index"].apply(lambda x: self.get_abstract_from_inverted_index(x))
|
|
|
|
|
31 |
df_works["is_oa"] = df_works["open_access"].map(lambda x : x.get("is_oa",False))
|
32 |
df_works["pdf_url"] = df_works["primary_location"].map(lambda x : x.get("pdf_url",None))
|
33 |
+
df_works["content"] = df_works["title"] + "\n" + df_works["abstract"]
|
34 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
else:
|
36 |
+
df_works = []
|
37 |
+
for keyword in keywords:
|
38 |
+
df_keyword = self.search(keyword,n_results = n_results,after = after,before = before)
|
39 |
+
df_works.append(df_keyword)
|
40 |
+
df_works = pd.concat(df_works,ignore_index=True,axis = 0)
|
41 |
+
return df_works
|
42 |
|
43 |
|
44 |
def rerank(self,query,df,reranker):
|
|
|
139 |
reconstructed[position] = token
|
140 |
|
141 |
# Join the tokens to form the reconstructed sentence(s)
|
142 |
+
return ' '.join(reconstructed)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
front/__init__.py
DELETED
File without changes
|
front/callbacks.py
DELETED
File without changes
|
front/utils.py
DELETED
@@ -1,142 +0,0 @@
|
|
1 |
-
|
2 |
-
import re
|
3 |
-
|
4 |
-
def make_pairs(lst):
    """From a list of even length, make tuple pairs of consecutive items."""
    pairs = []
    for start in range(0, len(lst), 2):
        pairs.append((lst[start], lst[start + 1]))
    return pairs
|
7 |
-
|
8 |
-
|
9 |
-
def serialize_docs(docs):
    """Convert documents into plain dicts with ``page_content`` and ``metadata`` keys."""
    return [
        {"page_content": item.page_content, "metadata": item.metadata}
        for item in docs
    ]
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
def parse_output_llm_with_sources(output):
    """Turn ``[Doc X]`` / ``[Doc X, Doc Y]`` citations into HTML anchor links.

    Args:
        output: Text produced by the LLM, possibly containing bracketed
            document references.

    Returns:
        The same text where every bracketed reference is replaced by one
        ``<a href="#docN">`` superscript link per cited document. All other
        text is returned unchanged.
    """
    # Split the content into a list of text and "[Doc X]" references.
    # With a single capturing group, re.split alternates plain text (even
    # positions) and captured references (odd positions).
    content_parts = re.split(r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]', output)
    parts = []
    for position, part in enumerate(content_parts):
        # Decide by position, not by prefix: ordinary text may itself start
        # with "Doc" (e.g. "Doctors ...") and must not be rewritten.
        if position % 2 == 1:
            subparts = part.split(",")
            subparts = [subpart.lower().replace("doc","").strip() for subpart in subparts]
            subparts = [f"""<a href="#doc{subpart}" class="a-doc-ref" target="_self"><span class='doc-ref'><sup>{subpart}</sup></span></a>""" for subpart in subparts]
            parts.append("".join(subparts))
        else:
            parts.append(part)
    return "".join(parts)
|
34 |
-
|
35 |
-
|
36 |
-
def make_html_source(source,i):
    """Render one retrieved source as an HTML card.

    Args:
        source: Object exposing ``page_content`` (str) and ``metadata`` (dict).
        i: Number displayed in the card title (and used in the ``doc{i}``
            element id for text chunks).

    Returns:
        The HTML markup of the card as a string.
    """
    info = source.metadata
    body_text = source.page_content.strip()

    # Table-of-contents trail: stop at the first level marked "N/A"
    trail = []
    for depth in range(2):
        heading = info[f"toc_level{depth}"]
        if heading == "N/A":
            break
        trail.append(heading)
    breadcrumb = " > ".join(trail)

    name = f"<b>{breadcrumb}</b><br/>{info['name']}" if len(breadcrumb) > 0 else info['name']

    # Colour class depends on the reranking score band
    score = info['reranking_score']
    color = "score-green" if score > 0.8 else ("score-orange" if score > 0.4 else "score-red")

    relevancy_score = f"<p class=relevancy-score>Relevancy score: <span class='{color}'>{score:.1%}</span></p>"

    if info["chunk_type"] == "text":
        return f"""
<div class="card" id="doc{i}">
    <div class="card-content">
        <h2>Doc {i} - {info['short_name']} - Page {int(info['page_number'])}</h2>
        <p>{body_text}</p>
        {relevancy_score}
    </div>
    <div class="card-footer">
        <span>{name}</span>
        <a href="{info['url']}#page={int(info['page_number'])}" target="_blank" class="pdf-link">
            <span role="img" aria-label="Open PDF">🔗</span>
        </a>
    </div>
</div>
"""

    # Any other chunk type is rendered as an image card
    title = (
        f"{info['figure_code']} - {info['short_name']}"
        if info["figure_code"] != "N/A"
        else f"{info['short_name']}"
    )

    return f"""
<div class="card card-image">
    <div class="card-content">
        <h2>Image {i} - {title} - Page {int(info['page_number'])}</h2>
        <p>{body_text}</p>
        <p class='ai-generated'>AI-generated description</p>
        {relevancy_score}
    </div>
    <div class="card-footer">
        <span>{name}</span>
        <a href="{info['url']}#page={int(info['page_number'])}" target="_blank" class="pdf-link">
            <span role="img" aria-label="Open PDF">🔗</span>
        </a>
    </div>
</div>
"""
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
def make_toolbox(tool_name,description = "",checked = False,elem_id = "toggle"):
    """Build the HTML snippet showing a tool's name with a status marker.

    Args:
        tool_name: Label displayed next to the marker.
        description: Not used by the current markup.
        checked: When truthy a check mark is shown, otherwise a loader.
        elem_id: Value of the label's ``for`` attribute.

    Returns:
        The HTML markup as a string.
    """
    span = "<span class='checkmark'>✓</span>" if checked else "<span class='loader'></span>"

    return f"""
<div class="dropdown">
    <label for="{elem_id}" class="dropdown-toggle">
        {span}
        {tool_name}
    </label>
</div>
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -1,20 +1,13 @@
|
|
1 |
-
gradio==
|
2 |
azure-storage-file-share==12.11.1
|
3 |
azure-storage-blob
|
4 |
python-dotenv==1.0.0
|
5 |
-
langchain==0.
|
6 |
-
langchain_openai==0.
|
7 |
-
|
8 |
-
pinecone-client==4.1.0
|
9 |
sentence-transformers==2.6.0
|
10 |
huggingface-hub
|
|
|
11 |
pyalex==0.13
|
12 |
networkx==3.2.1
|
13 |
-
pyvis==0.3.2
|
14 |
-
flashrank==0.2.5
|
15 |
-
rerankers==0.3.0
|
16 |
-
torch==2.3.0
|
17 |
-
nvidia-cudnn-cu12==8.9.2.26
|
18 |
-
langchain-community==0.2
|
19 |
-
msal==1.31
|
20 |
-
matplotlib==3.9.2
|
|
|
1 |
+
gradio==4.19.1
|
2 |
azure-storage-file-share==12.11.1
|
3 |
azure-storage-blob
|
4 |
python-dotenv==1.0.0
|
5 |
+
langchain==0.1.4
|
6 |
+
langchain_openai==0.0.6
|
7 |
+
pinecone-client==3.0.2
|
|
|
8 |
sentence-transformers==2.6.0
|
9 |
huggingface-hub
|
10 |
+
msal
|
11 |
pyalex==0.13
|
12 |
networkx==3.2.1
|
13 |
+
pyvis==0.3.2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sandbox/20240310 - CQA - Semantic Routing 1.ipynb
DELETED
The diff for this file is too large to render.
See raw diff
|
|
style.css
CHANGED
@@ -2,14 +2,6 @@
|
|
2 |
/* :root {
|
3 |
--user-image: url('https://ih1.redbubble.net/image.4776899543.6215/st,small,507x507-pad,600x600,f8f8f8.jpg');
|
4 |
} */
|
5 |
-
.avatar-container.svelte-1x5p6hu:not(.thumbnail-item) img {
|
6 |
-
width: 100%;
|
7 |
-
height: 100%;
|
8 |
-
object-fit: cover;
|
9 |
-
border-radius: 50%;
|
10 |
-
padding: 0px;
|
11 |
-
margin: 0px;
|
12 |
-
}
|
13 |
|
14 |
.warning-box {
|
15 |
background-color: #fff3cd;
|
@@ -65,7 +57,6 @@ body.dark .tip-box * {
|
|
65 |
|
66 |
.message{
|
67 |
font-size:14px !important;
|
68 |
-
|
69 |
}
|
70 |
|
71 |
|
@@ -74,10 +65,6 @@ a {
|
|
74 |
color: inherit;
|
75 |
}
|
76 |
|
77 |
-
.doc-ref sup{
|
78 |
-
color:#dc2626!important;
|
79 |
-
/* margin-right:1px; */
|
80 |
-
}
|
81 |
.card {
|
82 |
background-color: white;
|
83 |
border-radius: 10px;
|
@@ -376,108 +363,3 @@ span.chatbot > p > img{
|
|
376 |
.a-doc-ref{
|
377 |
text-decoration: none !important;
|
378 |
}
|
379 |
-
|
380 |
-
|
381 |
-
.dropdown {
|
382 |
-
position: relative;
|
383 |
-
display:inline-block;
|
384 |
-
margin-bottom: 10px;
|
385 |
-
}
|
386 |
-
|
387 |
-
.dropdown-toggle {
|
388 |
-
background-color: #f2f2f2;
|
389 |
-
color: black;
|
390 |
-
padding: 10px;
|
391 |
-
font-size: 16px;
|
392 |
-
cursor: pointer;
|
393 |
-
display: block;
|
394 |
-
width: 400px; /* Adjust width as needed */
|
395 |
-
position: relative;
|
396 |
-
display: flex;
|
397 |
-
align-items: center; /* Vertically center the contents */
|
398 |
-
justify-content: left;
|
399 |
-
}
|
400 |
-
|
401 |
-
.dropdown-toggle .caret {
|
402 |
-
content: "";
|
403 |
-
position: absolute;
|
404 |
-
right: 10px;
|
405 |
-
top: 50%;
|
406 |
-
border-left: 5px solid transparent;
|
407 |
-
border-right: 5px solid transparent;
|
408 |
-
border-top: 5px solid black;
|
409 |
-
transform: translateY(-50%);
|
410 |
-
}
|
411 |
-
|
412 |
-
input[type="checkbox"] {
|
413 |
-
display: none !important;
|
414 |
-
}
|
415 |
-
|
416 |
-
input[type="checkbox"]:checked + .dropdown-content {
|
417 |
-
display: block;
|
418 |
-
}
|
419 |
-
|
420 |
-
.dropdown-content {
|
421 |
-
display: none;
|
422 |
-
position: absolute;
|
423 |
-
background-color: #f9f9f9;
|
424 |
-
min-width: 300px;
|
425 |
-
box-shadow: 0 8px 16px 0 rgba(0,0,0,0.2);
|
426 |
-
z-index: 1;
|
427 |
-
padding: 12px;
|
428 |
-
border: 1px solid #ccc;
|
429 |
-
}
|
430 |
-
|
431 |
-
input[type="checkbox"]:checked + .dropdown-toggle + .dropdown-content {
|
432 |
-
display: block;
|
433 |
-
}
|
434 |
-
|
435 |
-
input[type="checkbox"]:checked + .dropdown-toggle .caret {
|
436 |
-
border-top: 0;
|
437 |
-
border-bottom: 5px solid black;
|
438 |
-
}
|
439 |
-
|
440 |
-
.loader {
|
441 |
-
border: 1px solid #d0d0d0 !important; /* Light grey border */
|
442 |
-
border-top: 1px solid #db3434 !important; /* Red color */
|
443 |
-
border-right: 1px solid #3498db !important; /* Blue color */
|
444 |
-
border-radius: 50%;
|
445 |
-
width: 20px;
|
446 |
-
height: 20px;
|
447 |
-
animation: spin 2s linear infinite;
|
448 |
-
display:inline-block;
|
449 |
-
margin-right:10px !important;
|
450 |
-
}
|
451 |
-
|
452 |
-
.checkmark{
|
453 |
-
color:green !important;
|
454 |
-
font-size:18px;
|
455 |
-
margin-right:10px !important;
|
456 |
-
}
|
457 |
-
|
458 |
-
@keyframes spin {
|
459 |
-
0% { transform: rotate(0deg); }
|
460 |
-
100% { transform: rotate(360deg); }
|
461 |
-
}
|
462 |
-
|
463 |
-
|
464 |
-
.relevancy-score{
|
465 |
-
margin-top:10px !important;
|
466 |
-
font-size:10px !important;
|
467 |
-
font-style:italic;
|
468 |
-
}
|
469 |
-
|
470 |
-
.score-green{
|
471 |
-
color:green !important;
|
472 |
-
}
|
473 |
-
|
474 |
-
.score-orange{
|
475 |
-
color:orange !important;
|
476 |
-
}
|
477 |
-
|
478 |
-
/* Lowest relevancy band; the selector was a duplicate of .score-orange */
.score-red{
    color:red !important;
}
|
481 |
-
.message-buttons-left.panel.message-buttons.with-avatar {
|
482 |
-
display: none;
|
483 |
-
}
|
|
|
2 |
/* :root {
|
3 |
--user-image: url('https://ih1.redbubble.net/image.4776899543.6215/st,small,507x507-pad,600x600,f8f8f8.jpg');
|
4 |
} */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
.warning-box {
|
7 |
background-color: #fff3cd;
|
|
|
57 |
|
58 |
.message{
|
59 |
font-size:14px !important;
|
|
|
60 |
}
|
61 |
|
62 |
|
|
|
65 |
color: inherit;
|
66 |
}
|
67 |
|
|
|
|
|
|
|
|
|
68 |
.card {
|
69 |
background-color: white;
|
70 |
border-radius: 10px;
|
|
|
363 |
.a-doc-ref{
|
364 |
text-decoration: none !important;
|
365 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
test.json
DELETED
File without changes
|