In [None]:
import pandas as pd 
import numpy as np
import os

# Reload edited modules automatically before each cell execution, so changes
# to the project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Put the parent of the current working directory on the import path
# (presumably the repository root, so that `climateqa` is importable -- confirm).
import sys
sys.path.append(os.path.dirname(os.getcwd()))

# Load environment variables from a .env file; later cells read index names
# through os.getenv.
from dotenv import load_dotenv
load_dotenv()

In [None]:
from climateqa.engine.llm import get_llm
from climateqa.engine.vectorstore import get_pinecone_vectorstore
from climateqa.engine.embeddings import get_embeddings_function
from climateqa.engine.reranker import get_reranker
from climateqa.engine.graph import make_graph_agent, display_graph


## LLM

In [None]:
from climateqa.engine.llm import get_llm

# Smoke test: build the LLM client for the "openai" provider and send it a
# trivial prompt; the cell displays whatever `invoke` returns.
llm = get_llm(provider="openai")
llm.invoke("Say Hello !")


## Retriever 

In [None]:
from climateqa.engine.embeddings import get_embeddings_function
from climateqa.engine.vectorstore import get_pinecone_vectorstore

# Sample query reused by the retrieval and reranking cells below.
question = "What is the impact of climate change on the environment?"

# Vector store built from the embedding function only (no explicit index name).
embeddings_function = get_embeddings_function()
vectorstore_ipcc = get_pinecone_vectorstore(embeddings_function)

# Plain similarity search, without any metadata filter.
docs_question = vectorstore_ipcc.search(
    query=question,
    search_type="similarity",
)
docs_question

In [None]:
# Optional metadata filter: keep only entries whose "source" is in `sources_owid`.
sources_owid = ["OWID"]
filters = {"source": {"$in": sources_owid}}

# Vector store for the graphs index; the index name comes from the environment
# and the "title" field is used as the text key.
vectorstore_graphs = get_pinecone_vectorstore(
    embeddings_function,
    index_name=os.getenv("PINECONE_API_INDEX_OWID"),
    text_key="title",
)

# Single filtered query returning (document, score) pairs. The previous
# unfiltered `search` call was dropped: its result was overwritten on the very
# next line, so it only cost an extra remote query.
owid_graphs = vectorstore_graphs.similarity_search_with_score(
    query=question,
    filter=filters,
    k=5,
)
owid_graphs

In [None]:
# Vector store for the region index; its name is read from the
# PINECONE_API_INDEX_REGION environment variable.
vectorstore_region = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX_REGION"))


## Reranker

In [None]:
from climateqa.engine.reranker import get_reranker, rerank_docs

# Rerank the documents retrieved for `question` using the "nano" reranker
# and display the reranked list.
reranker = get_reranker("nano")
reranked_docs_question = rerank_docs(reranker, docs_question, question)
reranked_docs_question

# Graph

In [None]:
from climateqa.engine.graph import display_graph, make_graph_agent, make_graph_agent_poc

# Rebuild every dependency of the agents so this section can be run without
# the exploratory cells above (apart from the imports).
llm = get_llm(provider="openai")
embeddings_function = get_embeddings_function()
vectorstore_ipcc = get_pinecone_vectorstore(embeddings_function)
vectorstore_graphs = get_pinecone_vectorstore(
    embeddings_function,
    index_name=os.getenv("PINECONE_API_INDEX_OWID"),
    text_key="title",
)
vectorstore_region = get_pinecone_vectorstore(
    embeddings_function,
    index_name=os.getenv("PINECONE_API_INDEX_REGION"),
)
reranker = get_reranker("nano")

# Both agent factories receive exactly the same keyword arguments.
agent_kwargs = dict(
    llm=llm,
    vectorstore_ipcc=vectorstore_ipcc,
    vectorstore_graphs=vectorstore_graphs,
    vectorstore_region=vectorstore_region,
    reranker=reranker,
)
app = make_graph_agent(**agent_kwargs)
app2 = make_graph_agent_poc(**agent_kwargs)

# Show the standard graph first, then the POC one.
display_graph(app)
display_graph(app2)

In [None]:
from climateqa.engine.graph import search 

from climateqa.engine.chains.intent_categorization import make_intent_categorization_node


from climateqa.engine.chains.answer_chitchat import make_chitchat_node
from climateqa.engine.chains.answer_ai_impact import make_ai_impact_node
from climateqa.engine.chains.query_transformation import make_query_transform_node
from climateqa.engine.chains.translation import make_translation_node
from climateqa.engine.chains.retrieve_documents import make_IPx_retriever_node, make_POC_retriever_node
from climateqa.engine.chains.answer_rag import make_rag_node
from climateqa.engine.chains.graph_retriever import make_graph_retriever_node
from climateqa.engine.chains.chitchat_categorization import make_chitchat_intent_categorization_node
from climateqa.engine.chains.prompts import audience_prompts
from climateqa.engine.graph import route_intent


In [None]:
# Input state for the step-by-step walkthrough of the graph nodes.
inial_state = dict(
    user_input="What is the impact of climate change on the environment?",
    # user_input="Quel est l'impact du changement climatique sur Bordeaux ?",
    audience=audience_prompts["general"],
    # sources_input=["IPCC"],
    relevant_content_sources_selection=["Figures (IPCC/IPBES)", "POC region"],
    search_only=False,
    reports=[],
)
# Shallow working copy that the following cells enrich node by node.
state = dict(inial_state)

In [None]:
# Run the intent categorization node on the untouched initial state and merge
# what it returns into the working `state`, then display the state.
cat_node = make_intent_categorization_node(llm)
state.update(cat_node(inial_state))
state

In [None]:
# state.update(search(state))
# state

In [None]:
# Route once and reuse the result (the router used to be called a second time
# while `intent` was left unused).
intent = route_intent(state)

if intent == "translate_query":
    # Merge the translation node's output into `state`, like every other node
    # cell in this walkthrough; previously the result was computed and dropped.
    # NOTE(review): assumes the node is synchronous and returns a dict of
    # state updates, like the query transform node used below -- confirm.
    state.update(make_translation_node(llm)(state))

In [None]:
# Run the query transformation node and merge its output into `state`.
# Later cells read state["questions_list"] and state["n_questions"]
# (presumably produced here -- confirm).
state.update(make_query_transform_node(llm)(state))
state

In [None]:
from climateqa.engine.chains.retrieve_documents import retrieve_documents
res = await retrieve_documents(state["questions_list"][0],{},"IPx", vectorstore_ipcc,reranker)
res

In [None]:
from climateqa.engine.chains.retrieve_documents import retrieve_documents_for_all_questions

source_type = "IPx"
to_handle_questions_index = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "IPx"]

search_figures = "Figures (IPCC/IPBES)" in state["relevant_content_sources_selection"]
search_only = state["search_only"]
reports = state["reports"]
questions_list = state["questions_list"]
n_questions=state["n_questions"]["total"]
k_final=15
k_before_reranking=100

res = await retrieve_documents_for_all_questions(
 search_figures=search_figures,
 search_only=search_only,
 reports=reports,
 questions_list=questions_list,
 n_questions=n_questions,
 config={},
 source_type=source_type,
 to_handle_questions_index=to_handle_questions_index,
 vectorstore=vectorstore_ipcc,
 reranker=reranker,
 rerank_by_question=True,
 k_final=k_final,
 k_before_reranking=k_before_reranking,
 )
state.update(res)
state

In [None]:
# Run the (async) graph retriever node, built from the graphs vector store and
# the reranker, and merge its output into `state`.
state.update(await make_graph_retriever_node(vectorstore_graphs, reranker)(state))
state

In [None]:
# Run the (async) RAG answer node on the current state with an empty second
# argument (presumably the run config -- confirm), then merge its output.
answer_rag = await make_rag_node(llm)(state,{})
state.update(answer_rag)

# Stream events of the whole chain

In [None]:

# `make_graph_agent_poc` is imported here because this cell calls it; it used
# to rely on an import made in a much earlier cell.
from climateqa.engine.graph import make_graph_agent, make_graph_agent_poc, display_graph
from climateqa.engine.chains.prompts import audience_prompts


# Input state for the end-to-end run of the POC agent.
inial_state = {
    "user_input": "Comment le changement climatique m'affectera à Paris?",
    "audience": audience_prompts["general"],
    "sources_input": ["IPCC"],
    "relevant_content_sources_selection": ["Figures (IPCC/IPBES)", "POC region"],
    "search_only": False,
    "reports": [],
}
app = make_graph_agent_poc(
    llm=llm,
    vectorstore_ipcc=vectorstore_ipcc,
    vectorstore_graphs=vectorstore_graphs,
    vectorstore_region=vectorstore_region,
    reranker=reranker,
)

inial_state

In [None]:
# Run the agent on `inial_state` and materialise every streamed event (v1
# event schema) into a plain list so it can be inspected afterwards.
event_list = app.astream_events(inial_state, version = "v1")
static_event_list = []
async for event in event_list:
 static_event_list.append(event)

In [None]:
# One row per streamed event.
df_events = pd.DataFrame(static_event_list)

# Name of the graph node that emitted the event; the string "None" when the
# metadata carries no "langgraph_node" entry.
df_events["node"] = df_events["metadata"].apply(
    lambda meta: meta.get("langgraph_node", "None")
)

# Events emitted by the answer-producing nodes.
df_events_chat = df_events[
    df_events["node"].isin(
        ["answer_rag", "answer_rag_no_docs", "answer_search", "answer_chitchat"]
    )
]

# End-of-chain event of the "answer_rag" node; display its output.
is_answer_rag_end = (df_events_chat["event"] == "on_chain_end") & (
    df_events_chat["name"] == "answer_rag"
)
node_end_answer = df_events_chat[is_answer_rag_end]
node_end_answer["data"].values[0]["output"]

In [None]:
# Outputs of all end-of-chain events. `.get` tolerates events whose data has
# no "output" entry.
chain_end_outputs = df_events.loc[df_events["event"] == "on_chain_end", "data"].apply(
    lambda data: data.get("output")
)

# Keep the events whose output is a dict carrying non-null "related_contents".
# The isinstance check avoids a substring test on string outputs and a
# TypeError on other non-container outputs.
has_related_contents = chain_end_outputs.apply(
    lambda output: isinstance(output, dict) and output.get("related_contents") is not None
).astype(bool)

# Select by index *label* with `.loc`: the original fed labels to `.iloc`,
# which is only correct while the frame keeps its default 0..n-1 index.
ev_rel_doc = df_events.loc[has_related_contents[has_related_contents].index].iloc[-1]
related_content = ev_rel_doc["data"]["output"]["related_contents"]
# [f"{d.metadata['short_name']} - {d.metadata['name']}" for d in related_content]
related_content[0].metadata

In [None]:
# Data payload of the first end-of-chain event named "transform_query".
df_events[(df_events["event"] =="on_chain_end") & (df_events["name"]=="transform_query")]["data"].values[0]

In [None]:
# Input recorded on the first end-of-chain event named "answer_search".
df_events[(df_events["event"] =="on_chain_end") & (df_events["name"]=="answer_search")]["data"].values[0]["input"]

In [None]:
# End-of-chain rows of the "answer_rag" node selected in an earlier cell.
node_end_answer

In [None]:
# Get the answer at the end
from climateqa.handle_stream_events import stream_answer
event_list = app.astream_events(inial_state, version = "v1")
history = []
start_streaming = False
answer_message_content = ""
async for event in event_list:

 if "langgraph_node" in event["metadata"]:
 node = event["metadata"]["langgraph_node"]

 if (event["name"] != "transform_query" and 
 event["event"] == "on_chat_model_stream" and
 node in ["answer_rag","answer_rag_no_docs", "answer_search", "answer_chitchat"]):
 history, start_streaming, answer_message_content = stream_answer(
 history, event, start_streaming, answer_message_content
 )

# Test event logs


In [None]:
# Input state for the event-log tests; the audience prompt is written out in
# full here instead of being looked up.
inial_state = {
    "user_input": "What is the impact of climate in Bordeaux",
    "audience": (
        "the general public who know the basics in science and climate change "
        "and want to learn more about it without technical terms. "
        "Still use references to passages."
    ),
    "sources_input": ["IPCC"],
    "relevant_content_sources_selection": ["Figures (IPCC/IPBES)", "POC region"],
    "search_only": False,
    "reports": [],
}

In [None]:
# Rebuild the standard agent and capture every event it streams for
# `inial_state` into `static_event_list`.
# NOTE(review): `stream_answer`, `history`, `start_streaming` and
# `answer_message_content` are set up here but not used by this cell.
from climateqa.handle_stream_events import stream_answer
app = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore_ipcc, vectorstore_graphs=vectorstore_graphs, vectorstore_region=vectorstore_region, reranker=reranker)

event_list = app.astream_events(inial_state, version = "v1")
history = []
start_streaming = False
answer_message_content = ""
static_event_list = []
async for event in event_list:
 static_event_list.append(event)

In [None]:
# One row per captured event.
df_static_events = pd.DataFrame(static_event_list)

In [None]:
# Preview of the first rows.
df_static_events.head()

In [None]:
# Distinct event names seen during the run.
df_static_events["name"].unique()

In [None]:
# End-of-chain events emitted under one of the retrieval node names.
retrieval_node_names = [
    "retrieve_documents",
    "retrieve_local_data",
    "retrieve_POC_docs_node",
    "retrieve_IPx_docs",
]
is_chain_end = df_static_events["event"] == "on_chain_end"
is_retrieval_node = df_static_events["name"].isin(retrieval_node_names)
# Optional extra filter, currently disabled:
# df_static_events["data"].apply(lambda x: x["output"] is not None)
selected_events = df_static_events[is_chain_end & is_retrieval_node]
selected_events

In [None]:
# Documents carried by each selected retrieval event's output.
# NOTE(review): raises if an event's output is None or lacks "documents";
# the commented-out filter below would guard against a missing/None output.
# selected_events[selected_events["data"].apply(lambda x : "output" in x and x["output"] is not None)]
selected_events["data"].apply(lambda x : x["output"]["documents"])

In [None]:
# End-of-chain events named "answer_search"; display their metadata column.
is_chain_end = df_static_events["event"] == "on_chain_end"
is_answer_search = df_static_events["name"].isin(["answer_search"])
# Optional extra filter, currently disabled:
# df_static_events["data"].apply(lambda x: x["output"] is not None)
selected_events = df_static_events.loc[is_chain_end & is_answer_search]
selected_events["metadata"]

In [None]:
# "related_contents" found in the input of the first selected event.
selected_events["data"].iloc[0]["input"]["related_contents"]

In [None]:
# Output of the third selected event.
# NOTE(review): assumes at least three events were selected -- confirm.
selected_events["data"].apply(lambda x : x["output"]).iloc[2]

In [None]:
# Values of the data payload of the first and second selected events
# (dict views), then the same values as lists for the first four events.
# NOTE(review): the positional lookups assume at least four selected events.
selected_events.iloc[0]["data"].values()

In [None]:
selected_events.iloc[1]["data"].values()

In [None]:
list(selected_events.iloc[0]["data"].values())

In [None]:
list(selected_events.iloc[1]["data"].values())

In [None]:
list(selected_events.iloc[2]["data"].values())

In [None]:
list(selected_events.iloc[3]["data"].values())

In [None]:
# import json

# print(json.dumps(list(selected_events.iloc[1]["data"].values()), indent=4))



In [None]:

# Imported in this cell because `json` is used right here; on a fresh kernel
# no earlier cell has imported it, so the cell used to fail with a NameError.
import json

# Pretty-print the first value of the second selected event's data payload.
data_values = selected_events.iloc[1]["data"].values()
# `default=str` falls back to the string form of any object json cannot
# serialise natively (the payload may hold project objects such as retrieved
# documents -- assumption), instead of raising a TypeError.
formatted_data = json.dumps(list(data_values)[0], indent=4, default=str)
print(formatted_data)

In [None]:
# NOTE(review): `pprint` is imported but not used in the visible cells.
from pprint import pprint
import json
# Values of the data payload of the third selected event.
selected_events.iloc[2]["data"].values()

In [None]:
# Values of the data payload of the fourth selected event.
selected_events.iloc[3]["data"].values()

In [None]:
# First captured event (of any type) named "retrieve_POC_docs_node".
df_static_events[df_static_events["name"] == "retrieve_POC_docs_node"].iloc[0]