Spaces:
Sleeping
Sleeping
from langchain_core.pydantic_v1 import BaseModel, Field | |
from langchain.prompts.prompt import PromptTemplate | |
from langchain.output_parsers import PydanticOutputParser | |
from typing import Literal | |
from operator import itemgetter | |
import json | |
from langchain_core.exceptions import OutputParserException | |
class ESRSAnalysis(BaseModel):
    """Analyzing the user query to get ESRS type, sources and intent"""

    # NOTE(review): the docstring above is kept verbatim in case it is
    # surfaced through the model schema. Only `esrs_type` is declared on this
    # model; no `sources` or `intent` field is active here.

    # The ESRS standard the query is about. Options are grouped by family:
    # cross-cutting (1, 2), environmental (E1-E5), social (S1-S4),
    # governance (G1), plus "no_intent" for queries matching none of them.
    esrs_type: Literal[
        "ESRS 1", "ESRS 2",
        "ESRS E1", "ESRS E2", "ESRS E3", "ESRS E4", "ESRS E5",
        "ESRS S1", "ESRS S2", "ESRS S3", "ESRS S4",
        "ESRS G1",
        "no_intent",
    ] = Field(description="The ESRS type that the user query refers to.")
def make_esrs_intent_chain(llm):
    """Build a chain that classifies a question into a single ESRS category.

    The chain reads the ``"query"`` entry of its input mapping, fills the
    classification prompt with it, pipes the prompt through ``llm`` and parses
    the reply into an ``ESRSAnalysis`` instance.

    Args:
        llm: The model (or other runnable) placed between the prompt and the
            output parser.

    Returns:
        The composed runnable ``{"query": ...} | prompt | llm | parser``.
    """
    # The prompt asks for a JSON object keyed by 'esrs_type'; the parser below
    # validates that reply against ESRSAnalysis.
    prompt_template = """
Please analyze the question and indicate if it refers to a specific ESRS.
Follow these definitions in order to choose the appropriate ESRS :
- ESRS 1 is for questions about general principles for preparing and presenting sustainability information in accordance with CSRD
- ESRS 2 is for questions about general disclosures related to sustainability reporting, including governance, strategy, impact, risk, opportunity management, and metrics and targets
- ESRS E1 is for questions about climate change, global warming, GES and energy
- ESRS E2 is for questions about air, water, and soil pollution, and dangerous substances
- ESRS E3 is for questions about water and marine resources
- ESRS E4 is for questions about biodiversity, nature, wildlife and ecosystems
- ESRS E5 is for questions about resource use and circular economy
- ESRS S1 is for questions about workforce and labor issues, job security, fair pay, and health and safety
- ESRS S2 is for questions about workers in the value chain, workers' treatment
- ESRS S3 is for questions about affected communities, impact on local communities
- ESRS S4 is for questions about consumers and end users, customer privacy, safety, and inclusion
- ESRS G1 is for questions about governance, risk management, internal control, and business conduct
- no_intent is for questions that do not fit into any of the above categories
Keep in mind these guidelines :
- Some questions could be related to multiple ESRS. In such case, choose the most appropriate one.
The output needs to respect a JSON format with 'esrs_type' as the key and the appropriate ESRS as the value.
Question: '{query}'
Answer:
"""
    # PydanticOutputParser takes no `method` option (that keyword belongs to
    # `with_structured_output`), so the stray `method="json_mode"` argument
    # has been dropped rather than left to look like it configures parsing.
    parser = PydanticOutputParser(pydantic_object=ESRSAnalysis)
    prompt = PromptTemplate(template=prompt_template, input_variables=["query"])

    # The leading dict selects only the "query" key from the chain input
    # before the prompt is formatted.
    return {"query": itemgetter("query")} | prompt | llm | parser
def make_esrs_intent_node(llm):
    """Create a node function that labels a state's query with its ESRS type.

    Args:
        llm: The model forwarded to ``make_esrs_intent_chain``.

    Returns:
        A function ``intent_message(state)`` that reads ``state["query"]``,
        runs the classification chain on it and returns
        ``{"esrs_type": [<label>]}`` (the label wrapped in a one-item list).
        Errors raised by the chain are not caught here and propagate to the
        caller.
    """
    # The chain depends only on `llm`, so it is built once per node instead of
    # being reconstructed on every invocation.
    categorization_chain = make_esrs_intent_chain(llm)

    def intent_message(state):
        analysis = categorization_chain.invoke({"query": state["query"]})
        return {"esrs_type": [analysis.esrs_type]}

    return intent_message
# intent: str = Field( | |
# enum=[ | |
# "Specific topic", | |
# "Implementation reco", | |
# "KPI extraction", | |
# ], | |
# description=""" | |
# Categorize the user query in one of the following categories, | |
# Examples: | |
# - Specific topic: "What are the specificities of ESRS E1 ?" | |
# - Implementation reco: "How should I compute my scope 1 reduction target ?" | |
# - KPI extraction: "When will the CSRD be mandatory for my small French company ?" | |
# """, | |
# ) | |
# sources: str = Field( | |
# enum=["ESRS", "External"], | |
# description=""" | |
# Given a user question choose which documents would be most relevant for answering their question, | |
# - ESRS is for questions about a specific environmental, social or governance topic, as well as CSRD's general principles and disclosures | |
# - External is for questions about how to implement the CSRD, or general questions about CSRD's context | |
# """, | |
# ) | |