Spaces:

Ekimetrics
/

celsius-csrd-chatbot

Sleeping

App Files Files Community

celsius-csrd-chatbot / celsius_csrd_chatbot /chains /esrs_intent.py

momenaca

feature/major backend update with agent

8ca00e0 6 months ago

raw

history blame contribute delete

4.2 kB

	from langchain_core.pydantic_v1 import BaseModel, Field
	from langchain.prompts.prompt import PromptTemplate
	from langchain.output_parsers import PydanticOutputParser
	from typing import Literal
	from operator import itemgetter
	import json
	from langchain_core.exceptions import OutputParserException


	class ESRSAnalysis(BaseModel):
	    """Analyzing the user query to get ESRS type, sources and intent"""

	    # NOTE: the docstring above is left verbatim because pydantic publishes a
	    # model's docstring as the description of its schema. Despite its wording,
	    # the only field declared on this model is ``esrs_type``.

	    # Closed set of labels the classifier may return: the two cross-cutting
	    # standards, then the E (environment), S (social) and G (governance)
	    # families, plus "no_intent" for questions that match none of them.
	    # The field has no default, so a value must always be supplied.
	    esrs_type: Literal[
	        "ESRS 1", "ESRS 2",
	        "ESRS E1", "ESRS E2", "ESRS E3", "ESRS E4", "ESRS E5",
	        "ESRS S1", "ESRS S2", "ESRS S3", "ESRS S4",
	        "ESRS G1",
	        "no_intent",
	    ] = Field(description="The ESRS type that the user query refers to.")


	def make_esrs_intent_chain(llm):
	    """Build the chain that classifies a question into a single ESRS label.

	    The chain picks the ``"query"`` entry out of its input mapping, inserts
	    it into the classification prompt, sends the prompt to ``llm`` and parses
	    the reply into an ``ESRSAnalysis`` instance.

	    Args:
	        llm: Runnable placed between the prompt and the parser (presumably a
	            LangChain chat model or LLM; confirm against callers).

	    Returns:
	        The composed runnable. Invoke it with ``{"query": <question>}``; the
	        chosen label is the ``esrs_type`` attribute of the result.

	    Raises:
	        Nothing at build time beyond what the LangChain constructors raise.
	        At invoke time the parser's error propagates when the model reply
	        cannot be parsed into ``ESRSAnalysis``.
	    """
	    prompt_template = """
	Please analyze the question and indicate if it refers to a specific ESRS.

	Follow these definitions in order to choose the appropriate ESRS :
	- ESRS 1 is for questions about general principles for preparing and presenting sustainability information in accordance with CSRD
	- ESRS 2 is for questions about general disclosures related to sustainability reporting, including governance, strategy, impact, risk, opportunity management, and metrics and targets
	- ESRS E1 is for questions about climate change, global warming, GES and energy
	- ESRS E2 is for questions about air, water, and soil pollution, and dangerous substances
	- ESRS E3 is for questions about water and marine resources
	- ESRS E4 is for questions about biodiversity, nature, wildlife and ecosystems
	- ESRS E5 is for questions about resource use and circular economy
	- ESRS S1 is for questions about workforce and labor issues, job security, fair pay, and health and safety
	- ESRS S2 is for questions about workers in the value chain, workers' treatment
	- ESRS S3 is for questions about affected communities, impact on local communities
	- ESRS S4 is for questions about consumers and end users, customer privacy, safety, and inclusion
	- ESRS G1 is for questions about governance, risk management, internal control, and business conduct
	- no_intent is for questions that do not fit into any of the above categories

	Keep in mind these guidelines :
	- Some questions could be related to multiple ESRS. In such case, choose the most appropriate one.

	The output needs to respect a JSON format with 'esrs_type' as the key and the appropriate ESRS as the value.

	Question: '{query}'
	Answer:
	"""
	    # The parser only needs the target model. A ``method="json_mode"``
	    # keyword is an option of ``with_structured_output``, not of
	    # ``PydanticOutputParser``, so it is not passed here.
	    parser = PydanticOutputParser(pydantic_object=ESRSAnalysis)
	    prompt = PromptTemplate(template=prompt_template, input_variables=["query"])

	    # Runnables are composed with the plain ``|`` operator; the leading dict
	    # forwards only the "query" key of the input to the prompt.
	    chain = {"query": itemgetter("query")} | prompt | llm | parser

	    return chain


	def make_esrs_intent_node(llm):
	    """Create the state-processing function that tags a query with its ESRS type.

	    Args:
	        llm: Model handed to ``make_esrs_intent_chain`` to build the
	            classification chain.

	    Returns:
	        A function ``intent_message(state)`` that reads ``state["query"]``
	        and returns ``{"esrs_type": [<label>]}``, i.e. the predicted label
	        wrapped in a one-element list.
	    """
	    # The chain depends only on ``llm``, so it is built once here rather
	    # than being reconstructed on every call of the returned function.
	    categorization_chain = make_esrs_intent_chain(llm)

	    def intent_message(state):
	        """Classify ``state["query"]`` and return the ``esrs_type`` update."""
	        analysis = categorization_chain.invoke({"query": state["query"]})
	        return {"esrs_type": [analysis.esrs_type]}

	    return intent_message

	# intent: str = Field(
	# enum=[
	# "Specific topic",
	# "Implementation reco",
	# "KPI extraction",
	# ],
	# description="""
	# Categorize the user query in one of the following categories,

	# Examples:
	# - Specific topic: "What are the specificities of ESRS E1 ?"
	# - Implementation reco: "How should I compute my scope 1 reduction target ?"
	# - KPI extraction: "When will the CSRD be mandatory for my small French company ?"
	# """,
	# )

	# sources: str = Field(
	# enum=["ESRS", "External"],
	# description="""
	# Given a user question choose which documents would be most relevant for answering their question,
	# - ESRS is for questions about a specific environmental, social or governance topic, as well as CSRD's general principles and disclosures
	# - External is for questions about how to implement the CSRD, or general questions about CSRD's context
	# """,
	# )