# Spaces: Sleeping (web-page header residue captured with this file; not code)
from .model import InformationExtractedFromABillReceipt as PydanticModel | |
from langchain.chains import LLMChain | |
from langchain.chat_models import ChatOpenAI | |
from langchain.output_parsers import PydanticOutputParser, OutputFixingParser | |
from langchain.prompts import ( | |
ChatPromptTemplate, | |
HumanMessagePromptTemplate, | |
SystemMessagePromptTemplate, | |
) | |
# Extra request options handed to ChatOpenAI through ``model_kwargs``.
_request_kwargs = {
    "stop": None,
    "top_p": 1,
    "frequency_penalty": 0,
    "presence_penalty": 0,
}

# Chat model used by this module: temperature 0, one completion per request.
model = ChatOpenAI(n=1, temperature=0, model_kwargs=_request_kwargs)
# Prompt for the extraction chain: a system message describing the task,
# followed by a human message that is filled from the ``{text}`` variable.
# The system template also expects a ``{format_instructions}`` variable.
_SYSTEM_TEMPLATE = (
    "You are an information extraction engine that outputs details from OCR "
    "processed documents like uids, total, tax, addresses, bank details, "
    "invoice details, participant registration details. "
    "Fields with formats specified as date, time, or datetime should be "
    "ISO 8601 compliant.\n"
    "{format_instructions}"
)

system_message_prompt = SystemMessagePromptTemplate.from_template(_SYSTEM_TEMPLATE)
human_message_prompt = HumanMessagePromptTemplate.from_template("{text}")

# Message order matters: system instructions first, then the human message.
chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)
# Parser built around the project's Pydantic model for extracted fields.
output_parser = PydanticOutputParser(pydantic_object=PydanticModel)

# Writes the parser's format instructions to stdout when the module is loaded.
# NOTE(review): looks like a debugging aid -- confirm it is still wanted.
print(output_parser.get_format_instructions())

# Wrap the parser in an OutputFixingParser backed by the same model, then
# assemble the chain from the model, the chat prompt and the wrapped parser.
fixing_parser = OutputFixingParser.from_llm(parser=output_parser, llm=model)
chain = LLMChain(
    llm=model,
    prompt=chat_prompt,
    output_parser=fixing_parser,
)