# Module-level wiring of a LangChain pipeline: a chat model, a two-message
# prompt (system instructions + raw bill text), and a pydantic-backed output
# parser, assembled into the `chain` object defined at the bottom.

from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import OutputFixingParser, PydanticOutputParser
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)

from .model import InformationExtractedFromABillReceipt as PydanticModel

# System instructions sent ahead of the bill text. The only template
# placeholder in here is `{format_instructions}`; nothing in this module binds
# it, so it is presumably supplied by the caller when the chain is invoked
# (NOTE(review): confirm against the call site).
_SYSTEM_TEMPLATE = (
    "You are tasked with developing an OCR data extraction system for hotel bills in PDF "
    "format given as text. The system should extract important information necessary for "
    "the reimbursement process from a college. Your prompt should fetch the following "
    "essential details from the hotel bill: hotel name, address, bill number/invoice "
    "number, booking ID / confirmation ID / booking number, check-in date and time, "
    "check-out date and time, total amount, booking platform, bill date.\n"
    "Ensure that the system accurately extracts the above information from the OCR text "
    "of the hotel bill. Fields with formats specified as date, time, or datetime should "
    "be ISO 8601 compliant.\n"
    "{format_instructions}"
)

# Chat model shared by the chain itself and by the output-fixing parser.
# `request_timeout=None` means no explicit timeout is configured here.
model = ChatOpenAI(
    n=1,
    temperature=0.6,
    max_tokens=300,
    request_timeout=None,
    model_kwargs=dict(stop=None, top_p=1),
)

# Prompt: system instructions first, then the bill text via the `{text}`
# placeholder of the human message.
system_message_prompt = SystemMessagePromptTemplate.from_template(_SYSTEM_TEMPLATE)
human_message_prompt = HumanMessagePromptTemplate.from_template("{text}")
chat_prompt = ChatPromptTemplate.from_messages(
    [
        system_message_prompt,
        human_message_prompt,
    ]
)

# Parsing: `output_parser` targets the project's pydantic model;
# `fixing_parser` wraps it together with the same `model` instance.
output_parser = PydanticOutputParser(pydantic_object=PydanticModel)
fixing_parser = OutputFixingParser.from_llm(parser=output_parser, llm=model)

# The assembled extraction chain: prompt -> model -> fixing parser.
chain = LLMChain(
    llm=model,
    prompt=chat_prompt,
    output_parser=fixing_parser,
)