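# Spaces: Sleeping
# (The line above appears to be page-status text captured along with the
# source; it is not part of the module's code.)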
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser, OutputFixingParser
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)

from .model import InformationExtractedFromABillReceipt as PydanticModel
# Chat model shared by the extraction chain and by the output-fixing parser
# built further down in this module.
# NOTE(review): temperature=0.6 makes sampling non-deterministic; for a
# field-extraction task a lower value may be preferable -- confirm with the
# owner before changing it.
model = ChatOpenAI(
    temperature=0.6,
    max_tokens=300,
    n=1,
    # NOTE(review): None presumably defers to the client's default timeout --
    # confirm against the installed LangChain/OpenAI versions.
    request_timeout=None,
    # Extra parameters forwarded to the model call as-is.
    model_kwargs={
        'stop': None,
        'top_p': 1,
    },
)
# Build category chain
#
# System message: describes the hotel-bill extraction task and lists the
# fields to extract. It ends with the ``{format_instructions}`` template
# variable. Nothing in this block binds that variable, so it has to be
# supplied when the prompt is formatted.
# NOTE(review): presumably callers pass the output parser's format
# instructions here -- verify against the call site.
system_message_prompt = SystemMessagePromptTemplate.from_template(
    "You are tasked with developing an OCR data extraction system for hotel bills in PDF "
    "format given as text. The system should extract important information necessary for "
    "the reimbursement process from a college. Your prompt should fetch the following "
    "essential details from the hotel bill: hotel name, address, bill number/invoice "
    "number, booking ID / confirmation ID / booking number, check-in date and time, "
    "check-out date and time, total amount, booking platform, bill date.\n"
    "Ensure that the system accurately extracts the above information from the OCR text "
    "of the hotel bill. Fields with formats specified as date, time, or datetime should "
    "be ISO 8601 compliant.\n"
    "{format_instructions}"
)

# Human message: consists solely of the ``{text}`` template variable.
human_message_prompt = HumanMessagePromptTemplate.from_template("{text}")

# Full chat prompt: system message first, then the human message.
chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)
# Parser targeting the bill-receipt Pydantic model imported as PydanticModel.
output_parser = PydanticOutputParser(pydantic_object=PydanticModel)

# Wrap the Pydantic parser in an OutputFixingParser backed by the same chat
# model; per the LangChain docs, this wrapper asks the LLM to repair output
# that the inner parser fails to parse.
fixing_parser = OutputFixingParser.from_llm(llm=model, parser=output_parser)

# Module-level chain object: chat prompt + chat model, configured with the
# fixing parser as its output parser.
chain = LLMChain(llm=model, prompt=chat_prompt, output_parser=fixing_parser)