from .model import InformationExtractedFromABillReceipt as PydanticModel
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser, OutputFixingParser
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
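
# For reference, the imported model is expected to look roughly like the
# sketch below. This is illustrative only: the field names are assumptions
# inferred from the fields the system prompt asks for, not the actual
# contents of .model.
#
# class InformationExtractedFromABillReceipt(BaseModel):
#     hotel_name: str
#     hotel_address: str
#     invoice_number: str
#     booking_id: str
#     check_in: str       # check-in date and time
#     check_out: str      # check-out date and time
#     total_amount: float
#     booking_platform: str
#     bill_date: str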

# Chat model shared by the extraction chain and the output-fixing parser.
model = ChatOpenAI(
    temperature=0.6,
    max_tokens=300,
    n=1,
    request_timeout=None,
    model_kwargs={
        'stop': None,
        'top_p': 1,
    },
)

# Build the hotel-bill extraction chain
system_message_prompt = SystemMessagePromptTemplate.from_template(
    "You are tasked with developing an OCR data extraction system for hotel bills in PDF "
    "format given as text. The system should extract the important information necessary "
    "for the reimbursement process at a college. Extract the following essential details "
    "from the hotel bill: hotel name, address, bill number/invoice number, booking ID / "
    "confirmation ID / booking number, check-in date and time, check-out date and time, "
    "total amount, booking platform, bill date.\n"
    "Ensure that the system accurately extracts the above information from the OCR text "
    "of the hotel bill.\n"
    "{format_instructions}"
)
human_message_prompt = HumanMessagePromptTemplate.from_template("{text}")
chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)

output_parser = PydanticOutputParser(pydantic_object=PydanticModel)
# Re-submit malformed model output to the LLM for correction before parsing.
fixing_parser = OutputFixingParser.from_llm(llm=model, parser=output_parser)
chain = LLMChain(llm=model, prompt=chat_prompt, output_parser=fixing_parser)
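
# Example usage (a sketch; `ocr_text` is a hypothetical placeholder for the
# OCR output of a bill PDF). The prompt declares two input variables, so the
# parser's format instructions must be supplied alongside the bill text:
#
# ocr_text = "..."
# result = chain.run(
#     text=ocr_text,
#     format_instructions=output_parser.get_format_instructions(),
# )
# # `result` is a validated InformationExtractedFromABillReceipt instance.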