"""LLM chain that extracts structured bill/receipt details from OCR text.

The module wires a chat model, a two-message prompt (system instructions plus
the raw OCR text) and a pydantic-backed output parser into ``chain``.  Callers
supply both ``text`` and ``format_instructions`` when invoking the chain.
Running the module as a script sends one sample invoice through the chain and
prints the raw and the parsed result.
"""

from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import OutputFixingParser, PydanticOutputParser
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)

from .model import InformationExtractedFromABillReceipt as PydanticModel

# Chat model shared by the extraction chain and by the output-fixing parser.
model = ChatOpenAI(
    temperature=0,
    n=1,
    model_kwargs={
        "stop": None,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    },
)

# System message: describes the extraction task and ends with the
# ``{format_instructions}`` placeholder that is filled in at call time.
system_message_prompt = SystemMessagePromptTemplate.from_template(
    "You are an information extraction engine that outputs details from OCR "
    "processed documents like uids, total, tax, name, currency, date, seller "
    "details, summary. "
    "You may use context to make an educated guess about the currency. "
    "Use null if you are unable to find certain details. "
    "Fields with formats specified as date, time, or datetime should be "
    "ISO 8601 compliant.\n"
    "{format_instructions}"
)

# Human message: the OCR text, passed through verbatim.
human_message_prompt = HumanMessagePromptTemplate.from_template("{text}")

chat_prompt = ChatPromptTemplate.from_messages(
    [
        system_message_prompt,
        human_message_prompt,
    ]
)

# Parser targeting the project's pydantic model, wrapped in a fixing parser
# that is given the same chat model (presumably to repair completions the
# inner parser rejects -- see the LangChain OutputFixingParser docs).
output_parser = PydanticOutputParser(pydantic_object=PydanticModel)
fixing_parser = OutputFixingParser.from_llm(llm=model, parser=output_parser)

chain = LLMChain(llm=model, prompt=chat_prompt, output_parser=fixing_parser)


if __name__ == "__main__":
    # Sample OCR dump of an invoice.  Written as adjacent literals with
    # explicit "\n" so the two embedded line breaks (and the space preceding
    # each of them) are visible and survive whitespace-stripping editors.
    sample_ocr_text = (
        "amazonin we) Sold By : Spigen India Pvt. Ltd. * Rect/Killa Nos. \n"
        "38//8/2 min, 192//22/1,196//2/1/1, 37//15/1, 15/2,, Adjacent to Starex "
        "School, Village - Binola, National Highway -8, Tehsil - Manesar Gurgaon, "
        "Haryana, 122413 IN PAN No: ABACS5056L GST Registration No: "
        "O6ABACS5056L12Z5 Order Number: 407-5335982-7837125 Order Date: 30.05.2023 "
        "Tax Invoice/Bill of Supply/Cash Memo (Original for Recipient) "
        "Billing Address : Praveen Bohra E-303, ParkView City 2, Sector 49, "
        "Sohna Road GURGAON, HARYANA, 122018 IN State/UT Code: 06 "
        "Shipping Address : Praveen Bohra Praveen Bohra E-303, ParkView City 2, "
        "Sector 49, Sohna Road GURGAON, HARYANA, 122018 IN State/UT Code: 06 "
        "Place of supply: HARYANA Place of delivery: HARYANA "
        "Invoice Number : DEL5-21033 Invoice Details : HR-DEL5-918080915-2324 "
        "Invoice Date : 30.05.2023 Description at Tax |Tax /|Tax Total p y Rate "
        "|Type |Amount|Amount Black) | BO8BHLZHBH ( ACS01744INP ) HSN:39269099 1 "
        "|Spigen Liquid Air Back Cover Case for iPhone 12 Mini (TPU | Matte "
        "1846.62] 1 |%846.62| 9% |CGST! %76.19 |%999.00 9% |SGST| %76.19 TOTAL: "
        "Amount in Words: Nine Hundred Ninety-nine only Whether tax is payable "
        "under reverse charge - No For Spigen India Pvt. Ltd.: sSoigenrn "
        "Authorized Signatory Payment Transaction ID: Date & Time: 30/05/2023, "
        "10:48:43 Invoice Value: Mode of Payment: Credit 2rs9ZEF8BwU9VmWiCc2Us "
        "hrs 999.00 Card *ASSPL-Amazon Seller Services Pvt. Ltd., ARIPL-Amazon "
        "Retail India Pvt. Ltd. (only where Amazon Retail India Pvt. Ltd. \n"
        "fulfillment center is co-located) Customers desirous of availing input "
        "GST credit are requested to create a Business account and purchase on "
        "Amazon.in/business from Business eligible offers Please note that this "
        "invoice is not a demand for payment Page 1 of 1"
    )

    # The same instructions feed both the generation call and the prompt that
    # is handed to the parser, so build them once.
    format_instructions = fixing_parser.get_format_instructions()

    # Step 1: run the chain for a single input and show the raw LLM result.
    llm_result = chain.generate(
        input_list=[
            {
                "text": sample_ocr_text,
                "format_instructions": format_instructions,
            }
        ]
    )
    print(llm_result)

    # Step 2: parse the first completion explicitly, giving the parser the
    # fully formatted prompt that produced it.
    completion_text = llm_result.generations[0][0].text
    prompt_value = chain.prompt.format_prompt(
        text=sample_ocr_text,
        format_instructions=format_instructions,
    )
    parsed = fixing_parser.parse_with_prompt(completion_text, prompt_value)
    print(parsed)