Spaces:
Sleeping
Sleeping
from .model import InformationExtractedFromABillReceipt as PydanticModel | |
from langchain.chains import LLMChain | |
from langchain.chat_models import ChatOpenAI | |
from langchain.output_parsers import PydanticOutputParser, OutputFixingParser | |
from langchain.prompts import ( | |
ChatPromptTemplate, | |
HumanMessagePromptTemplate, | |
SystemMessagePromptTemplate, | |
) | |
# Chat model used by both the extraction chain and the output-fixing parser
# defined in this module. It is configured with temperature 0 and a single
# completion per request; the remaining sampling options are supplied through
# ``model_kwargs``.
model = ChatOpenAI(
    model_kwargs=dict(
        stop=None,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    ),
    n=1,
    temperature=0,
)
# Build the bill/receipt information-extraction chain.

# Replies are parsed into ``PydanticModel``; the fixing parser wraps that
# parser and is handed the same chat model.
output_parser = PydanticOutputParser(pydantic_object=PydanticModel)
fixing_parser = OutputFixingParser.from_llm(parser=output_parser, llm=model)

# System template: the task description followed by a ``format_instructions``
# placeholder that callers fill in when invoking the chain.
_SYSTEM_TEMPLATE = (
    "You are an information extraction engine that outputs details from OCR processed "
    "documents like uids, total, tax, name, currency, date, seller details, summary. You "
    "may use context to make an educated guess about the currency. Use null if you are "
    "unable to find certain details\n"
    "{format_instructions}"
)
system_message_prompt = SystemMessagePromptTemplate.from_template(_SYSTEM_TEMPLATE)

# The human message is just the document text, supplied as ``text``.
human_message_prompt = HumanMessagePromptTemplate.from_template("{text}")

chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)

chain = LLMChain(prompt=chat_prompt, llm=model, output_parser=fixing_parser)
if __name__ == "__main__":
    # Manual smoke test: run the chain on one OCR'd invoice, print the raw
    # generation result, then print the parsed result.
    sample_text = """amazonin
we)
Sold By :
Spigen India Pvt. Ltd.
* Rect/Killa Nos. 38//8/2 min, 192//22/1,196//2/1/1,
37//15/1, 15/2,, Adjacent to Starex School, Village
- Binola, National Highway -8, Tehsil - Manesar
Gurgaon, Haryana, 122413
IN
PAN No: ABACS5056L
GST Registration No: O6ABACS5056L12Z5
Order Number: 407-5335982-7837125
Order Date: 30.05.2023
Tax Invoice/Bill of Supply/Cash Memo
(Original for Recipient)
Billing Address :
Praveen Bohra
E-303, ParkView City 2, Sector 49, Sohna Road
GURGAON, HARYANA, 122018
IN
State/UT Code: 06
Shipping Address :
Praveen Bohra
Praveen Bohra
E-303, ParkView City 2, Sector 49, Sohna Road
GURGAON, HARYANA, 122018
IN
State/UT Code: 06
Place of supply: HARYANA
Place of delivery: HARYANA
Invoice Number : DEL5-21033
Invoice Details : HR-DEL5-918080915-2324
Invoice Date : 30.05.2023
Description at Tax |Tax /|Tax Total
p y Rate |Type |Amount|Amount
Black) | BO8BHLZHBH ( ACS01744INP )
HSN:39269099
1 |Spigen Liquid Air Back Cover Case for iPhone 12 Mini (TPU | Matte
1846.62] 1 |%846.62| 9% |CGST! %76.19 |%999.00
9% |SGST| %76.19
TOTAL:
Amount in Words:
Nine Hundred Ninety-nine only
Whether tax is payable under reverse charge - No
For Spigen India Pvt. Ltd.:
sSoigenrn
Authorized Signatory
Payment Transaction ID: Date & Time: 30/05/2023, 10:48:43 Invoice Value: Mode of Payment: Credit
2rs9ZEF8BwU9VmWiCc2Us hrs 999.00 Card
*ASSPL-Amazon Seller Services Pvt. Ltd., ARIPL-Amazon Retail India Pvt. Ltd. (only where Amazon Retail India Pvt. Ltd. fulfillment center is co-located)
Customers desirous of availing input GST credit are requested to create a Business account and purchase on Amazon.in/business from Business eligible offers
Please note that this invoice is not a demand for payment
Page 1 of 1"""

    # The same variables feed both the chain call and the prompt rendering
    # below, so they are assembled once.
    chain_inputs = {
        "text": sample_text,
        "format_instructions": fixing_parser.get_format_instructions(),
    }
    # Debugging aid: to inspect the rendered prompt on its own, print
    # chain.prompt.format_prompt(**chain_inputs).json(indent=4).

    llm_result = chain.generate(input_list=[chain_inputs])
    print(llm_result)

    # Parse the text of the first generation, passing the rendered prompt
    # along to the fixing parser.
    raw_reply = llm_result.generations[0][0].text
    rendered_prompt = chain.prompt.format_prompt(**chain_inputs)
    parsed = fixing_parser.parse_with_prompt(raw_reply, rendered_prompt)
    print(parsed)