Spaces:
Sleeping
Sleeping
from .model import InformationExtractedFromABillReceipt as PydanticModel | |
from langchain.chains import LLMChain | |
from langchain.chat_models import ChatOpenAI | |
from langchain.output_parsers import PydanticOutputParser, OutputFixingParser | |
from langchain.prompts import ( | |
ChatPromptTemplate, | |
HumanMessagePromptTemplate, | |
SystemMessagePromptTemplate, | |
) | |
# Shared chat model used both for extraction and for repairing malformed
# output. Temperature 0 with a single completion (n=1); the remaining sampling
# options are handed over through `model_kwargs`.
model = ChatOpenAI(
    temperature=0,
    n=1,
    model_kwargs=dict(
        stop=None,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    ),
)
# Build the information-extraction chain.
#
# The system message describes the extraction task and embeds the parser's
# format instructions; the human message is the `text` input passed through
# unchanged.
system_message_prompt = SystemMessagePromptTemplate.from_template(
    "You are an information extraction engine that outputs details from OCR processed "
    "documents such as date/time/place of departure and arrival. Fields with formats "
    "specified as date, time, or datetime should be ISO 8601 compliant.\n"
    "{format_instructions}"
)
human_message_prompt = HumanMessagePromptTemplate.from_template("{text}")
chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)

# Parse the model's reply into the Pydantic schema; the fixing parser wraps
# that parser and is given the same LLM so it can use it when parsing fails.
output_parser = PydanticOutputParser(pydantic_object=PydanticModel)
fixing_parser = OutputFixingParser.from_llm(llm=model, parser=output_parser)

# Prompt inputs expected by the chain: `text` and `format_instructions`.
chain = LLMChain(
    llm=model, prompt=chat_prompt, output_parser=fixing_parser
)
if __name__ == '__main__':
    # Manual smoke test: feed a completion whose date/time fields are not in
    # ISO 8601 form to the fixing parser, together with the prompt that would
    # have produced it, and print the parsed result.
    bad_completion = '''{
"place_from": "Adarsh Palm Retreat, Bellandur, Bengaluru, Karnataka 560103, India",
"date_from": "08/05/2023",
"time_from": "10:05 AM",
"place_to": "135, Residency Rd, Shanthala Nagar, Ashok Nagar, Bengaluru, Karnataka 560025, India",
"date_to": "08/05/2023",
"time_to": "11:05 AM",
"amount": 474.54
}'''

    # OCR text of an Uber receipt e-mail, used as the `text` prompt input.
    receipt_text = '''08/05/2023, 14:56 Gmail - [Personal] Your Wednesday morning trip with Uber
https://mail.google.com/mail/u/0/?ik=9af0dfed43&view=pt&search=all&permthid=thread-f:1764850071084777313&simpl=msg-f:1764850071084777313 1/3
Total ₹474.54
May 3, 2023
We hope you enjoyed your ride
this morning.Thanks for riding, Dhruv
P a y t m
5/3/23 11:06 AMDhruv Kumar <[email protected]>
[Personal] Your W ednesday morning trip with Uber
1 message
Uber Receipts <noreply@uber .com> 3 May 2023 at 1 1:06
To: [email protected]
T o t a l ₹ 4 7 4 . 5 4
Trip Charge ₹474.54
Subtotal ₹474.54
Rider Promotion -₹5.27
Fare Adjustment ₹5.27
P a y m e n t s
₹474.54
A temporary hold of ₹474.54 was placed on your payment method Paytm. This is not a charge
and will be removed. It should disappear from your bank statement shortly. Learn More
Visit the trip page for more information, including invoices (where available)
08/05/2023, 14:56 Gmail - [Personal] Your Wednesday morning trip with Uber
https://mail.google.com/mail/u/0/?ik=9af0dfed43&view=pt&search=all&permthid=thread-f:1764850071084777313&simpl=msg-f:1764850071084777313 2/3
License Plate: KA05AM2236You rode with SHANKARA NAGA
5 . 0 0
R a t i n g
Rate or tip
10:05 AM
WMGP+PM2, Adarsh Palm
Retreat, Bellandur, Bengaluru,
Karnataka 560103, India
11:05 AM
135, Residency Rd,
Shanthala Nagar , Ashok
Nagar , Bengaluru, Karnataka
560025, India
Uber Go 15.77 kilometers | 1 h 0 min
Report lost item › Contact support›
My trips › The total of ₹474.54 has a GST of ₹38.52 included.
Switch Payment Method
Download PDF
08/05/2023, 14:56 Gmail - [Personal] Your Wednesday morning trip with Uber
https://mail.google.com/mail/u/0/?ik=9af0dfed43&view=pt&search=all&permthid=thread-f:1764850071084777313&simpl=msg-f:1764850071084777313 3/3
Forgot password
Privacy
Terms
Uber India Systems Private
Limited
Fares are inclusive of GST. Please download the tax invoice from the trip detail page for a full tax breakdown.
'''

    # Render the chat prompt exactly as the chain would for this receipt.
    prompt_value = chain.prompt.format_prompt(
        text=receipt_text,
        format_instructions=fixing_parser.get_format_instructions(),
    )

    # Prompt-free alternative: fixing_parser.parse(bad_completion).
    repaired = fixing_parser.parse_with_prompt(bad_completion, prompt_value)
    print("Using fixing parser:", repaired)