Spaces:
Sleeping
Sleeping
from enum import Enum | |
from typing import Union | |
# from . import vendor | |
from langchain.chains import LLMChain | |
from langchain.chat_models import ChatOpenAI | |
from langchain.output_parsers import PydanticOutputParser | |
from langchain.output_parsers.enum import EnumOutputParser | |
from langchain.prompts import (ChatPromptTemplate, HumanMessagePromptTemplate, | |
SystemMessagePromptTemplate) | |
from pydantic import BaseModel | |
from . import accomodation, random_, travel_cab, travel_flight | |
class Category(Enum):
    """Bill categories that the classifier can assign.

    Every member's value is the same string as its name. The spelling
    ``ACCOMODATION`` is part of the runtime value and is kept unchanged.
    """

    ACCOMODATION = "ACCOMODATION"
    TRAVEL_FLIGHT = "TRAVEL_FLIGHT"
    TRAVEL_CAB = "TRAVEL_CAB"
    # VENDOR = "VENDOR"  (currently disabled)
    RANDOM = "RANDOM"
# Dispatch table: maps each Category member to the sibling module that handles
# it. run_category_chain() reads the `output_parser` and `chain` attributes of
# the selected module.
category_modules = {
    Category.ACCOMODATION: accomodation,
    Category.TRAVEL_FLIGHT: travel_flight,
    Category.TRAVEL_CAB: travel_cab,
    # Category.VENDOR: vendor,  (disabled together with the vendor import)
    Category.RANDOM: random_,
}
# Chat model used by the categorizing chain below: temperature 0, one
# completion per request (n=1), and explicit sampling/penalty settings
# forwarded through model_kwargs.
model = ChatOpenAI(
    temperature=0,
    n=1,
    # max_tokens=300,
    model_kwargs={
        "stop": None,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    },
)
# Build the categorizing chain: a system prompt describing the classification
# task, the bill text as the human message, and an enum parser that maps the
# model's reply onto a Category member.
#
# NOTE(review): the prompt text names "travel" twice (cab vs. flight) and
# mentions conference details, while Category has no conference member --
# confirm whether the wording should be aligned with the enum.
system_message_prompt = SystemMessagePromptTemplate.from_template(
    "You are a classifier that, given a bill's text, states what type of bill "
    "category it belongs to: accomodation (bills regarding stays), travel (bills "
    "concerning cab or other land rides), travel (bills concerning flights), random "
    "(bills concerning deliveries from e-commerce websites like amazon etc) bills.\n"
    "You may want to see if there are Room Details, Check-in/Check-out Date for "
    "Accomodation stay; Flight Details, Train Details, Bus Details Cab details for "
    "Travel; Conference Details for Conference organizers; anything else comes under "
    "random category. Your answers must be only the appropriate choice e.g. 'option' and "
    "not 'The given bill belongs to the option category.'\n"
    # Placeholder filled at call time with the parser's format instructions.
    "{format_instructions}"
)

# The human message is just the raw bill text.
human_message_prompt = HumanMessagePromptTemplate.from_template("{text}")

chat_prompt = ChatPromptTemplate.from_messages(
    [
        system_message_prompt,
        human_message_prompt,
    ]
)

# Parser restricted to the members of Category.
category_parser = EnumOutputParser(enum=Category)

categorize_chain = LLMChain(
    llm=model,
    prompt=chat_prompt,
    output_parser=category_parser,
)
def categorize_text(text: str) -> Category:
    """Categorizes the text into one of the categories defined in Category by
    running the module-level categorizing chain (a ChatOpenAI model).

    Args:
        text(str): The text to categorize.

    Returns: The category of the text.
    """
    # The system prompt has a {format_instructions} placeholder; fill it with
    # the enum parser's own instructions on every call.
    format_instructions = category_parser.get_format_instructions()
    return categorize_chain.run(text=text, format_instructions=format_instructions)
def run_category_chain(category: Category, text: str) -> Union[BaseModel, None]:
    """Runs the chain for the given category on the given text.

    The handler module is looked up in ``category_modules``; its
    ``output_parser`` supplies the format instructions and its ``chain``
    is run on the text.

    Args:
        category(Category): The category for which the chain is to be run.
        text(str): The text on which the chain is to be run.

    Returns: The output of the chain, or None if running the chain raised an
        exception (the error is printed, not re-raised).
    """
    # The lookups happen outside the try block, so an unknown category or a
    # module without `output_parser` still propagates to the caller.
    handler = category_modules[category]
    parser = handler.output_parser
    try:
        outcome = handler.chain.run(
            text=text,
            format_instructions=parser.get_format_instructions(),
        )
    except Exception as e:
        # Best-effort: report the failure and signal it with None.
        print("Error in running chain for category", category, ":", e)
        return None
    return outcome
if __name__ == "__main__":
    # Manual smoke test: classify a sample OCR'd Amazon invoice, then run the
    # chain of the detected category on the same text.
    sample_bill_text = """amazonin
we)
Sold By :
Spigen India Pvt. Ltd.
* Rect/Killa Nos. 38//8/2 min, 192//22/1,196//2/1/1,
37//15/1, 15/2,, Adjacent to Starex School, Village
- Binola, National Highway -8, Tehsil - Manesar
Gurgaon, Haryana, 122413
IN
PAN No: ABACS5056L
GST Registration No: O6ABACS5056L12Z5
Order Number: 407-5335982-7837125
Order Date: 30.05.2023
Tax Invoice/Bill of Supply/Cash Memo
(Original for Recipient)
Billing Address :
Praveen Bohra
E-303, ParkView City 2, Sector 49, Sohna Road
GURGAON, HARYANA, 122018
IN
State/UT Code: 06
Shipping Address :
Praveen Bohra
Praveen Bohra
E-303, ParkView City 2, Sector 49, Sohna Road
GURGAON, HARYANA, 122018
IN
State/UT Code: 06
Place of supply: HARYANA
Place of delivery: HARYANA
Invoice Number : DEL5-21033
Invoice Details : HR-DEL5-918080915-2324
Invoice Date : 30.05.2023
Description at Tax |Tax /|Tax Total
p y Rate |Type |Amount|Amount
Black) | BO8BHLZHBH ( ACS01744INP )
HSN:39269099
1 |Spigen Liquid Air Back Cover Case for iPhone 12 Mini (TPU | Matte
1846.62] 1 |%846.62| 9% |CGST! %76.19 |%999.00
9% |SGST| %76.19
TOTAL:
Amount in Words:
Nine Hundred Ninety-nine only
Whether tax is payable under reverse charge - No
For Spigen India Pvt. Ltd.:
sSoigenrn
Authorized Signatory
Payment Transaction ID: Date & Time: 30/05/2023, 10:48:43 Invoice Value: Mode of Payment: Credit
2rs9ZEF8BwU9VmWiCc2Us hrs 999.00 Card
*ASSPL-Amazon Seller Services Pvt. Ltd., ARIPL-Amazon Retail India Pvt. Ltd. (only where Amazon Retail India Pvt. Ltd. fulfillment center is co-located)
Customers desirous of availing input GST credit are requested to create a Business account and purchase on Amazon.in/business from Business eligible offers
Please note that this invoice is not a demand for payment
Page 1 of 1"""

    # Step 1: classify the bill.
    detected_category = categorize_text(sample_bill_text)
    print("Category:", detected_category)
    print("\n\n")

    # Step 2: run the chain of the detected category on the same text.
    extracted = run_category_chain(detected_category, sample_bill_text)
    print(extracted)