# ankur-bohra — Add basic structure (0d99179)
import logging
from enum import Enum
from typing import Union

from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers.enum import EnumOutputParser
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from pydantic import BaseModel

# from . import vendor
from . import accomodation, random_, travel_cab, travel_flight
class Category(Enum):
    """Bill categories that a piece of bill text can be classified into.

    Each member's value is the same string as its name. The spelling
    ``ACCOMODATION`` is kept as-is because it is part of the public
    interface and mirrors the ``accomodation`` module name.
    """

    ACCOMODATION = "ACCOMODATION"
    TRAVEL_FLIGHT = "TRAVEL_FLIGHT"
    TRAVEL_CAB = "TRAVEL_CAB"
    # VENDOR = "VENDOR"
    RANDOM = "RANDOM"
# Dispatch table: bill category -> sibling module handling that category.
# run_category_chain reads the `chain` and `output_parser` attributes of the
# selected module. The vendor entry stays commented out, like its import and
# its enum member.
category_modules = {
    Category.ACCOMODATION: accomodation,
    Category.TRAVEL_FLIGHT: travel_flight,
    Category.TRAVEL_CAB: travel_cab,
    # Category.VENDOR: vendor,
    Category.RANDOM: random_,
}
# Chat model backing the categorizing chain defined in this module.
model = ChatOpenAI(
    temperature=0,
    n=1,
    # max_tokens=300,
    # Extra request parameters handed to ChatOpenAI through model_kwargs.
    model_kwargs=dict(
        stop=None,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    ),
)
# Build categorizing chain
# The parser is created first because it is shared: the chain uses it as its
# output parser, and categorize_text asks it for the format instructions that
# fill the "{format_instructions}" placeholder of the system prompt.
category_parser = EnumOutputParser(enum=Category)

# The human message is just the bill text, supplied as "{text}" at run time.
human_message_prompt = HumanMessagePromptTemplate.from_template("{text}")

# NOTE(review): the prompt names two "travel" categories and mentions
# "Conference organizers", while Category has TRAVEL_CAB / TRAVEL_FLIGHT and no
# conference member -- confirm the wording is intended before changing it.
system_message_prompt = SystemMessagePromptTemplate.from_template(
    "You are a classifier that, given a bill's text, states what type of bill "
    "category it belongs to: accomodation (bills regarding stays), travel (bills "
    "concerning cab or other land rides), travel (bills concerning flights), random "
    "(bills concerning deliveries from e-commerce websites like amazon etc) bills.\n"
    "You may want to see if there are Room Details, Check-in/Check-out Date for "
    "Accomodation stay; Flight Details, Train Details, Bus Details Cab details for "
    "Travel; Conference Details for Conference organizers; anything else comes under "
    "random category. Your answers must be only the appropriate choice e.g. 'option' and "
    "not 'The given bill belongs to the option category.'\n"
    "{format_instructions}"
)

# System instructions first, then the bill text.
chat_prompt = ChatPromptTemplate.from_messages(
    [
        system_message_prompt,
        human_message_prompt,
    ]
)

categorize_chain = LLMChain(
    llm=model,
    prompt=chat_prompt,
    output_parser=category_parser,
)
def categorize_text(text: str) -> Category:
    """Categorize the text into one of the categories defined in Category.

    The text is sent through ``categorize_chain`` together with the format
    instructions produced by ``category_parser``.

    Args:
        text(str): The text to categorize.

    Returns: The category of the text.
    """
    return categorize_chain.run(
        text=text,
        format_instructions=category_parser.get_format_instructions(),
    )
def run_category_chain(category: Category, text: str) -> Union[BaseModel, None]:
    """Run the chain registered for the given category on the given text.

    Args:
        category(Category): The category for which the chain is to be run.
        text(str): The text on which the chain is to be run.

    Returns: The output of the chain, or None if running the chain raised an
        exception (the failure is logged with its traceback).

    Raises:
        KeyError: If ``category`` has no entry in ``category_modules``.
    """
    # Resolve the module once. As before, a missing category or a module
    # without an output parser fails here, outside the best-effort block.
    module = category_modules[category]
    output_parser = module.output_parser
    try:
        return module.chain.run(
            text=text,
            format_instructions=output_parser.get_format_instructions(),
        )
    except Exception:
        # Best-effort by design: report the failure (keeping the traceback)
        # and signal it to the caller with an explicit None.
        logging.getLogger(__name__).exception(
            "Error in running chain for category %s", category
        )
        return None
if __name__ == "__main__":
    # Manual smoke test: classify a sample bill, then run the chain for the
    # detected category. The sample appears to be OCR output of an e-commerce
    # invoice; its lines are kept at column 0 so the string content is unchanged.
    text = """amazonin
we)
Sold By :
Spigen India Pvt. Ltd.
* Rect/Killa Nos. 38//8/2 min, 192//22/1,196//2/1/1,
37//15/1, 15/2,, Adjacent to Starex School, Village
- Binola, National Highway -8, Tehsil - Manesar
Gurgaon, Haryana, 122413
IN
PAN No: ABACS5056L
GST Registration No: O6ABACS5056L12Z5
Order Number: 407-5335982-7837125
Order Date: 30.05.2023
Tax Invoice/Bill of Supply/Cash Memo
(Original for Recipient)
Billing Address :
Praveen Bohra
E-303, ParkView City 2, Sector 49, Sohna Road
GURGAON, HARYANA, 122018
IN
State/UT Code: 06
Shipping Address :
Praveen Bohra
Praveen Bohra
E-303, ParkView City 2, Sector 49, Sohna Road
GURGAON, HARYANA, 122018
IN
State/UT Code: 06
Place of supply: HARYANA
Place of delivery: HARYANA
Invoice Number : DEL5-21033
Invoice Details : HR-DEL5-918080915-2324
Invoice Date : 30.05.2023
Description at Tax |Tax /|Tax Total
p y Rate |Type |Amount|Amount
Black) | BO8BHLZHBH ( ACS01744INP )
HSN:39269099
1 |Spigen Liquid Air Back Cover Case for iPhone 12 Mini (TPU | Matte
1846.62] 1 |%846.62| 9% |CGST! %76.19 |%999.00
9% |SGST| %76.19
TOTAL:
Amount in Words:
Nine Hundred Ninety-nine only
Whether tax is payable under reverse charge - No
For Spigen India Pvt. Ltd.:
sSoigenrn
Authorized Signatory
Payment Transaction ID: Date & Time: 30/05/2023, 10:48:43 Invoice Value: Mode of Payment: Credit
2rs9ZEF8BwU9VmWiCc2Us hrs 999.00 Card
*ASSPL-Amazon Seller Services Pvt. Ltd., ARIPL-Amazon Retail India Pvt. Ltd. (only where Amazon Retail India Pvt. Ltd. fulfillment center is co-located)
Customers desirous of availing input GST credit are requested to create a Business account and purchase on Amazon.in/business from Business eligible offers
Please note that this invoice is not a demand for payment
Page 1 of 1"""
    # Step 1: ask the categorizing chain which category the bill belongs to.
    category = categorize_text(text)
    print("Category:", category)
    print("\n\n")
    # Step 2: run the chain of the module registered for that category.
    result = run_category_chain(category, text)
    print(result)