Spaces:
Build error
Build error
File size: 5,277 Bytes
0d99179 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
from enum import Enum
from typing import Union
# from . import vendor
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers.enum import EnumOutputParser
from langchain.prompts import (ChatPromptTemplate, HumanMessagePromptTemplate,
SystemMessagePromptTemplate)
from pydantic import BaseModel
from . import accomodation, random_, travel_cab, travel_flight
class Category(Enum):
    """Kinds of bill this module can classify a text into.

    Every member's value is the same string as its name.
    """

    ACCOMODATION = "ACCOMODATION"
    TRAVEL_FLIGHT = "TRAVEL_FLIGHT"
    TRAVEL_CAB = "TRAVEL_CAB"
    # The VENDOR category is currently switched off:
    # VENDOR = "VENDOR"
    RANDOM = "RANDOM"
# Lookup table: bill category -> the sibling module handling that category.
# Elsewhere in this file each module is accessed through its `chain` and
# `output_parser` attributes.
category_modules = {
    Category.ACCOMODATION: accomodation,
    Category.TRAVEL_FLIGHT: travel_flight,
    Category.TRAVEL_CAB: travel_cab,
    # Vendor handling is disabled for now:
    # Category.VENDOR: vendor,
    Category.RANDOM: random_,
}
# Extra request parameters handed to the chat model through `model_kwargs`.
_openai_request_overrides = {
    "stop": None,
    "top_p": 1,
    "frequency_penalty": 0,
    "presence_penalty": 0,
}

# Chat model used by the categorizing chain below: temperature 0 and a single
# completion per request (presumably to keep classifications repeatable).
# No max_tokens limit is set (a 300-token cap was tried and left disabled).
model = ChatOpenAI(
    temperature=0,
    n=1,
    model_kwargs=_openai_request_overrides,
)
# --- Categorizing chain -----------------------------------------------------
# System instructions for the classifier. `{format_instructions}` is filled in
# at call time with the output parser's format instructions.
# NOTE(review): the text names "travel" twice (cab and flight) and mentions a
# conference kind that has no Category member -- confirm this wording is
# intended before relying on it.
_CATEGORIZER_SYSTEM_TEMPLATE = (
    "You are a classifier that, given a bill's text, states what type of bill "
    "category it belongs to: accomodation (bills regarding stays), travel (bills "
    "concerning cab or other land rides), travel (bills concerning flights), random "
    "(bills concerning deliveries from e-commerce websites like amazon etc) bills.\n"
    "You may want to see if there are Room Details, Check-in/Check-out Date for "
    "Accomodation stay; Flight Details, Train Details, Bus Details Cab details for "
    "Travel; Conference Details for Conference organizers; anything else comes under "
    "random category. Your answers must be only the appropriate choice e.g. 'option' and "
    "not 'The given bill belongs to the option category.'\n"
    "{format_instructions}"
)

system_message_prompt = SystemMessagePromptTemplate.from_template(
    _CATEGORIZER_SYSTEM_TEMPLATE
)
# The human turn carries nothing but the bill text itself.
human_message_prompt = HumanMessagePromptTemplate.from_template("{text}")
chat_prompt = ChatPromptTemplate.from_messages(
    [
        system_message_prompt,
        human_message_prompt,
    ]
)

# Parser configured with the Category enum; it is attached to the chain as its
# output parser.
category_parser = EnumOutputParser(enum=Category)
categorize_chain = LLMChain(
    llm=model,
    prompt=chat_prompt,
    output_parser=category_parser,
)
def categorize_text(text: str) -> Category:
    """Classify a bill's text by running it through the categorizing chain.

    Args:
        text(str): The text to categorize.

    Returns: The result of ``categorize_chain.run`` for the text, annotated as
        a member of ``Category``.
    """
    # The system prompt has a `{format_instructions}` slot; fill it with the
    # enum parser's own instructions on every call.
    instructions = category_parser.get_format_instructions()
    return categorize_chain.run(text=text, format_instructions=instructions)
def run_category_chain(category: Category, text: str) -> Union[BaseModel, None]:
    """Run the chain registered for ``category`` on ``text``.

    Args:
        category(Category): The category whose module in ``category_modules``
            supplies the chain and the output parser.
        text(str): The text on which the chain is to be run.

    Returns: The output of the chain, or ``None`` when running the chain
        raised (the error is printed instead of being propagated).

    Raises:
        KeyError: If ``category`` has no entry in ``category_modules``; the
            lookup happens before the guarded section.
    """
    # Resolve the category's module once and reuse it for parser and chain.
    module = category_modules[category]
    output_parser = module.output_parser
    try:
        return module.chain.run(
            text=text, format_instructions=output_parser.get_format_instructions()
        )
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and hand back
        # None so the caller can carry on.
        print("Error in running chain for category", category, ":", e)
        return None
if __name__ == "__main__":
    # Manual smoke test: classify a sample invoice text, then run the chain
    # belonging to the detected category. The sample appears to be OCR output
    # and is kept verbatim, glitches included.
    sample_bill_text = """amazonin
we)
Sold By :
Spigen India Pvt. Ltd.
* Rect/Killa Nos. 38//8/2 min, 192//22/1,196//2/1/1,
37//15/1, 15/2,, Adjacent to Starex School, Village
- Binola, National Highway -8, Tehsil - Manesar
Gurgaon, Haryana, 122413
IN
PAN No: ABACS5056L
GST Registration No: O6ABACS5056L12Z5
Order Number: 407-5335982-7837125
Order Date: 30.05.2023
Tax Invoice/Bill of Supply/Cash Memo
(Original for Recipient)
Billing Address :
Praveen Bohra
E-303, ParkView City 2, Sector 49, Sohna Road
GURGAON, HARYANA, 122018
IN
State/UT Code: 06
Shipping Address :
Praveen Bohra
Praveen Bohra
E-303, ParkView City 2, Sector 49, Sohna Road
GURGAON, HARYANA, 122018
IN
State/UT Code: 06
Place of supply: HARYANA
Place of delivery: HARYANA
Invoice Number : DEL5-21033
Invoice Details : HR-DEL5-918080915-2324
Invoice Date : 30.05.2023
Description at Tax |Tax /|Tax Total
p y Rate |Type |Amount|Amount
Black) | BO8BHLZHBH ( ACS01744INP )
HSN:39269099
1 |Spigen Liquid Air Back Cover Case for iPhone 12 Mini (TPU | Matte
1846.62] 1 |%846.62| 9% |CGST! %76.19 |%999.00
9% |SGST| %76.19
TOTAL:
Amount in Words:
Nine Hundred Ninety-nine only
Whether tax is payable under reverse charge - No
For Spigen India Pvt. Ltd.:
sSoigenrn
Authorized Signatory
Payment Transaction ID: Date & Time: 30/05/2023, 10:48:43 Invoice Value: Mode of Payment: Credit
2rs9ZEF8BwU9VmWiCc2Us hrs 999.00 Card
*ASSPL-Amazon Seller Services Pvt. Ltd., ARIPL-Amazon Retail India Pvt. Ltd. (only where Amazon Retail India Pvt. Ltd. fulfillment center is co-located)
Customers desirous of availing input GST credit are requested to create a Business account and purchase on Amazon.in/business from Business eligible offers
Please note that this invoice is not a demand for payment
Page 1 of 1"""

    # Step 1: decide which kind of bill this is.
    detected_category = categorize_text(sample_bill_text)
    print("Category:", detected_category)
    print("\n\n")

    # Step 2: run the chain of the detected category and show its output.
    extraction = run_category_chain(detected_category, sample_bill_text)
    print(extraction)
|