File size: 5,277 Bytes
0d99179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
from enum import Enum
from typing import Union

# from . import vendor
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers.enum import EnumOutputParser
from langchain.prompts import (ChatPromptTemplate, HumanMessagePromptTemplate,
                               SystemMessagePromptTemplate)
from pydantic import BaseModel

from . import accomodation, random_, travel_cab, travel_flight


class Category(Enum):
    """Bill categories that a piece of bill text can be classified into.

    Every member's value is the same string as its name. The enum is handed to
    ``EnumOutputParser`` to parse the classifier's answer, and its members are
    the keys of ``category_modules``.
    """
    ACCOMODATION = "ACCOMODATION"
    TRAVEL_FLIGHT = "TRAVEL_FLIGHT"
    TRAVEL_CAB = "TRAVEL_CAB"
    # VENDOR = "VENDOR"
    RANDOM = "RANDOM"


# Maps each Category member to the sibling module that handles bills of that
# category. run_category_chain reads the `chain` and `output_parser` attributes
# of the selected module, so every module listed here must expose both.
category_modules = {
    Category.ACCOMODATION: accomodation,
    Category.TRAVEL_FLIGHT: travel_flight,
    Category.TRAVEL_CAB: travel_cab,
    # Category.VENDOR: vendor,
    Category.RANDOM: random_,
}

# Chat model used as the LLM of the categorizing chain (categorize_chain).
# temperature=0 and n=1 ask for a single completion per call; presumably chosen
# to keep the classification output as repeatable as possible -- TODO confirm.
# The remaining sampling options are passed explicitly through model_kwargs.
model = ChatOpenAI(
    temperature=0,
    n=1,
    # max_tokens=300,
    model_kwargs={
        "stop": None,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    },
)

# Build the categorizing chain: a system prompt describing the classification
# task, a human message carrying the bill text, and an enum parser that turns
# the model's answer into a Category member.
# The templates take two variables, {format_instructions} and {text}; both are
# supplied by categorize_text when it runs the chain.
# NOTE(review): the prompt names "travel" twice (cab/land rides and flights) and
# mentions "Conference Details", while Category has TRAVEL_CAB, TRAVEL_FLIGHT
# and no conference member -- confirm the wording matches the intended options.
system_message_prompt = SystemMessagePromptTemplate.from_template(
    "You are a classifier that, given a bill's text, states what type of bill "
    "category it belongs to: accomodation (bills regarding stays), travel (bills "
    "concerning cab or other land rides), travel (bills concerning flights), random "
    "(bills concerning deliveries from e-commerce websites like amazon etc) bills.\n"
    "You may want to see if there are Room Details, Check-in/Check-out Date for "
    "Accomodation stay; Flight Details, Train Details, Bus Details Cab details for "
    "Travel; Conference Details for Conference organizers; anything else comes under "
    "random category. Your answers must be only the appropriate choice e.g. 'option' and "
    "not 'The given bill belongs to the option category.'\n"
    "{format_instructions}"
)
# The human message is the raw bill text, inserted verbatim.
human_message_prompt = HumanMessagePromptTemplate.from_template("{text}")
chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)
# Parses the model's reply into one of the Category members.
category_parser = EnumOutputParser(enum=Category)
categorize_chain = LLMChain(
    llm=model, prompt=chat_prompt, output_parser=category_parser
)


def categorize_text(text: str) -> Category:
    """Classify bill text by running the categorizing chain on it.

    The chain is backed by the module-level ChatOpenAI model and is given the
    format instructions produced by the category parser.

    Args:
        text(str): The text to categorize.

    Returns: Whatever the categorizing chain yields for the text, expected to be
        one of the categories defined in Category.
    """
    run_chain = categorize_chain.run
    instructions = category_parser.get_format_instructions()
    return run_chain(text=text, format_instructions=instructions)


def run_category_chain(category: Category, text: str) -> Union[BaseModel, None]:
    """Run the chain registered for a category against the given text.

    The module for the category is looked up in ``category_modules``; its
    ``chain`` is run with the text and with the format instructions of its
    ``output_parser``.

    Args:
        category(Category): The category whose chain should be run.
        text(str): The text to feed to the chain.

    Returns: The output of the chain, or None when running the chain raised an
        exception (the error is printed instead of propagated).
    """
    handler = category_modules[category]
    parser = handler.output_parser
    try:
        # Attribute access and instruction building stay inside the try so any
        # failure while preparing or running the chain is reported, not raised.
        outcome = handler.chain.run(
            text=text, format_instructions=parser.get_format_instructions()
        )
    except Exception as exc:
        print("Error in running chain for category", category, ":", exc)
        return None
    return outcome


if __name__ == "__main__":
    # Manual smoke test: categorize a sample bill and then run the chain of the
    # detected category on it. The sample appears to be machine-extracted text
    # of an Amazon.in tax invoice (garbled characters are part of the input).
    text = """amazonin
we)

Sold By :

Spigen India Pvt. Ltd.

* Rect/Killa Nos. 38//8/2 min, 192//22/1,196//2/1/1,     
37//15/1, 15/2,, Adjacent to Starex School, Village      
- Binola, National Highway -8, Tehsil - Manesar
Gurgaon, Haryana, 122413

IN

PAN No: ABACS5056L
GST Registration No: O6ABACS5056L12Z5

Order Number: 407-5335982-7837125
Order Date: 30.05.2023

Tax Invoice/Bill of Supply/Cash Memo
(Original for Recipient)

Billing Address :

Praveen Bohra

E-303, ParkView City 2, Sector 49, Sohna Road
GURGAON, HARYANA, 122018

IN

State/UT Code: 06

Shipping Address :

Praveen Bohra

Praveen Bohra

E-303, ParkView City 2, Sector 49, Sohna Road
GURGAON, HARYANA, 122018

IN

State/UT Code: 06

Place of supply: HARYANA

Place of delivery: HARYANA

Invoice Number : DEL5-21033
Invoice Details : HR-DEL5-918080915-2324
Invoice Date : 30.05.2023

Description at Tax |Tax /|Tax Total
p y Rate |Type |Amount|Amount

Black) | BO8BHLZHBH ( ACS01744INP )
HSN:39269099

1 |Spigen Liquid Air Back Cover Case for iPhone 12 Mini (TPU | Matte
1846.62] 1 |%846.62| 9% |CGST! %76.19 |%999.00
9% |SGST| %76.19

TOTAL:

Amount in Words:
Nine Hundred Ninety-nine only

Whether tax is payable under reverse charge - No

For Spigen India Pvt. Ltd.:
sSoigenrn

Authorized Signatory

Payment Transaction ID: Date & Time: 30/05/2023, 10:48:43 Invoice Value: Mode of Payment: Credit
2rs9ZEF8BwU9VmWiCc2Us hrs 999.00 Card

*ASSPL-Amazon Seller Services Pvt. Ltd., ARIPL-Amazon Retail India Pvt. Ltd. (only where Amazon Retail India Pvt. Ltd. fulfillment center is co-located)

Customers desirous of availing input GST credit are requested to create a Business account and purchase on Amazon.in/business from Business eligible offers

Please note that this invoice is not a demand for payment

Page 1 of 1"""
    # Step 1: ask the categorizing chain which Category the bill belongs to.
    category = categorize_text(text)
    print("Category:", category)

    print("\n\n")
    # Step 2: run the chain of the detected category on the same text; the
    # result is None if that chain raised (see run_category_chain).
    result = run_category_chain(category, text)
    print(result)