suhasg committed on
Commit
162f3ee
·
1 Parent(s): 584ab38

first commit

Browse files
Files changed (4) hide show
  1. .env +1 -0
  2. app.py +307 -0
  3. flight.jsnol +53 -0
  4. requirements.txt +6 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ API_KEY="uN2laCeHROtues7F46DM9f1gV1tTVwAorXw5adna"
app.py ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import json
3
+ from langchain.embeddings.openai import OpenAIEmbeddings
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.vectorstores import Chroma
6
+ from langchain.vectorstores import FAISS
7
+ from langchain.embeddings import HuggingFaceEmbeddings
8
+ from langchain.document_loaders import JSONLoader
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ import pandas as pd
11
+ from langchain.vectorstores import Chroma
12
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
13
+ from langchain.embeddings import SentenceTransformerEmbeddings
14
+ import os
15
+ import cohere
16
+ from dotenv import load_dotenv
17
+
18
+
19
# Load variables from a local .env file into the process environment before
# the key is looked up.
load_dotenv()
api_key = os.environ.get('API_KEY')

# Single Cohere client shared by every LLM call in this module.
client = cohere.Client(api_key)
24
+
25
+ # Define functions
26
def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split loaded documents into chunks with a recursive character splitter.

    Args:
        documents: Documents handed to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` passed to the splitter.
        chunk_overlap: ``chunk_overlap`` passed to the splitter.

    Returns:
        The result of ``split_documents`` for ``documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)
30
+
31
def make_api_call(gpt_assistant_prompt, gpt_user_prompt):
    """Send one prompt to the Cohere chat endpoint and return the reply text.

    Args:
        gpt_assistant_prompt: Instructions describing the role the model
            should play; sent as the chat preamble.
        gpt_user_prompt: The actual request; sent as the chat message.

    Returns:
        The ``text`` attribute of the chat response.
    """
    # ``client`` is a ``cohere.Client`` and its result is read through
    # ``.text``, i.e. the v1 chat interface. That interface takes a single
    # ``message`` string plus an optional ``preamble``; an OpenAI-style
    # ``messages=[...]`` list is not a parameter of this client.
    response = client.chat(
        model="command",
        message=gpt_user_prompt,
        preamble=gpt_assistant_prompt,
        temperature=0.0,  # deterministic output: the reply is parsed by code
        max_tokens=1000,
    )
    return response.text
44
+
45
def condition_split(query):
    """Ask the model to break an offer description into a structured dict.

    The model is prompted to return a JSON object with the keys
    ``condition_for_eligibility``, ``promotion_offered`` and ``scheduling``.
    The first ``{`` through the last ``}`` of the reply is parsed.

    Args:
        query: Free-text description of the customer offer.

    Returns:
        The parsed object (a ``dict`` when the model follows the template).

    Raises:
        json.JSONDecodeError: If the reply contains no ``{...}`` span or the
            span cannot be parsed.
    """
    import ast

    structure = {
        "condition_for_eligibility": {
            "logic": "form the logic from query above like (A and B) or C",
            "definitions": {
                "condition": "the text Condition extracted from the query"
            }
        },
        "promotion_offered": "Description of Promotion",
        "scheduling": "Offer Scheduling Details"
    }
    # Show the template as real JSON (double quotes). Interpolating the dict
    # directly would print a Python repr with single quotes, which nudges the
    # model towards output that is not valid JSON.
    structure_json = json.dumps(structure, indent=4)
    template_initial = f"""
Given a text description of a customer offer that outlines eligibility criteria, the promotion offered, and the scheduling of the offer, your task is to parse the text and organize the information into a structured JSON format. The JSON structure should include three main components: condition_for_eligibility, promotion_offered, and scheduling. Use logical expressions to detail the conditions for eligibility.

Text Description:
{query}
Your Objectives:

Extract and Label Conditions for Eligibility:

Identify specific metrics or actions (e.g., number of calls) that define eligibility.
Use labels (A, B, C) for distinct conditions.
Determine the logical relationship between these conditions (e.g., (A and B) or C).
Outline the Promotion Offered:

Identify the key benefit or discount promised to eligible customers.
Determine the Scheduling for the Offer:

Capture the frequency or timing of when the offer is made (e.g., daily, weekly).
Structure Your Response as Follows:

the structure is

{structure_json}
"""
    gpt_assistant_prompt = """You are an expert in Semantic Understanding, Marketing, and Business Analysis.
You need to extract specific information like conditions, scheduling details, and promotions from a paragraph, typically blending technical skills and domain-specific knowledge.
Give only the JSON as the response.
"""
    resp = make_api_call(gpt_assistant_prompt, template_initial)

    start_index = resp.find("{")
    end_index = resp.rfind("}")
    if start_index == -1 or end_index < start_index:
        # Same exception type a failed parse raises, but with a message that
        # says what is actually wrong.
        raise json.JSONDecodeError("no JSON object found in model response", resp, 0)
    payload = resp[start_index:end_index + 1]

    # 1) Strict JSON first, so apostrophes inside values ("customer's")
    #    survive untouched.
    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        pass

    # 2) Python-literal style output (single-quoted keys/values).
    #    literal_eval only evaluates literals, never arbitrary code.
    try:
        literal = ast.literal_eval(payload)
    except (ValueError, SyntaxError):
        literal = None
    if isinstance(literal, dict):
        return literal

    # 3) Last resort: the historical quote swap. Raises JSONDecodeError if
    #    the payload is still unparsable.
    return json.loads(payload.replace("'", '"'))
92
def make_prompt(query, db1):
    """Build a KPI-grounded prompt for ``query`` and return the model's condition.

    The four documents most similar to ``query`` are fetched from ``db1``.
    Each document's ``page_content`` is parsed as JSON and rendered as
    ``"<kpis>: <description>"``; the rendered lines are listed in the prompt.

    Args:
        query: The user's request text.
        db1: Vector store exposing ``similarity_search(query, k=...)``.

    Returns:
        The model reply converted with ``str``.

    Raises:
        json.JSONDecodeError: If a matched document's content is not JSON.
        KeyError: If a matched document lacks ``kpis`` or ``description``.
    """
    matching_docs = db1.similarity_search(query, k=4)
    records = [json.loads(doc.page_content) for doc in matching_docs]
    kpi_lines = [f"{record['kpis']}: {record['description']}" for record in records]
    # One KPI per line. The separator used to be the literal text ",/n ",
    # a typo for a newline escape.
    final = ",\n ".join(kpi_lines)

    prompt_template = f"""
Given the following user query and matched information if the KPI's:

User Query: {query}

Matched KPI's:
{final}

Write a condition to fulfill the user query.

IMPORTANT:
- Use only the KPI's mentioned in the Matched KPI's.
- Associate KPI's with the corresponding descriptions provided in the Matched KPI's.
- provide only the condition in response
- only the exact KPI and condition value is to be present in the response no other text should be present.
- only use one matching KPI or the closer once
"""
    gpt_assistant_prompt = """You are an expert in Semantic Understanding, Marketing, and Business Analysis.
You need to check whether the text matches. If the matching probability is high, then proceed with the requirement.
"""

    resp = make_api_call(gpt_assistant_prompt, prompt_template)
    return str(resp)
124
def replace_with_shielding(json_data, condition_string):
    """Substitute every key of ``json_data`` found in ``condition_string``.

    Keys are processed in dictionary order. Each key is replaced by
    ``str(value)`` until a value containing the sentence
    "The provided KPI's do not match the user query." is met; that key is
    replaced by a fixed "update the KPI file" message and later keys are left
    as they appear in the input. If the message ends up in the result,
    everything after its first occurrence is cut off.

    All substitutions happen in one left-to-right pass with longer keys tried
    first, so that:
      * a key that is a prefix/substring of another key (``A`` vs ``AB``)
        cannot corrupt the longer one;
      * inserted values are never re-scanned, so a value that happens to
        contain another key is "shielded" from further replacement;
      * no internal placeholder token can leak into the returned string.

    Args:
        json_data: Mapping of label (``str``) to replacement value.
        condition_string: Text containing the labels.

    Returns:
        The substituted (and possibly truncated) string.
    """
    import re

    mismatch_marker = "The provided KPI's do not match the user query."
    mismatch_message = (
        "Description not matching with kpi so update kpi and description in kpi's json file"
    )

    # Decide what each key turns into, stopping at the first mismatch.
    substitutions = {}
    for key, value in json_data.items():
        if not key:
            continue  # an empty label would match between every character
        text = str(value)
        if mismatch_marker in text:
            substitutions[key] = mismatch_message
            break
        substitutions[key] = text

    labels = sorted((key for key in json_data if key), key=len, reverse=True)
    if labels:
        # Every label takes part in matching (so a skipped long label still
        # protects its text from a shorter one), but only labels with a
        # decided substitution are rewritten.
        pattern = re.compile("|".join(re.escape(label) for label in labels))
        condition_string = pattern.sub(
            lambda match: substitutions.get(match.group(0), match.group(0)),
            condition_string,
        )

    cut = condition_string.find(mismatch_message)
    if cut != -1:
        condition_string = condition_string[:cut] + mismatch_message
    return condition_string
145
def form_json_rule(condition_string):
    """Ask the model to turn a plain-text condition into a rule JSON document.

    The prompt contains a sample rule, a field-by-field description of the
    rule schema, and finally ``condition_string`` itself, which takes the
    place of the ``shielded_replaced_string`` marker in the template.

    Args:
        condition_string: Text describing the rule to generate.

    Returns:
        The raw model reply, as returned by ``make_api_call``.
    """
    marker = "shielded_replaced_string"
    rule_prompt_template = """
The task is to generate a json format with the help of english text.
I will help you with some details about the conversion.
a sample rule json structure is
{"featureId":"","appName":"","username":"","password":"","reqTxnId":"","msgOrigin":"","msgDest":"","timestamp":"","id":"","ruletype":"","data":{"detail":{"rules":{"id":"0","pid":"#","childrens":[{"id":"0_0","pid":"0","type":"conditions","option":"All","childrens":[{"id":"0_0_0","pid":"0_0","type":"condition","profile":{"id":1,"name":"P_AON"},"operator":">","values":{"value":"30"}},{"id":"0_0_1","pid":"0_0","type":"condition","profile":{"id":862,"name":"P_DEVICE_TYPE"},"operator":"=","values":{"value":"PHONE"}},{"id":"0_0_2","pid":"0_0","type":"action","action":{"id":98,"name":"Mobile App Notification"},"field":[{"name":"ActionID","value":"0_0_2"},{"name":"ActionName","value":""},{"name":"ActionCall","value":""}],"request":{"field":[]}}]}]}}}}

Root Level Properties
featureId: String. A unique identifier for the feature.
appName: String. The name of the application.
username: String. The username for authentication.
password: String. The password for authentication.
reqTxnId: String. A unique transaction identifier.
msgOrigin: String. The origin of the message.
msgDest: String. The destination of the message.
timestamp: String (ISO 8601 format). The timestamp of the request or action.
id: String. A unique identifier for this particular instance.
ruletype: String. The type of rule being defined.
Data and Detail Section
data: Object container.
detail: Object within 'data'.
rules: Object representing the rule logic.
id: String. The unique identifier of the rule.
pid: String. The parent identifier of the rule.
childrens: Array of child objects. Each object represents a condition or an action.
type: String. Specifies if it's a 'condition' or 'action'
if inside childrens if type is condition then the option can come as a logical operator like 'All' or 'Any'.
ALL is like an and operation where as Any is like an OR operation
If type is 'any' or 'and', it contains a conditions array with condition objects.
If type is 'condition' or 'action', it follows the respective structures below.
Condition Structure
id: String. The unique identifier of the condition.
pid: String. The parent identifier of the condition.
profile: Object containing details of the condition.
name: String. The name of the condition.
id: String. The unique identifier of the condition.
operator: String. The operation applied in the condition (e.g., '=', '<>').
values: String. The values to check against in the condition.
isTextMode: Boolean. Indicates text mode evaluation.
Action Structure
id: String. The unique identifier of the action.
pid: String. The parent identifier of the action.
action: Object containing details of the action.
name: String. The name of the action.
id: String. The unique identifier of the action.
field: Object of parameters related to the action (key-value pairs).
request (Optional): Object detailing an external service request (key-value pairs).
isTextMode: Boolean. Indicates text mode processing.
Schedule Section
schedule: Object defining scheduling details.
field: Array of scheduling parameter objects.
Each object contains scheduling details (key-value pairs) like ScheduleName, ScheduleType, CAMPAIGN_NAME, ExpiryDate.

now from the information provided below convert it to json
shielded_replaced_string
"""
    # Splice the condition in wherever the marker occurs (split + join is the
    # same substitution as a plain replace of the marker).
    user_prompt = condition_string.join(rule_prompt_template.split(marker))

    rule_maker_instructions = """You are a json rule maker. You make rules based on the structure provided.
IMPORTANT:
Only give the json as response no other text should be present.
Give the response json in compact form.
"""
    return make_api_call(rule_maker_instructions, user_prompt)
209
+ # Streamlit App
210
def _get_vector_store(documents, digest):
    """Return the Chroma store for the current upload, building it once per session."""
    cached = st.session_state.get("kpi_vector_store")
    if cached is not None and cached[0] == digest:
        return cached[1]

    docs = split_docs(documents)
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    db1 = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        # Stable ids: adding the same file to the persisted collection again
        # overwrites its chunks instead of appending duplicates, which would
        # otherwise crowd the top-k of every similarity search.
        ids=[f"{digest}-{position}" for position in range(len(docs))],
        persist_directory="embeddings",
    )
    db1.persist()
    st.session_state["kpi_vector_store"] = (digest, db1)
    return db1


def _shielded_condition(query, db1):
    """Run the prompt, split and substitution steps for ``query``."""
    condition_string = make_prompt(query, db1)
    json_data = condition_split(query)
    return replace_with_shielding(json_data, condition_string)


def main():
    """Streamlit entry point: upload a KPI JSON-lines file and run the pipeline.

    After a file is uploaded, one button per pipeline stage is shown
    (split, embed, condition split, prompt, shielding, rule generation).
    Stages that need a query ask for one when the text box is empty.
    """
    import hashlib

    st.title("Flight Data Analysis")

    # "jsnol" is kept for existing files; "jsonl" is the conventional spelling.
    uploaded_file = st.file_uploader("Choose a JSON file", type=["jsnol", "jsonl"])
    if uploaded_file is None:
        return

    file_bytes = uploaded_file.getvalue()
    digest = hashlib.sha256(file_bytes).hexdigest()

    # JSONLoader reads from a path, so the upload is written to disk first.
    with open("flight.jsnol", "wb") as f:
        f.write(file_bytes)

    documents = JSONLoader(
        file_path='flight.jsnol',
        jq_schema='.',
        text_content=False,
        json_lines=True,
    ).load()

    query = st.text_input("Enter your query")

    if st.button("Split Documents"):
        docs = split_docs(documents)
        st.write(f"Number of documents: {len(docs)}")
        st.json(docs)

    if st.button("Create Embeddings and Vector Store"):
        _get_vector_store(documents, digest)
        st.write("Embeddings and vector store created.")

    if st.button("Condition Split"):
        if query:
            json_data = condition_split(query)
            st.write("JSON Data:")
            st.json(json_data)
        else:
            st.write("Please enter a query.")

    if st.button("Make Prompt"):
        if query:
            db1 = _get_vector_store(documents, digest)
            condition_string = make_prompt(query, db1)
            st.write("Condition String:")
            st.write(condition_string)
        else:
            st.write("Please enter a query.")

    if st.button("Replace with Shielding"):
        if query:
            db1 = _get_vector_store(documents, digest)
            shielded_replaced_string = _shielded_condition(query, db1)
            st.write("Shielded Replaced String:")
            st.write(shielded_replaced_string)
        else:
            st.write("Please enter a query.")

    if st.button("Form JSON Rule"):
        if query:
            db1 = _get_vector_store(documents, digest)
            shielded_replaced_string = _shielded_condition(query, db1)
            json_rule = form_json_rule(shielded_replaced_string)
            st.write("JSON Rule:")
            st.json(json_rule)
            st.download_button(
                label="Download JSON Rule",
                # json_rule is already the JSON text returned by the model;
                # json.dumps() on it would wrap it in quotes and escape it.
                data=json_rule,
                file_name="json_rule.json",
                mime="application/json"
            )
        else:
            st.write("Please enter a query.")


if __name__ == '__main__':
    main()
flight.jsnol ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"kpis": "hotel_id", "description": "A unique identifier assigned to each hotel. This ID helps in maintaining a distinct record for each hotel and acts as a primary key. It's also used for referencing in other tables like Rooms."}
2
+ {"kpis": "hotel_name", "description": "The official name of the hotel. This column provides users with the name of the hotel they are booking or viewing. It aids in branding and recognition."}
3
+ {"kpis": "location", "description": "Represents the city or area where the hotel is situated. This field helps users in filtering hotels based on their preferred destination."}
4
+ {"kpis": "rating", "description": "Represents the average rating of the hotel, based on user reviews. Users often sort or filter hotels based on ratings to ensure they get the best experience. A higher rating usually indicates better customer satisfaction."}
5
+ {"kpis": "reservation_id", "description": "A unique identifier for each reservation made on the platform. This ID ensures that each booking is distinct and can be referenced for any customer queries or modifications."}
6
+ {"kpis": "user_id", "description": "A reference to a user from the Users table who made the reservation. Establishes which user made a specific booking, aiding in personalized user experiences and support."}
7
+ {"kpis": "room_id", "description": "Refers to a specific room type in a hotel from the Rooms table. Ensures that the booking corresponds to a specific type of room in a particular hotel."}
8
+ {"kpis": "start_date", "description": "Indicates the beginning date of the reservation. Helps in determining room availability and the user's stay period."}
9
+ {"kpis": "end_date", "description": "Marks the termination date of the reservation. Assists in room inventory management and billing."}
10
+ {"kpis": "room_id", "description": "A unique identifier for a specific room type in a hotel. This ID ensures that each room type in a hotel has a unique representation. It also plays a role in making reservations."}
11
+ {"kpis": "hotel_id", "description": "An identifier that references a hotel from the Hotels table. This foreign key establishes a link between the room and its respective hotel, ensuring that rooms are correctly mapped to hotels."}
12
+ {"kpis": "room_type", "description": "Categorizes rooms based on their features and amenities, e.g., Deluxe, Suite, etc. Users can choose a room based on their preferences, like a suite for luxurious stays or deluxe for standard ones."}
13
+ {"kpis": "price_per_night", "description": "Indicates the cost of booking the room for one night. Helps users in understanding the pricing and aids in budget planning."}
14
+ {"kpis": "availability", "description": "Specifies the number of such rooms available for booking. Ensures that overbooking doesn't occur and informs users about room scarcity."}
15
+ {"kpis": "customer_id", "description": "A unique identifier for each customer. Used for referencing customers in queries and transactions."}
16
+ {"kpis": "name", "description": "The full name of the customer. Used for personalizing customer interactions."}
17
+ {"kpis": "contact_details", "description": "Contact information of the customer, including phone number and email. Used for communication with the customer."}
18
+ {"kpis": "preferences", "description": "Stored preferences of the customer, such as room type and amenities. Used for tailoring recommendations and services to the customer."}
19
+ {"kpis": "hotel_id", "description": "A unique identifier for each hotel. Used for linking amenities to specific hotels."}
20
+ {"kpis": "amenity_type", "description": "The type of amenity offered, such as gym, pool, or spa. Used for filtering and listing amenities."}
21
+ {"kpis": "availability", "description": "Indicates whether the amenity is currently available. Used for real-time amenity status updates."}
22
+ {"kpis": "review_id", "description": "A unique identifier for each review. Used for referencing individual reviews."}
23
+ {"kpis": "customer_id", "description": "The customer who provided the review. Used for linking reviews to customers."}
24
+ {"kpis": "hotel_id", "description": "The hotel that the review pertains to. Used for aggregating reviews by hotel."}
25
+ {"kpis": "rating", "description": "The rating given by the customer, usually on a scale from 1 to 5. Used for calculating the average rating of hotels."}
26
+ {"kpis": "comments", "description": "Textual feedback provided by the customer. Used for qualitative analysis of customer satisfaction."}
27
+ {"kpis": "payment_id", "description": "A unique identifier for each payment transaction. Used for tracking and auditing payments."}
28
+ {"kpis": "customer_id", "description": "The customer who made the payment. Used for linking payments to customers."}
29
+ {"kpis": "amount", "description": "The amount of the payment transaction. Used for financial reporting and auditing."}
30
+ {"kpis": "status", "description": "The status of the payment, such as successful, pending, or failed. Used for real-time payment status tracking."}
31
+ {"kpis": "payment_method", "description": "The method used for payment, such as credit card or PayPal. Used for analytics and reporting."}
32
+ {"kpis": "discount_code", "description": "A unique code representing each discount. Used for applying discounts during payment."}
33
+ {"kpis": "description", "description": "A brief description of the discount. Used for informing customers about the discount."}
34
+ {"kpis": "eligibility", "description": "Criteria for eligibility, such as military personnel or membership status. Used for verifying discount eligibility."}
35
+ {"kpis": "expiry_date", "description": "The date on which the discount expires. Used for discount lifecycle management."}
36
+ {"kpis": "staff_id", "description": "A unique identifier for each staff member. Used for managing staff records."}
37
+ {"kpis": "name", "description": "The full name of the staff member. Used for identification and communication."}
38
+ {"kpis": "role", "description": "The role or position of the staff member. Used for assigning tasks and responsibilities."}
39
+ {"kpis": "contact_details", "description": "Contact information of the staff member. Used for internal communication."}
40
+ {"kpis": "work_schedule", "description": "The work schedule or shifts of the staff member. Used for staff management and scheduling."}
41
+ {"kpis": "event_id", "description": "A unique identifier for each event. Used for event management and ticketing."}
42
+ {"kpis": "hotel_id", "description": "The hotel where the event is hosted. Used for linking events to specific hotels."}
43
+ {"kpis": "event_type", "description": "The type of event, such as wedding, conference, or show. Used for categorizing and filtering events."}
44
+ {"kpis": "availability", "description": "Indicates whether tickets for the event are available. Used for real-time ticketing updates."}
45
+ {"kpis": "date", "description": "The date on which the event is scheduled. Used for event planning and scheduling."}
46
+ {"kpis": "flight_id", "description": "A unique identifier for each flight. Used for flight management and tracking."}
47
+ {"kpis": "airline", "description": "The airline operating the flight. Used for identifying the airline and its routes."}
48
+ {"kpis": "departure_airport", "description": "The airport from which the flight departs. Used for departure location information."}
49
+ {"kpis": "arrival_airport", "description": "The airport to which the flight arrives. Used for arrival location information."}
50
+ {"kpis": "departure_time", "description": "The scheduled departure time of the flight. Used for flight scheduling and planning."}
51
+ {"kpis": "arrival_time", "description": "The scheduled arrival time of the flight. Used for flight scheduling and planning."}
52
+ {"kpis": "seat_capacity", "description": "The total capacity of seats available on the flight. Used for seat allocation and booking."}
53
+ {"kpis": "remaining_seats", "description": "The number of seats still available for booking on the flight. Used for real-time seat availability updates."}
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ langchain
2
+ langchain-community
3
+ chromadb
4
+ sentence-transformers
5
+ cohere
6
+ pandas