Spaces:

suhasg
/

cohere_json

Sleeping

App Files Files Community

suhasg committed on Jul 5, 2024

Commit

162f3ee

1 Parent(s): 584ab38

first commit

Browse files

Files changed (4) hide show

.env +1 -0
app.py +307 -0
flight.jsnol +53 -0
requirements.txt +6 -0

.env ADDED Viewed

	@@ -0,0 +1 @@


1	+ API_KEY="uN2laCeHROtues7F46DM9f1gV1tTVwAorXw5adna"

app.py ADDED Viewed

	@@ -0,0 +1,307 @@

+import streamlit as st
+import json
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import Chroma
+from langchain.vectorstores import FAISS
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.document_loaders import JSONLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+import pandas as pd
+from langchain.vectorstores import Chroma
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import SentenceTransformerEmbeddings
+import os
+import cohere
+from dotenv import load_dotenv
+load_dotenv()
+api_key = os.getenv('API_KEY')
+client = cohere.Client(api_key)
+# Define functions
def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Break the loaded documents into chunks.

    Args:
        documents: Documents to split, as accepted by
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Value forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Value forwarded to the splitter as ``chunk_overlap``.

    Returns:
        Whatever ``split_documents`` returns for ``documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)
def make_api_call(gpt_assistant_prompt, gpt_user_prompt):
    """Send a single chat turn to Cohere and return the reply text.

    Args:
        gpt_assistant_prompt: Instructions describing the role the model
            should play (sent as the system preamble).
        gpt_user_prompt: The user message to answer.

    Returns:
        The ``text`` attribute of the chat response.
    """
    # ``client`` is a ``cohere.Client``; its ``chat`` method takes the user
    # turn as ``message`` and the system instructions as ``preamble``. It does
    # not accept an OpenAI-style ``messages`` list, and the result read below
    # (``response.text``) is the shape that call returns.
    response = client.chat(
        model="command",
        message=gpt_user_prompt,
        preamble=gpt_assistant_prompt,
        temperature=0.0,  # deterministic output for structured extraction
        max_tokens=1000,
    )
    return response.text
def condition_split(query):
    """Ask the model to break an offer description into structured JSON.

    The prompt requests three top-level fields: ``condition_for_eligibility``,
    ``promotion_offered`` and ``scheduling``.

    Args:
        query: Free-text description of the customer offer.

    Returns:
        The object parsed from the JSON found in the model's reply.

    Raises:
        json.JSONDecodeError: If the reply contains no ``{...}`` span or the
            span cannot be parsed as JSON.
    """
    structure = {
        "condition_for_eligibility": {
            "logic": "form the logic from query above like (A and B) or C",
            "definitions": {
                "condition": "the text Condition extracted from the query"
            }
        },
        "promotion_offered": "Description of Promotion",
        "scheduling": "Offer Scheduling Details"
    }
    # Show the model real JSON (double quotes) rather than a Python dict repr,
    # so the example it imitates is itself parseable.
    structure_json = json.dumps(structure, indent=2)
    template_initial = f"""
    Given a text description of a customer offer that outlines eligibility criteria, the promotion offered, and the scheduling of the offer, your task is to parse the text and organize the information into a structured JSON format. The JSON structure should include three main components: condition_for_eligibility, promotion_offered, and scheduling. Use logical expressions to detail the conditions for eligibility.
    Text Description:
    {query}
    Your Objectives:
    Extract and Label Conditions for Eligibility:
    Identify specific metrics or actions (e.g., number of calls) that define eligibility.
    Use labels (A, B, C) for distinct conditions.
    Determine the logical relationship between these conditions (e.g., (A and B) or C).
    Outline the Promotion Offered:
    Identify the key benefit or discount promised to eligible customers.
    Determine the Scheduling for the Offer:
    Capture the frequency or timing of when the offer is made (e.g., daily, weekly).
    Structure Your Response as Follows:
    the structure is
    {structure_json}
    """
    gpt_assistant_prompt = """You are an expert in Semantic Understanding, Marketing, and Business Analysis.
    You need to extract specific information like conditions, scheduling details, and promotions from a paragraph, typically blending technical skills and domain-specific knowledge.
    Give only the JSON as the response.
    """
    resp = make_api_call(gpt_assistant_prompt, template_initial)

    # Keep only the outermost {...} span; the model may wrap it in extra text.
    start_index = resp.find("{")
    end_index = resp.rfind("}")
    if start_index == -1 or end_index < start_index:
        raise json.JSONDecodeError("No JSON object found in model response", resp, 0)
    json_data_string = resp[start_index:end_index + 1]

    try:
        return json.loads(json_data_string)
    except json.JSONDecodeError:
        # Fallback for single-quoted pseudo-JSON. Only attempted after strict
        # parsing fails, so apostrophes inside valid JSON are left intact.
        return json.loads(json_data_string.replace("'", '"'))
def make_prompt(query, db1):
    """Build a KPI-grounded prompt for ``query`` and return the model's condition.

    Args:
        query: The user's request.
        db1: Vector store exposing ``similarity_search(query, k=...)``; each
            returned document's ``page_content`` must be a JSON object with
            ``kpis`` and ``description`` keys.

    Returns:
        The model reply as a string.
    """
    matching_docs = db1.similarity_search(query, k=4)
    kpi_lines = []
    for doc in matching_docs:
        record = json.loads(doc.page_content)
        kpi_lines.append(f"{record['kpis']}: {record['description']}")
    # One KPI per line. The separator was previously the literal text "/n".
    final = ",\n ".join(kpi_lines)
    prompt_template = f"""
    Given the following user query and matched information if the KPI's:
    User Query: {query}
    Matched KPI's:
    {final}
    Write a condition to fulfill the user query.
    IMPORTANT:
    - Use only the KPI's mentioned in the Matched KPI's.
    - Associate KPI's with the corresponding descriptions provided in the Matched KPI's.
    - provide only the condition in response
    - only the exact KPI and condition value is to be present in the response no other text should be present.
    - only use one matching KPI or the closer once
    """
    gpt_assistant_prompt = """You are an expert in Semantic Understanding, Marketing, and Business Analysis.
    You need to check whether the text matches. If the matching probability is high, then proceed with the requirement.
    """
    resp = make_api_call(gpt_assistant_prompt, prompt_template)
    return str(resp)
def replace_with_shielding(json_data, condition_string):
    """Substitute every key of ``json_data`` found in ``condition_string``.

    All keys are replaced in a single pass, so text inserted for one key is
    never re-scanned for other keys, and a key that is a substring of another
    key (e.g. ``"a"`` and ``"ab"``) cannot corrupt it: the longest key wins.

    Keys are processed in mapping order. The first key whose value contains
    the KPI-mismatch marker is replaced by a fixed notice; keys after it are
    left as ``__key__`` placeholders. If the notice appears in the result,
    everything after its first occurrence is dropped.

    Args:
        json_data: Mapping of string keys to replacement values (values are
            converted with ``str``). Empty keys are ignored.
        condition_string: Text in which the keys are substituted.

    Returns:
        The substituted (and possibly truncated) string.
    """
    import re  # local import: keeps this block independent of file-level imports

    mismatch_marker = "The provided KPI's do not match the user query."
    mismatch_notice = "Description not matching with kpi so update kpi and description in kpi's json file"

    replacements = {}
    mismatch_found = False
    for key, value in json_data.items():
        if not key:
            # An empty key would match between every character.
            continue
        if mismatch_found:
            # Keys after the first mismatch are never resolved.
            replacements[key] = f"__{key}__"
        elif mismatch_marker in str(value):
            mismatch_found = True
            replacements[key] = mismatch_notice
        else:
            replacements[key] = str(value)

    if replacements:
        # Longest alternatives first so "ab" is preferred over "a".
        ordered_keys = sorted(replacements, key=len, reverse=True)
        pattern = re.compile("|".join(re.escape(key) for key in ordered_keys))
        condition_string = pattern.sub(
            lambda match: replacements[match.group(0)], condition_string
        )

    cut = condition_string.find(mismatch_notice)
    if cut != -1:
        condition_string = condition_string[:cut] + mismatch_notice
    return condition_string
def form_json_rule(condition_string):
    """Ask the model to turn a plain-text condition into a rule JSON.

    The user prompt is a fixed description of the rule schema with
    ``condition_string`` inserted at its end.

    Args:
        condition_string: Text describing the rule to convert.

    Returns:
        The raw reply returned by ``make_api_call``.
    """
    system_prompt = """You are a json rule maker. You make rules based on the structure provided.
    IMPORTANT:
    Only give the json as response no other text should be present.
    Give the response json in compact form.
    """
    # The template holds literal JSON braces, so it is a plain string with a
    # marker token swapped out afterwards rather than an f-string.
    schema_description = """
    The task is to generate a json format with the help of english text.
    I will help you with some details about the conversion.
    a sample rule json structure is
    {"featureId":"","appName":"","username":"","password":"","reqTxnId":"","msgOrigin":"","msgDest":"","timestamp":"","id":"","ruletype":"","data":{"detail":{"rules":{"id":"0","pid":"#","childrens":[{"id":"0_0","pid":"0","type":"conditions","option":"All","childrens":[{"id":"0_0_0","pid":"0_0","type":"condition","profile":{"id":1,"name":"P_AON"},"operator":">","values":{"value":"30"}},{"id":"0_0_1","pid":"0_0","type":"condition","profile":{"id":862,"name":"P_DEVICE_TYPE"},"operator":"=","values":{"value":"PHONE"}},{"id":"0_0_2","pid":"0_0","type":"action","action":{"id":98,"name":"Mobile App Notification"},"field":[{"name":"ActionID","value":"0_0_2"},{"name":"ActionName","value":""},{"name":"ActionCall","value":""}],"request":{"field":[]}}]}]}}}}
    Root Level Properties
    featureId: String. A unique identifier for the feature.
    appName: String. The name of the application.
    username: String. The username for authentication.
    password: String. The password for authentication.
    reqTxnId: String. A unique transaction identifier.
    msgOrigin: String. The origin of the message.
    msgDest: String. The destination of the message.
    timestamp: String (ISO 8601 format). The timestamp of the request or action.
    id: String. A unique identifier for this particular instance.
    ruletype: String. The type of rule being defined.
    Data and Detail Section
    data: Object container.
    detail: Object within 'data'.
    rules: Object representing the rule logic.
    id: String. The unique identifier of the rule.
    pid: String. The parent identifier of the rule.
    childrens: Array of child objects. Each object represents a condition or an action.
    type: String. Specifies if it's a 'condition' or 'action'
    if inside childrens if type is condition then the option  can come as  a logical operator like 'All' or 'Any'.
    ALL is like an and operation where as Any is like an OR operation
    If type is 'any' or 'and', it contains a conditions array with condition objects.
    If type is 'condition' or 'action', it follows the respective structures below.
    Condition Structure
    id: String. The unique identifier of the condition.
    pid: String. The parent identifier of the condition.
    profile: Object containing details of the condition.
    name: String. The name of the condition.
    id: String. The unique identifier of the condition.
    operator: String. The operation applied in the condition (e.g., '=', '<>').
    values: String. The values to check against in the condition.
    isTextMode: Boolean. Indicates text mode evaluation.
    Action Structure
    id: String. The unique identifier of the action.
    pid: String. The parent identifier of the action.
    action: Object containing details of the action.
    name: String. The name of the action.
    id: String. The unique identifier of the action.
    field: Object of parameters related to the action (key-value pairs).
    request (Optional): Object detailing an external service request (key-value pairs).
    isTextMode: Boolean. Indicates text mode processing.
    Schedule Section
    schedule: Object defining scheduling details.
    field: Array of scheduling parameter objects.
    Each object contains scheduling details (key-value pairs) like ScheduleName, ScheduleType, CAMPAIGN_NAME, ExpiryDate.
    now from the information provided below convert it to json
    shielded_replaced_string
    """
    user_prompt = schema_description.replace("shielded_replaced_string", condition_string)
    return make_api_call(system_prompt, user_prompt)
+# Streamlit App
def _build_vector_store(documents):
    """Chunk ``documents``, embed them and index them in the Chroma store."""
    docs = split_docs(documents)
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    # NOTE(review): every call indexes the same documents into the same
    # persist_directory; presumably this accumulates duplicate entries across
    # button clicks - confirm against Chroma's behaviour.
    db1 = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        persist_directory="embeddings"
    )
    db1.persist()
    return db1


def _build_shielded_condition(query, documents):
    """Run the KPI-matching and offer-parsing steps and merge their outputs."""
    db1 = _build_vector_store(documents)
    condition_string = make_prompt(query, db1)
    json_data = condition_split(query)
    return replace_with_shielding(json_data, condition_string)


def main():
    """Streamlit entry point: upload a KPI file, then run pipeline steps on demand.

    Each button runs the pipeline up to the named stage for the current query
    and displays that stage's output. Nothing is shown until a file is
    uploaded.
    """
    st.title("Flight Data Analysis")
    uploaded_file = st.file_uploader("Choose a JSON file", type="jsnol")
    if uploaded_file is None:
        return

    # JSONLoader reads from a path, so the upload is written to disk first.
    with open("flight.jsnol", "wb") as f:
        f.write(uploaded_file.getvalue())
    documents = JSONLoader(file_path='flight.jsnol', jq_schema='.', text_content=False, json_lines=True).load()
    query = st.text_input("Enter your query")

    if st.button("Split Documents"):
        docs = split_docs(documents)
        st.write(f"Number of documents: {len(docs)}")
        st.json(docs)

    if st.button("Create Embeddings and Vector Store"):
        _build_vector_store(documents)
        st.write("Embeddings and vector store created.")

    if st.button("Condition Split"):
        if query:
            json_data = condition_split(query)
            st.write("JSON Data:")
            st.json(json_data)
        else:
            st.write("Please enter a query.")

    if st.button("Make Prompt"):
        if query:
            db1 = _build_vector_store(documents)
            condition_string = make_prompt(query, db1)
            st.write("Condition String:")
            st.write(condition_string)
        else:
            st.write("Please enter a query.")

    if st.button("Replace with Shielding"):
        if query:
            shielded_replaced_string = _build_shielded_condition(query, documents)
            st.write("Shielded Replaced String:")
            st.write(shielded_replaced_string)
        else:
            st.write("Please enter a query.")

    if st.button("Form JSON Rule"):
        if query:
            shielded_replaced_string = _build_shielded_condition(query, documents)
            json_rule = form_json_rule(shielded_replaced_string)
            st.write("JSON Rule:")
            st.json(json_rule)
            # form_json_rule returns the model's reply, which is already JSON
            # text; dumping it again would save a quoted, escaped string
            # instead of the rule object.
            if isinstance(json_rule, str):
                rule_payload = json_rule
            else:
                rule_payload = json.dumps(json_rule)
            st.download_button(
                label="Download JSON Rule",
                data=rule_payload,
                file_name="json_rule.json",
                mime="application/json"
            )
        else:
            st.write("Please enter a query.")


if __name__ == '__main__':
    main()

flight.jsnol ADDED Viewed

	@@ -0,0 +1,53 @@

+{"kpis": "hotel_id", "description": "A unique identifier assigned to each hotel. This ID helps in maintaining a distinct record for each hotel and acts as a primary key. It's also used for referencing in other tables like Rooms."}
+{"kpis": "hotel_name", "description": "The official name of the hotel. This column provides users with the name of the hotel they are booking or viewing. It aids in branding and recognition."}
+{"kpis": "location", "description": "Represents the city or area where the hotel is situated. This field helps users in filtering hotels based on their preferred destination."}
+{"kpis": "rating", "description": "Represents the average rating of the hotel, based on user reviews. Users often sort or filter hotels based on ratings to ensure they get the best experience. A higher rating usually indicates better customer satisfaction."}
+{"kpis": "reservation_id", "description": "A unique identifier for each reservation made on the platform. This ID ensures that each booking is distinct and can be referenced for any customer queries or modifications."}
+{"kpis": "user_id", "description": "A reference to a user from the Users table who made the reservation. Establishes which user made a specific booking, aiding in personalized user experiences and support."}
+{"kpis": "room_id", "description": "Refers to a specific room type in a hotel from the Rooms table. Ensures that the booking corresponds to a specific type of room in a particular hotel."}
+{"kpis": "start_date", "description": "Indicates the beginning date of the reservation. Helps in determining room availability and the user's stay period."}
+{"kpis": "end_date", "description": "Marks the termination date of the reservation. Assists in room inventory management and billing."}
+{"kpis": "room_id", "description": "A unique identifier for a specific room type in a hotel. This ID ensures that each room type in a hotel has a unique representation. It also plays a role in making reservations."}
+{"kpis": "hotel_id", "description": "An identifier that references a hotel from the Hotels table. This foreign key establishes a link between the room and its respective hotel, ensuring that rooms are correctly mapped to hotels."}
+{"kpis": "room_type", "description": "Categorizes rooms based on their features and amenities, e.g., Deluxe, Suite, etc. Users can choose a room based on their preferences, like a suite for luxurious stays or deluxe for standard ones."}
+{"kpis": "price_per_night", "description": "Indicates the cost of booking the room for one night. Helps users in understanding the pricing and aids in budget planning."}
+{"kpis": "availability", "description": "Specifies the number of such rooms available for booking. Ensures that overbooking doesn't occur and informs users about room scarcity."}
+{"kpis": "customer_id", "description": "A unique identifier for each customer. Used for referencing customers in queries and transactions."}
+{"kpis": "name", "description": "The full name of the customer. Used for personalizing customer interactions."}
+{"kpis": "contact_details", "description": "Contact information of the customer, including phone number and email. Used for communication with the customer."}
+{"kpis": "preferences", "description": "Stored preferences of the customer, such as room type and amenities. Used for tailoring recommendations and services to the customer."}
+{"kpis": "hotel_id", "description": "A unique identifier for each hotel. Used for linking amenities to specific hotels."}
+{"kpis": "amenity_type", "description": "The type of amenity offered, such as gym, pool, or spa. Used for filtering and listing amenities."}
+{"kpis": "availability", "description": "Indicates whether the amenity is currently available. Used for real-time amenity status updates."}
+{"kpis": "review_id", "description": "A unique identifier for each review. Used for referencing individual reviews."}
+{"kpis": "customer_id", "description": "The customer who provided the review. Used for linking reviews to customers."}
+{"kpis": "hotel_id", "description": "The hotel that the review pertains to. Used for aggregating reviews by hotel."}
+{"kpis": "rating", "description": "The rating given by the customer, usually on a scale from 1 to 5. Used for calculating the average rating of hotels."}
+{"kpis": "comments", "description": "Textual feedback provided by the customer. Used for qualitative analysis of customer satisfaction."}
+{"kpis": "payment_id", "description": "A unique identifier for each payment transaction. Used for tracking and auditing payments."}
+{"kpis": "customer_id", "description": "The customer who made the payment. Used for linking payments to customers."}
+{"kpis": "amount", "description": "The amount of the payment transaction. Used for financial reporting and auditing."}
+{"kpis": "status", "description": "The status of the payment, such as successful, pending, or failed. Used for real-time payment status tracking."}
+{"kpis": "payment_method", "description": "The method used for payment, such as credit card or PayPal. Used for analytics and reporting."}
+{"kpis": "discount_code", "description": "A unique code representing each discount. Used for applying discounts during payment."}
+{"kpis": "description", "description": "A brief description of the discount. Used for informing customers about the discount."}
+{"kpis": "eligibility", "description": "Criteria for eligibility, such as military personnel or membership status. Used for verifying discount eligibility."}
+{"kpis": "expiry_date", "description": "The date on which the discount expires. Used for discount lifecycle management."}
+{"kpis": "staff_id", "description": "A unique identifier for each staff member. Used for managing staff records."}
+{"kpis": "name", "description": "The full name of the staff member. Used for identification and communication."}
+{"kpis": "role", "description": "The role or position of the staff member. Used for assigning tasks and responsibilities."}
+{"kpis": "contact_details", "description": "Contact information of the staff member. Used for internal communication."}
+{"kpis": "work_schedule", "description": "The work schedule or shifts of the staff member. Used for staff management and scheduling."}
+{"kpis": "event_id", "description": "A unique identifier for each event. Used for event management and ticketing."}
+{"kpis": "hotel_id", "description": "The hotel where the event is hosted. Used for linking events to specific hotels."}
+{"kpis": "event_type", "description": "The type of event, such as wedding, conference, or show. Used for categorizing and filtering events."}
+{"kpis": "availability", "description": "Indicates whether tickets for the event are available. Used for real-time ticketing updates."}
+{"kpis": "date", "description": "The date on which the event is scheduled. Used for event planning and scheduling."}
+{"kpis": "flight_id", "description": "A unique identifier for each flight. Used for flight management and tracking."}
+{"kpis": "airline", "description": "The airline operating the flight. Used for identifying the airline and its routes."}
+{"kpis": "departure_airport", "description": "The airport from which the flight departs. Used for departure location information."}
+{"kpis": "arrival_airport", "description": "The airport to which the flight arrives. Used for arrival location information."}
+{"kpis": "departure_time", "description": "The scheduled departure time of the flight. Used for flight scheduling and planning."}
+{"kpis": "arrival_time", "description": "The scheduled arrival time of the flight. Used for flight scheduling and planning."}
+{"kpis": "seat_capacity", "description": "The total capacity of seats available on the flight. Used for seat allocation and booking."}
+{"kpis": "remaining_seats", "description": "The number of seats still available for booking on the flight. Used for real-time seat availability updates."}

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+langchain
+langchain-community
+chromadb
+sentence-transformers
+cohere
+pandas