File size: 7,618 Bytes
7829f59
 
 
 
 
 
 
 
 
 
ee274ab
3f994ff
aaa2984
 
7b0f868
7829f59
7b0f868
7829f59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
from beyondllm import source,retrieve,embeddings,llms,generator
import os
from getpass import getpass
from beyondllm.vectordb import ChromaVectorDb
import json
from graphviz import Digraph
import graphviz
import streamlit as st
from beyondllm.llms import AzureOpenAIModel
from beyondllm.embeddings import AzureAIEmbeddings
from Json_2_tree import json_to_dot

# os.environ["PATH"] += os.pathsep + 'Graphiviz/Graphviz-11.0.0-win64/bin/bin/'
# os.environ["PATH"] += os.pathsep + "Graphviz2.38/bin/dot.exe"

# Page header.
st.title("Chat with document")

# NOTE: this page renders no API-key input widget, so the former
# "Enter API Key" prompt and the unconditional "API Key entered successfully!"
# banner were dropped -- they reported a user action that could never happen.

# Upload widget plus the button that triggers processing of the uploaded PDF.
st.caption("Upload a PDF document to get information from the document.")
uploaded_file = st.file_uploader("Choose a PDF file", type='pdf')
submit=st.button("Get the data")
# Everything below runs only when the "Get the data" button value is truthy.
if submit:

    # Fixed question passed to generator.Generate together with the system
    # prompt below; it is a constant, not user input.
    question = "Give Decision taken in the document"
    # Instruction prompt asking the model for a nested Yes/No decision tree in
    # JSON, using the keys "Question<N>", "Yes", "No", "Result" and
    # "Council regulations" -- the same keys json_to_dot looks for further down.
    # NOTE(review): the text contains a literal "{context}" placeholder next to
    # unescaped JSON braces -- confirm how generator.Generate fills it in.
    # The literal is runtime text sent to the model: keep it byte-for-byte.
    system_prompt = '''You are a business analyst with extensive knowledge of legal documents and regulatory documentation.
    Your expertise is in helping people understand and extract key information from such documents. 
    Your task is to extract the rules and exceptions in a way that enables the creation of a decision tree, facilitating integration into the proper flow.
    Legal Document Context: {context}
    
    Create a decision tree in JSON format based on the following structure:

    Write a question and question should be two response like yes or no. if yes it has fallowing answers or other question 
        - If Yes, the result should be: "Not restricted" additional -Council regulations: provide dates and articles if possible.
        - If No, proceed to the next question2.( by giving some link to the next question not direct to next question)

    2. Next question based on the previous question outcome.
        - If Yes, the result should be: "Not restricted" additional -Council regulations: provide dates and articles if possible.
        - If No, proceed to the next question.
    In simple terms - flow chat if conditons.
    [Continue this structure for as many questions as needed, ensuring each question branches into Yes/No answers and provides appropriate results based on the Council regulations.]
    Please continue this format for as many questions as needed, ensuring each question follows the same structure.
    Output is the JSON response follow this pattern: Do not change everytime Json output
    This is JSON output Example, add more questions in this formate only. 
        {
        "Question1": ,
        "Yes": {
            "Result": ,
            "Council regulations": 
        },
        "No": {
            "Question2": ,
            "Yes": {
                "Result":,
                "Council regulations": 
            },
            "No": {
                "Question3": ,
                "Yes": {
                    "Result": ,
                    "Council regulations":
                },
                "No": {
                    "Result": ,
                    "Council regulations": 
                }
            }
        }
    }
    Additional Instructions:
    
    Analyze the entire document to identify all relevant rules and exceptions.
    Ensure that the descriptions of rules and exceptions are clear and concise.
    Include relevant dates, jurisdictions, and specific regulations where applicable.
    Structure the questions and answers to facilitate the creation of a logical decision tree or workflow.
    If the regulation mentions specific products, territories, or operations, include them in the appropriate sections.
    Aim to simplify legal language while maintaining accuracy and intent.
    [Provide your answer in JSON form. Reply with only the answer in JSON form and include no other commentary]:
    Provide your answer in JSON form. Reply with only the answer in JSON form and include no other commentary

    Return Valid Json to create Tree 
    '''

    

    if uploaded_file is not None and question:

        # Credentials come from the environment rather than being committed to
        # the source file. Halt this run with a visible message if one is missing.
        embed_api_key = os.environ.get("AZURE_EMBEDDINGS_API_KEY")
        llm_api_key = os.environ.get("AZURE_OPENAI_API_KEY")
        if not embed_api_key or not llm_api_key:
            st.error(
                "Set the AZURE_EMBEDDINGS_API_KEY and AZURE_OPENAI_API_KEY "
                "environment variables before running the app."
            )
            st.stop()

        # The upload is written to disk because source.fit is given a file path.
        save_path = "./uploaded_files"
        # exist_ok avoids the check-then-create race between concurrent sessions.
        os.makedirs(save_path, exist_ok=True)
        # Keep only the last path component of the client-supplied name so a
        # crafted file name cannot point outside the upload directory.
        safe_name = os.path.basename(uploaded_file.name.replace("\\", "/")) or "uploaded.pdf"
        file_path = os.path.join(save_path, safe_name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # Split the PDF into chunks and build a retriever over their embeddings.
        data = source.fit(file_path, dtype="pdf", chunk_size=1024, chunk_overlap=0)
        embed_model = AzureAIEmbeddings(
                endpoint_url="https://marketplace.openai.azure.com/",
                azure_key=embed_api_key,
                api_version="2024-02-01",
                deployment_name="text-embed-marketplace")

        retriever = retrieve.auto_retriever(data, embed_model, type="normal", top_k=4)

        BASE_URL = "https://gpt-res.openai.azure.com/"
        DEPLOYMENT_NAME = "gpt-4-32k"
        # NOTE(review): max_tokens=512 may cut a larger JSON tree short -- confirm.
        llm = AzureOpenAIModel(
            model="gpt4",
            azure_key=llm_api_key,
            deployment_name=DEPLOYMENT_NAME,
            endpoint_url=BASE_URL,
            model_kwargs={"max_tokens":512,"temperature":0.1},
        )
        pipeline = generator.Generate(question=question, system_prompt=system_prompt, retriever=retriever, llm=llm)
        decision_tree_json = pipeline.call()

        # The model output is not guaranteed to be bare JSON (it may be wrapped
        # in prose or Markdown fences). Try it verbatim, then retry on the
        # outermost {...} span, and stop with a message instead of a traceback.
        try:
            response = json.loads(decision_tree_json)
        except (TypeError, ValueError):
            raw_text = str(decision_tree_json)
            first_brace = raw_text.find("{")
            last_brace = raw_text.rfind("}")
            try:
                if first_brace == -1 or last_brace <= first_brace:
                    raise ValueError("no JSON object found in model output")
                response = json.loads(raw_text[first_brace:last_brace + 1])
            except ValueError:
                st.error("The model did not return valid JSON, so no decision tree can be drawn.")
                st.stop()
        # Function to recursively create DOT format from JSON
        # NOTE: defining it here rebinds the json_to_dot name imported from
        # Json_2_tree at the top of the file; this local version is the one used.
        def json_to_dot(graph, node_id: str, parent_node: object, parent_label: str) -> None:
            """Add the nodes and edges for one level of the decision tree to *graph*.

            Args:
                graph: Object exposing ``node(name, label, **attrs)`` and
                    ``edge(tail, head, **attrs)`` (a ``graphviz.Digraph`` here).
                node_id: Prefix used to build the ids of nodes created at this level.
                parent_node: Parsed JSON for this level. Anything that is not a
                    dict is ignored.
                parent_label: Id of the existing node the new nodes are linked from.

            Keys handled in each dict:
                ``Question*``: light-blue node holding the question text.
                ``Yes`` / ``No``: coloured branch node; its value is walked recursively.
                ``Result``: grey leaf; the sibling ``Council regulations`` value is
                    appended when present.
            Any other key is skipped.
            """
            if not isinstance(parent_node, dict):
                return

            for key, value in parent_node.items():
                if key.startswith("Question"):
                    question_id = f"{node_id}_{key}"
                    nested = isinstance(value, dict)
                    # Model output is not guaranteed to be a string here: label a
                    # nested object with its key and coerce other values to text
                    # instead of failing on the slice below.
                    if nested:
                        text = key
                    elif value is None:
                        text = ""
                    else:
                        text = str(value)
                    # Hard-wrap every 30 characters so long questions stay narrow.
                    label_text = "\n".join(text[i:i+30] for i in range(0, len(text), 30))
                    shape = 'diamond' if len(text) > 50 else 'box'
                    graph.node(question_id, label_text, shape=shape, style='filled', fillcolor='lightblue')
                    graph.edge(parent_label, question_id, color='black')
                    # Only a nested object has children to walk; recursing into
                    # plain question text never added anything.
                    if nested:
                        json_to_dot(graph, question_id, value, question_id)
                elif key in ("Yes", "No"):
                    option_label = f"{node_id}_{key}"
                    graph.node(option_label, key, shape='box', style='filled', fillcolor='lightgreen' if key == "Yes" else 'lightcoral')
                    graph.edge(parent_label, option_label, label=key, color='black')
                    json_to_dot(graph, option_label, value, option_label)
                elif key == "Result":
                    result_label = f"{node_id}_{key}"
                    result_str = f"{key}: {value}"
                    # The regulations entry may be missing from model output;
                    # leave the line out rather than raising KeyError.
                    if 'Council regulations' in parent_node:
                        result_str += f"\nCouncil regulations: {parent_node['Council regulations']}"
                    graph.node(result_label, result_str, shape='box', style='filled', fillcolor='lightgrey')
                    graph.edge(parent_label, result_label, color='black')

        # Create a new graph
        dot = graphviz.Digraph(comment='Decision Tree')
        # Add the root node
        dot.node('Root', 'Start', shape='ellipse', style='filled', fillcolor='lightyellow')
        # Build the DOT format
        json_to_dot(dot, "Root", response, "Root")
        # Render to a PNG on disk with the Graphviz engine.
        # view=False: the image is shown in the page below, so no desktop viewer
        # should be launched from the process running the app (a headless host
        # has none to launch).
        dot.format = 'png'
        image_path = dot.render('decision_tree', view=False)
        # Show the file that render() reports having written instead of
        # repeating its name by hand.
        with st.chat_message(""):
            st.write("")
            st.image(image_path, caption='tree from json')