"""Streamlit app that turns an uploaded PDF into a Yes/No decision tree.

Flow: save the uploaded PDF, index it with beyondllm, ask an Azure OpenAI
model to answer with a nested JSON decision tree, convert that JSON into a
Graphviz graph, render it to ``decision_tree.png`` and show the image.

Configuration (environment variables, read at start-up):
    AZURE_EMBEDDINGS_API_KEY  key for the Azure embeddings deployment
    AZURE_OPENAI_API_KEY      key for the Azure chat-completion deployment

Credentials are deliberately NOT stored in this file.
"""
import json
import os
from getpass import getpass

from beyondllm import source,retrieve,embeddings,llms,generator
from beyondllm.vectordb import ChromaVectorDb
from graphviz import Digraph
import graphviz
import streamlit as st
from beyondllm.llms import AzureOpenAIModel
from beyondllm.embeddings import AzureAIEmbeddings

# NOTE: kept because it is a file-level import; the graph builder used below
# is the local ``_json_to_dot`` so this imported name is no longer shadowed.
from Json_2_tree import json_to_dot

# If the Graphviz ``dot`` binary is not on PATH (seen on Windows), extend PATH
# before rendering, e.g.:
# os.environ["PATH"] += os.pathsep + 'Graphiviz/Graphviz-11.0.0-win64/bin/bin/'
# os.environ["PATH"] += os.pathsep + "Graphviz2.38/bin/dot.exe"

# Environment variable names holding the two Azure credentials.
EMBEDDINGS_KEY_ENV = "AZURE_EMBEDDINGS_API_KEY"
LLM_KEY_ENV = "AZURE_OPENAI_API_KEY"

# Non-secret Azure deployment settings.
EMBEDDINGS_ENDPOINT = "https://marketplace.openai.azure.com/"
EMBEDDINGS_DEPLOYMENT = "text-embed-marketplace"
EMBEDDINGS_API_VERSION = "2024-02-01"
BASE_URL = "https://gpt-res.openai.azure.com/"
DEPLOYMENT_NAME = "gpt-4-32k"

# Directory where uploaded PDFs are written before indexing.
SAVE_PATH = "./uploaded_files"

# Fixed query sent to the retrieval pipeline for every document.
QUESTION = "Give Decision taken in the document"

# Prompt wording is runtime text consumed by the model; it is kept verbatim.
SYSTEM_PROMPT = '''You are a business analyst with extensive knowledge of legal documents and regulatory documentation.
Your expertise is in helping people understand and extract key information from such documents.
Your task is to extract the rules and exceptions in a way that enables the creation of a decision tree, facilitating integration into the proper flow.

Legal Document Context: {context}

Create a decision tree in JSON format based on the following structure:
Write a question and question should be two response like yes or no.
if yes it has fallowing answers or other question
- If Yes, the result should be: "Not restricted" additional -Council regulations: provide dates and articles if possible.
- If No, proceed to the next question2.( by giving some link to the next question not direct to next question)
2. Next question based on the previous question outcome.
- If Yes, the result should be: "Not restricted" additional -Council regulations: provide dates and articles if possible.
- If No, proceed to the next question.
In simple terms - flow chat if conditons.
[Continue this structure for as many questions as needed, ensuring each question branches into Yes/No answers and provides appropriate results based on the Council regulations.]
Please continue this format for as many questions as needed, ensuring each question follows the same structure.

Output is the JSON response follow this pattern: Do not change everytime Json output
This is JSON output Example, add more questions in this formate only.
{
  "Question1": ,
  "Yes": {
    "Result": ,
    "Council regulations":
  },
  "No": {
    "Question2": ,
    "Yes": {
      "Result":,
      "Council regulations":
    },
    "No": {
      "Question3": ,
      "Yes": {
        "Result": ,
        "Council regulations":
      },
      "No": {
        "Result": ,
        "Council regulations":
      }
    }
  }
}
Additional Instructions:
Analyze the entire document to identify all relevant rules and exceptions.
Ensure that the descriptions of rules and exceptions are clear and concise.
Include relevant dates, jurisdictions, and specific regulations where applicable.
Structure the questions and answers to facilitate the creation of a logical decision tree or workflow.
If the regulation mentions specific products, territories, or operations, include them in the appropriate sections.
Aim to simplify legal language while maintaining accuracy and intent.
[Provide your answer in JSON form. Reply with only the answer in JSON form and include no other commentary]:
Provide your answer in JSON form.
Reply with only the answer in JSON form and include no other commentary
Return Valid Json to create Tree
'''


def _parse_llm_json(raw):
    """Parse the model reply as JSON, tolerating text around the JSON object.

    The reply is first parsed as-is. If that fails, the span from the first
    ``{`` to the last ``}`` is tried, which covers replies wrapped in Markdown
    code fences or prefixed/suffixed with commentary.

    Raises:
        ValueError: (``json.JSONDecodeError``) if no parseable object is found.
        TypeError: if ``raw`` is not a str/bytes value.
    """
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        start, end = raw.find("{"), raw.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(raw[start:end + 1])


def _wrap_label(text, width=30):
    """Break ``text`` into ``width``-character lines so node labels stay narrow."""
    return "\n".join(text[i:i + width] for i in range(0, len(text), width))


def _json_to_dot(graph, node_id, parent_node, parent_label):
    """Recursively add the nodes/edges for a decision-tree dict to ``graph``.

    Args:
        graph: ``graphviz.Digraph`` being populated.
        node_id: prefix used to build unique ids for nodes created here.
        parent_node: the JSON value to walk; anything that is not a dict is
            ignored.
        parent_label: id of the graph node that new nodes are attached to.

    Keys are handled as follows: ``Question*`` becomes a blue question node,
    ``Yes``/``No`` become green/red option nodes that are recursed into, and
    ``Result`` becomes a grey leaf. Other keys are skipped.
    """
    if not isinstance(parent_node, dict):
        return

    for key, value in parent_node.items():
        if key.startswith("Question"):
            question_id = f"{node_id}_{key}"
            # The model may emit a non-string here; never crash on len()/slicing.
            text = value if isinstance(value, str) else str(value)
            shape = 'diamond' if len(text) > 50 else 'box'
            graph.node(question_id, _wrap_label(text), shape=shape,
                       style='filled', fillcolor='lightblue')
            graph.edge(parent_label, question_id, color='black')
            _json_to_dot(graph, question_id, value, question_id)
        elif key in ("Yes", "No"):
            option_label = f"{node_id}_{key}"
            graph.node(option_label, key, shape='box', style='filled',
                       fillcolor='lightgreen' if key == "Yes" else 'lightcoral')
            graph.edge(parent_label, option_label, label=key, color='black')
            _json_to_dot(graph, option_label, value, option_label)
        elif key == "Result":
            result_label = f"{node_id}_{key}"
            result_str = f"{key}: {value}"
            # The model sometimes omits the regulations on a leaf; only add
            # the line when it is present instead of raising KeyError.
            if 'Council regulations' in parent_node:
                result_str += f"\nCouncil regulations: {parent_node['Council regulations']}"
            graph.node(result_label, result_str, shape='box', style='filled',
                       fillcolor='lightgrey')
            graph.edge(parent_label, result_label, color='black')


st.title("Chat with document")
st.text("Enter API Key")

embeddings_key = os.environ.get(EMBEDDINGS_KEY_ENV)
llm_key = os.environ.get(LLM_KEY_ENV)
keys_configured = bool(embeddings_key and llm_key)
if keys_configured:
    st.success("API Key entered successfully!")
else:
    st.warning(
        f"Azure credentials are not configured. Set the {EMBEDDINGS_KEY_ENV} "
        f"and {LLM_KEY_ENV} environment variables and restart the app."
    )

st.caption("Upload a PDF document to get information from the document.")
uploaded_file = st.file_uploader("Choose a PDF file", type='pdf')
submit = st.button("Get the data")

if submit:
    if not keys_configured:
        st.error("Cannot query the model: Azure credentials are missing.")
        st.stop()
    if uploaded_file is None:
        st.warning("Please upload a PDF file first.")
        st.stop()

    # Persist the upload so beyondllm can read it from disk. basename() keeps
    # a crafted file name from escaping the upload directory.
    os.makedirs(SAVE_PATH, exist_ok=True)
    file_path = os.path.join(SAVE_PATH, os.path.basename(uploaded_file.name))
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    data = source.fit(file_path, dtype="pdf", chunk_size=1024, chunk_overlap=0)
    embed_model = AzureAIEmbeddings(
        endpoint_url=EMBEDDINGS_ENDPOINT,
        azure_key=embeddings_key,
        api_version=EMBEDDINGS_API_VERSION,
        deployment_name=EMBEDDINGS_DEPLOYMENT,
    )
    retriever = retrieve.auto_retriever(data, embed_model, type="normal", top_k=4)
    # Alternatives tried previously:
    # vectordb = ChromaVectorDb(collection_name="my_persistent_collection", persist_directory="./db/chroma/")
    # llm = llms.ChatOpenAIModel()

    # NOTE(review): max_tokens=512 may truncate larger trees and yield invalid
    # JSON; the parse step below reports that instead of crashing.
    llm = AzureOpenAIModel(
        model="gpt4",
        azure_key=llm_key,
        deployment_name=DEPLOYMENT_NAME,
        endpoint_url=BASE_URL,
        model_kwargs={"max_tokens": 512, "temperature": 0.1},
    )
    pipeline = generator.Generate(
        question=QUESTION,
        system_prompt=SYSTEM_PROMPT,
        retriever=retriever,
        llm=llm,
    )
    decision_tree_json = pipeline.call()

    try:
        response = _parse_llm_json(decision_tree_json)
    except (TypeError, ValueError):
        st.error("The model did not return valid JSON, so no tree could be built.")
        st.code(str(decision_tree_json))
        st.stop()

    # Create a new graph with a single root node and hang the tree off it.
    dot = graphviz.Digraph(comment='Decision Tree')
    dot.node('Root', 'Start', shape='ellipse', style='filled', fillcolor='lightyellow')
    _json_to_dot(dot, "Root", response, "Root")

    # Render to PNG. view=False: never launch a desktop viewer from the
    # server process; the image is shown in the page instead.
    dot.format = 'png'
    try:
        image_path = dot.render('decision_tree', view=False)
    except graphviz.ExecutableNotFound:
        st.error("Graphviz 'dot' executable was not found on PATH; cannot render the tree.")
        st.stop()

    with st.chat_message(""):
        st.write("")
        st.image(image_path, caption='tree from json')