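# app.py -- "Update app.py" (commit 3f994ff, verified), uploaded by Hemasagar, 7.62 kB.
# (File-viewer page residue such as the "raw" / "history blame" links is kept here as a comment only.)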
from beyondllm import source,retrieve,embeddings,llms,generator
import os
from getpass import getpass
from beyondllm.vectordb import ChromaVectorDb
import json
from graphviz import Digraph
import graphviz
import streamlit as st
from beyondllm.llms import AzureOpenAIModel
from beyondllm.embeddings import AzureAIEmbeddings
from Json_2_tree import json_to_dot
# Disabled Windows-specific PATH tweaks, presumably meant to make the Graphviz
# executables discoverable -- TODO confirm whether they are still needed.
# os.environ["PATH"] += os.pathsep + 'Graphiviz/Graphviz-11.0.0-win64/bin/bin/'
# os.environ["PATH"] += os.pathsep + "Graphviz2.38/bin/dot.exe"

# --- Page header and inputs -------------------------------------------------
st.title("Chat with document")
st.text("Enter API Key")
# NOTE(review): the API-key input below is commented out, yet the success
# banner that follows is still rendered unconditionally on every run, so the
# page reports a key as "entered" even though no key can be typed in.
# api_key = st.text_input("API Key:", type="password")
# os.environ['OPENAI_API_KEY'] = api_key
st.success("API Key entered successfully!")
st.caption("Upload a PDF document to get information from the document.")
# Only files with the 'pdf' type are offered; the result is compared against
# None later in the script before it is used.
uploaded_file = st.file_uploader("Choose a PDF file", type='pdf')
# `submit` gates the whole extraction / rendering flow further down.
submit=st.button("Get the data")
# Handle a click on "Get the data": store the uploaded PDF, index it, ask the
# LLM for a decision tree and parse the JSON answer into `response`.
if submit:
    question = "Give Decision taken in the document"
    system_prompt = '''You are a business analyst with extensive knowledge of legal documents and regulatory documentation.
Your expertise is in helping people understand and extract key information from such documents.
Your task is to extract the rules and exceptions in a way that enables the creation of a decision tree, facilitating integration into the proper flow.
Legal Document Context: {context}
Create a decision tree in JSON format based on the following structure:
Write a question and question should be two response like yes or no. if yes it has fallowing answers or other question
- If Yes, the result should be: "Not restricted" additional -Council regulations: provide dates and articles if possible.
- If No, proceed to the next question2.( by giving some link to the next question not direct to next question)
2. Next question based on the previous question outcome.
- If Yes, the result should be: "Not restricted" additional -Council regulations: provide dates and articles if possible.
- If No, proceed to the next question.
In simple terms - flow chat if conditons.
[Continue this structure for as many questions as needed, ensuring each question branches into Yes/No answers and provides appropriate results based on the Council regulations.]
Please continue this format for as many questions as needed, ensuring each question follows the same structure.
Output is the JSON response follow this pattern: Do not change everytime Json output
This is JSON output Example, add more questions in this formate only.
{
"Question1": ,
"Yes": {
"Result": ,
"Council regulations":
},
"No": {
"Question2": ,
"Yes": {
"Result":,
"Council regulations":
},
"No": {
"Question3": ,
"Yes": {
"Result": ,
"Council regulations":
},
"No": {
"Result": ,
"Council regulations":
}
}
}
}
Additional Instructions:
Analyze the entire document to identify all relevant rules and exceptions.
Ensure that the descriptions of rules and exceptions are clear and concise.
Include relevant dates, jurisdictions, and specific regulations where applicable.
Structure the questions and answers to facilitate the creation of a logical decision tree or workflow.
If the regulation mentions specific products, territories, or operations, include them in the appropriate sections.
Aim to simplify legal language while maintaining accuracy and intent.
[Provide your answer in JSON form. Reply with only the answer in JSON form and include no other commentary]:
Provide your answer in JSON form. Reply with only the answer in JSON form and include no other commentary
Return Valid Json to create Tree
'''

    # Tell the user why nothing happens instead of silently ignoring the click.
    if uploaded_file is None:
        st.warning("Please upload a PDF document first.")
        st.stop()

    # Credentials are read from the environment so that no secret is stored in
    # the source file. The keys that used to be hard-coded here must be
    # considered leaked and should be rotated.
    embeddings_key = os.environ.get("AZURE_EMBEDDINGS_API_KEY")
    API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
    if not embeddings_key or not API_KEY:
        st.error(
            "Missing credentials: set the AZURE_EMBEDDINGS_API_KEY and "
            "AZURE_OPENAI_API_KEY environment variables."
        )
        st.stop()

    # Persist the upload to disk because source.fit() is given a file path.
    save_path = "./uploaded_files"
    os.makedirs(save_path, exist_ok=True)
    # basename() drops any directory components from the client-supplied file
    # name so the upload cannot be written outside `save_path`.
    file_path = os.path.join(save_path, os.path.basename(uploaded_file.name))
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Chunk the PDF and build a retriever over its embeddings.
    data = source.fit(file_path, dtype="pdf", chunk_size=1024, chunk_overlap=0)
    embed_model = AzureAIEmbeddings(
        endpoint_url="https://marketplace.openai.azure.com/",
        azure_key=embeddings_key,
        api_version="2024-02-01",
        deployment_name="text-embed-marketplace",
    )
    retriever = retrieve.auto_retriever(data, embed_model, type="normal", top_k=4)

    BASE_URL = "https://gpt-res.openai.azure.com/"
    DEPLOYMENT_NAME = "gpt-4-32k"
    # NOTE(review): max_tokens=512 may cut a larger JSON tree short -- confirm.
    llm = AzureOpenAIModel(
        model="gpt4",
        azure_key=API_KEY,
        deployment_name=DEPLOYMENT_NAME,
        endpoint_url=BASE_URL,
        model_kwargs={"max_tokens": 512, "temperature": 0.1},
    )
    pipeline = generator.Generate(
        question=question,
        system_prompt=system_prompt,
        retriever=retriever,
        llm=llm,
    )
    decision_tree_json = pipeline.call()

    # The model is only *asked* to answer with JSON; report a malformed answer
    # in the page instead of crashing with a traceback.
    try:
        response = json.loads(decision_tree_json)
    except (TypeError, json.JSONDecodeError) as exc:
        st.error(f"The model did not return valid JSON: {exc}")
        st.stop()
# Recursively translate the decision-tree JSON into Graphviz nodes and edges.
# NOTE: this definition shadows the `json_to_dot` imported from Json_2_tree.
def json_to_dot(graph, node_id, parent_node, parent_label):
    """Add the nodes and edges described by ``parent_node`` to ``graph``.

    Keys are handled by name: ``Question*`` keys become question nodes,
    ``Yes`` / ``No`` keys become branch nodes whose values are walked
    recursively, and ``Result`` becomes a result node that also shows the
    sibling ``Council regulations`` entry. Every new node is linked from
    ``parent_label``. Other keys are ignored.

    Args:
        graph: Object providing ``node(name, label, **attrs)`` and
            ``edge(tail, head, **attrs)`` (e.g. ``graphviz.Digraph``).
        node_id: Prefix for the names of nodes created at this level.
        parent_node: JSON fragment to walk; non-dict values are ignored.
        parent_label: Name of the existing node the new nodes hang off.
    """
    if not isinstance(parent_node, dict):
        return

    for key, value in parent_node.items():
        if key.startswith("Question"):
            question_id = f"{node_id}_{key}"
            # The model may emit a non-string (number, null, ...); coerce it so
            # that slicing and len() below cannot fail.
            text = value if isinstance(value, str) else str(value)
            # Hard-wrap the label every 30 characters to keep nodes narrow.
            label_text = "\n".join(text[i:i + 30] for i in range(0, len(text), 30))
            shape = 'diamond' if len(text) > 50 else 'box'
            graph.node(question_id, label_text, shape=shape, style='filled', fillcolor='lightblue')
            graph.edge(parent_label, question_id, color='black')
            json_to_dot(graph, question_id, value, question_id)
        elif key in ("Yes", "No"):
            option_label = f"{node_id}_{key}"
            fill = 'lightgreen' if key == "Yes" else 'lightcoral'
            graph.node(option_label, key, shape='box', style='filled', fillcolor=fill)
            graph.edge(parent_label, option_label, label=key, color='black')
            json_to_dot(graph, option_label, value, option_label)
        elif key == "Result":
            result_label = f"{node_id}_{key}"
            # The regulations entry is optional in practice: fall back to a
            # placeholder instead of raising KeyError when it is missing.
            regulations = parent_node.get('Council regulations', 'N/A')
            result_str = f"{key}: {value}\nCouncil regulations: {regulations}"
            graph.node(result_label, result_str, shape='box', style='filled', fillcolor='lightgrey')
            graph.edge(parent_label, result_label, color='black')
# Render the parsed decision tree as a PNG and show it in the page.
# Runs only when the button was pressed and a PDF was uploaded, i.e. on the
# runs in which `response` has been produced earlier in the script.
if submit and uploaded_file is not None:
    # Create a new graph
    dot = graphviz.Digraph(comment='Decision Tree')
    # Add the root node
    dot.node('Root', 'Start', shape='ellipse', style='filled', fillcolor='lightyellow')
    # Build the DOT format
    json_to_dot(dot, "Root", response, "Root")

    # Render the graph using the Graphviz engine.
    dot.format = 'png'
    try:
        # view=False: this script runs inside a web app, so asking Graphviz to
        # open the result in a desktop viewer is not wanted.
        image_path = dot.render('decision_tree', view=False)
    except graphviz.ExecutableNotFound:
        st.error("Graphviz executables were not found on PATH; cannot render the tree.")
        st.stop()

    # render() returns the path of the generated image file.
    with st.chat_message(""):
        st.write("")
        st.image(image_path, caption='tree from json')