# Hosting-page banner captured together with this file (not program code):
# "Spaces: Configuration error"
from beyondllm import source,retrieve,embeddings,llms,generator | |
import os | |
from getpass import getpass | |
from beyondllm.vectordb import ChromaVectorDb | |
import json | |
from graphviz import Digraph | |
import graphviz | |
import streamlit as st | |
from beyondllm.llms import AzureOpenAIModel | |
from beyondllm.embeddings import AzureAIEmbeddings | |
from Json_2_tree import json_to_dot | |
# os.environ["PATH"] += os.pathsep + 'Graphiviz/Graphviz-11.0.0-win64/bin/bin/' | |
# os.environ["PATH"] += os.pathsep + "Graphviz2.38/bin/dot.exe" | |
# Page header and input widgets. Streamlit re-executes the script on every
# interaction, so these calls run on each rerun.
st.title("Chat with document")

# NOTE(review): the password text input for the API key (and the assignment to
# OPENAI_API_KEY) is disabled, yet the prompt text and the success banner below
# are still rendered unconditionally.
st.text("Enter API Key")
st.success("API Key entered successfully!")

st.caption("Upload a PDF document to get information from the document.")

# The PDF to analyse and the button that triggers the analysis.
uploaded_file = st.file_uploader(label="Choose a PDF file", type="pdf")
submit = st.button(label="Get the data")
# Runs when the user presses the button: stores the uploaded PDF, indexes it,
# asks the LLM for a decision tree in JSON form and parses the reply into
# `response`.
if submit:
    # Fixed query sent to the pipeline; the detailed instructions live in the
    # system prompt below.
    question = "Give Decision taken in the document"
    system_prompt = '''You are a business analyst with extensive knowledge of legal documents and regulatory documentation.
Your expertise is in helping people understand and extract key information from such documents.
Your task is to extract the rules and exceptions in a way that enables the creation of a decision tree, facilitating integration into the proper flow.
Legal Document Context: {context}
Create a decision tree in JSON format based on the following structure:
Write a question and question should be two response like yes or no. if yes it has fallowing answers or other question
- If Yes, the result should be: "Not restricted" additional -Council regulations: provide dates and articles if possible.
- If No, proceed to the next question2.( by giving some link to the next question not direct to next question)
2. Next question based on the previous question outcome.
- If Yes, the result should be: "Not restricted" additional -Council regulations: provide dates and articles if possible.
- If No, proceed to the next question.
In simple terms - flow chat if conditons.
[Continue this structure for as many questions as needed, ensuring each question branches into Yes/No answers and provides appropriate results based on the Council regulations.]
Please continue this format for as many questions as needed, ensuring each question follows the same structure.
Output is the JSON response follow this pattern: Do not change everytime Json output
This is JSON output Example, add more questions in this formate only.
{
"Question1": ,
"Yes": {
"Result": ,
"Council regulations":
},
"No": {
"Question2": ,
"Yes": {
"Result":,
"Council regulations":
},
"No": {
"Question3": ,
"Yes": {
"Result": ,
"Council regulations":
},
"No": {
"Result": ,
"Council regulations":
}
}
}
}
Additional Instructions:
Analyze the entire document to identify all relevant rules and exceptions.
Ensure that the descriptions of rules and exceptions are clear and concise.
Include relevant dates, jurisdictions, and specific regulations where applicable.
Structure the questions and answers to facilitate the creation of a logical decision tree or workflow.
If the regulation mentions specific products, territories, or operations, include them in the appropriate sections.
Aim to simplify legal language while maintaining accuracy and intent.
[Provide your answer in JSON form. Reply with only the answer in JSON form and include no other commentary]:
Provide your answer in JSON form. Reply with only the answer in JSON form and include no other commentary
Return Valid Json to create Tree
'''
    if uploaded_file is not None and question:
        # Credentials are read from the environment instead of being embedded
        # in the source. Keys that were previously committed in this file
        # should be treated as leaked and rotated.
        EMBEDDING_API_KEY = os.environ.get("AZURE_EMBEDDINGS_API_KEY")
        API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
        if not EMBEDDING_API_KEY or not API_KEY:
            st.error(
                "Missing Azure credentials: set the AZURE_EMBEDDINGS_API_KEY "
                "and AZURE_OPENAI_API_KEY environment variables."
            )
            st.stop()

        # Persist the upload to disk because the loader below takes a path.
        save_path = "./uploaded_files"
        os.makedirs(save_path, exist_ok=True)
        # basename() drops any directory components from the client-supplied
        # file name so the write cannot land outside save_path.
        file_path = os.path.join(save_path, os.path.basename(uploaded_file.name))
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # Load the PDF and build the retriever over its chunks.
        data = source.fit(file_path, dtype="pdf", chunk_size=1024, chunk_overlap=0)
        embed_model = AzureAIEmbeddings(
            endpoint_url="https://marketplace.openai.azure.com/",
            azure_key=EMBEDDING_API_KEY,
            api_version="2024-02-01",
            deployment_name="text-embed-marketplace",
        )
        retriever = retrieve.auto_retriever(data, embed_model, type="normal", top_k=4)

        BASE_URL = "https://gpt-res.openai.azure.com/"
        DEPLOYMENT_NAME = "gpt-4-32k"
        # NOTE(review): max_tokens=512 may be too small for a multi-level JSON
        # tree; a truncated reply fails the JSON parse below - confirm.
        llm = AzureOpenAIModel(
            model="gpt4",
            azure_key=API_KEY,
            deployment_name=DEPLOYMENT_NAME,
            endpoint_url=BASE_URL,
            model_kwargs={"max_tokens": 512, "temperature": 0.1},
        )
        pipeline = generator.Generate(
            question=question,
            system_prompt=system_prompt,
            retriever=retriever,
            llm=llm,
        )
        decision_tree_json = pipeline.call()

        # The model output is untrusted text: report a parse failure in the UI
        # and halt this run instead of surfacing a raw traceback.
        try:
            response = json.loads(decision_tree_json)
        except (TypeError, ValueError):
            st.error("The model did not return valid JSON, so no decision tree can be drawn.")
            st.stop()
# Recursively translate the decision-tree JSON into Graphviz nodes and edges.
def json_to_dot(graph, node_id, parent_node, parent_label):
    """Add the nodes and edges described by *parent_node* to *graph*.

    Args:
        graph: Object exposing ``node(name, label, **attrs)`` and
            ``edge(tail, head, **attrs)`` (e.g. ``graphviz.Digraph``).
        node_id: Prefix used to build the ids of nodes created at this level.
        parent_node: One level of the decision tree. Only dicts are
            processed; any other value is ignored.
        parent_label: Id of the already-existing node this level hangs from.

    Keys starting with ``"Question"`` become question nodes, ``"Yes"`` /
    ``"No"`` become answer nodes whose values are processed recursively, and
    ``"Result"`` becomes a leaf that also shows ``"Council regulations"``.
    """
    if not isinstance(parent_node, dict):
        return

    # Question nodes are created first so that the Yes/No answers stored next
    # to a question in the same dict can branch from that question's node,
    # whatever the key order is.
    question_node = None
    for key, value in parent_node.items():
        if not key.startswith("Question"):
            continue
        question_id = f"{node_id}_{key}"
        text = str(value)  # the model may emit a non-string question value
        # Hard-wrap the label every 30 characters to keep nodes narrow.
        label_text = "\n".join(text[i:i + 30] for i in range(0, len(text), 30))
        shape = 'diamond' if len(text) > 50 else 'box'
        graph.node(question_id, label_text, shape=shape, style='filled', fillcolor='lightblue')
        graph.edge(parent_label, question_id, color='black')
        if question_node is None:
            question_node = question_id

    # Without a question at this level, answers fall back to the parent node.
    branch_source = question_node if question_node is not None else parent_label

    for key, value in parent_node.items():
        if key in ("Yes", "No"):
            option_label = f"{node_id}_{key}"
            graph.node(option_label, key, shape='box', style='filled',
                       fillcolor='lightgreen' if key == "Yes" else 'lightcoral')
            graph.edge(branch_source, option_label, label=key, color='black')
            json_to_dot(graph, option_label, value, option_label)
        elif key == "Result":
            result_label = f"{node_id}_{key}"
            # The regulations entry is optional in practice; do not fail the
            # whole rendering when it is omitted.
            regulations = parent_node.get('Council regulations', 'Not specified')
            result_str = f"{key}: {value}\nCouncil regulations: {regulations}"
            graph.node(result_label, result_str, shape='box', style='filled', fillcolor='lightgrey')
            graph.edge(parent_label, result_label, color='black')
# Draw the parsed decision tree and show it in the page.
# The guard mirrors the condition under which `response` is produced, so a
# rerun without a submitted PDF never reads an undefined name.
if submit and uploaded_file is not None:
    # Create a new graph with a single root node to hang the tree from.
    dot = graphviz.Digraph(comment='Decision Tree')
    dot.node('Root', 'Start', shape='ellipse', style='filled', fillcolor='lightyellow')

    # Build the DOT structure from the parsed JSON.
    json_to_dot(dot, "Root", response, "Root")

    # Render to PNG with the Graphviz engine. No `view=True`: that asks the
    # host to open a desktop viewer, which is wrong for a server-side app.
    dot.format = 'png'
    rendered_path = dot.render('decision_tree')

    with st.chat_message(""):
        st.write("")
    # Use the path reported by render() rather than assuming the file name.
    st.image(rendered_path, caption='tree from json')