Hemasagar committed on
Commit
508499a
·
verified ·
1 Parent(s): 794e173

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -150
app.py CHANGED
@@ -1,151 +1,42 @@
1
- from beyondllm import source,retrieve,embeddings,llms,generator
2
- import os
3
- from getpass import getpass
4
- from beyondllm.vectordb import ChromaVectorDb
5
- import json
6
- from graphviz import Digraph
7
- import graphviz
8
  import streamlit as st
9
- from beyondllm.llms import AzureOpenAIModel
10
- from beyondllm.embeddings import AzureAIEmbeddings
11
- from Json_2_tree import json_to_dot
12
-
13
- # os.environ["PATH"] += os.pathsep + 'Graphiviz/Graphviz-11.0.0-win64/bin/bin/'
14
- # os.environ["PATH"] += os.pathsep + "Graphviz2.38/bin/dot.exe"
15
-
16
- st.title("Chat with document")
17
-
18
- st.text("Enter API Key")
19
-
20
- # api_key = st.text_input("API Key:", type="password")
21
- # os.environ['OPENAI_API_KEY'] = api_key
22
- st.success("API Key entered successfully!")
23
-
24
- st.caption("Upload a PDF document to get information from the document.")
25
- uploaded_file = st.file_uploader("Choose a PDF file", type='pdf')
26
- submit=st.button("Get the data")
27
- if submit:
28
-
29
- question = "Give Decision taken in the document"
30
- system_prompt = '''You are a business analyst with extensive knowledge of legal documents and regulatory documentation.
31
- Your expertise is in helping people understand and extract key information from such documents.
32
- Your task is to extract the rules and exceptions in a way that enables the creation of a decision tree, facilitating integration into the proper flow.
33
- Legal Document Context: {context}
34
-
35
- Create a decision tree in JSON format based on the following structure:
36
-
37
- Write a question and question should be two response like yes or no. if yes it has fallowing answers or other question
38
- - If Yes, the result should be: "Not restricted" additional -Council regulations: provide dates and articles if possible.
39
- - If No, proceed to the next question2.( by giving some link to the next question not direct to next question)
40
-
41
- 2. Next question based on the previous question outcome.
42
- - If Yes, the result should be: "Not restricted" additional -Council regulations: provide dates and articles if possible.
43
- - If No, proceed to the next question.
44
- In simple terms - flow chat if conditons.
45
- [Continue this structure for as many questions as needed, ensuring each question branches into Yes/No answers and provides appropriate results based on the Council regulations.]
46
- Please continue this format for as many questions as needed, ensuring each question follows the same structure.
47
- Output is the JSON response follow this pattern: Do not change everytime Json output
48
- This is JSON output Example, add more questions in this formate only.
49
- {
50
- "Question1": ,
51
- "Yes": {
52
- "Result": ,
53
- "Council regulations":
54
- },
55
- "No": {
56
- "Question2": ,
57
- "Yes": {
58
- "Result":,
59
- "Council regulations":
60
- },
61
- "No": {
62
- "Question3": ,
63
- "Yes": {
64
- "Result": ,
65
- "Council regulations":
66
- },
67
- "No": {
68
- "Result": ,
69
- "Council regulations":
70
- }
71
- }
72
- }
73
- }
74
- Additional Instructions:
75
-
76
- Analyze the entire document to identify all relevant rules and exceptions.
77
- Ensure that the descriptions of rules and exceptions are clear and concise.
78
- Include relevant dates, jurisdictions, and specific regulations where applicable.
79
- Structure the questions and answers to facilitate the creation of a logical decision tree or workflow.
80
- If the regulation mentions specific products, territories, or operations, include them in the appropriate sections.
81
- Aim to simplify legal language while maintaining accuracy and intent.
82
- [Provide your answer in JSON form. Reply with only the answer in JSON form and include no other commentary]:
83
- Provide your answer in JSON form. Reply with only the answer in JSON form and include no other commentary
84
-
85
- Return Valid Json to create Tree
86
- '''
87
-
88
-
89
-
90
- if uploaded_file is not None and question:
91
-
92
- save_path = "./uploaded_files"
93
- if not os.path.exists(save_path):
94
- os.makedirs(save_path)
95
- file_path = os.path.join(save_path, uploaded_file.name)
96
- with open(file_path, "wb") as f:
97
- f.write(uploaded_file.getbuffer())
98
-
99
- data = source.fit(file_path, dtype="pdf", chunk_size=1024, chunk_overlap=0)
100
- embed_model = AzureAIEmbeddings(
101
- endpoint_url="https://marketplace.openai.azure.com/",
102
- azure_key="d6d9522a01c74836907af2f3fd72ff85",
103
- api_version="2024-02-01",
104
- deployment_name="text-embed-marketplace")
105
-
106
- retriever = retrieve.auto_retriever(data, embed_model, type="normal", top_k=4)
107
- # vectordb = ChromaVectorDb(collection_name="my_persistent_collection", persist_directory="./db/chroma/")
108
-
109
- # llm = llms.ChatOpenAIModel()
110
- BASE_URL = "https://gpt-res.openai.azure.com/"
111
- DEPLOYMENT_NAME= "gpt-4-32k"
112
- API_KEY = "a20bc67dbd7c47ed8c978bbcfdacf930"
113
- llm = AzureOpenAIModel(model="gpt4",azure_key = API_KEY,deployment_name=DEPLOYMENT_NAME ,endpoint_url=BASE_URL,model_kwargs={"max_tokens":512,"temperature":0.1})
114
- pipeline = generator.Generate(question=question, system_prompt=system_prompt, retriever=retriever, llm=llm)
115
- decision_tree_json = pipeline.call()
116
- response = json.loads(decision_tree_json)
117
- # Function to recursively create DOT format from JSON
118
- def json_to_dot(graph, node_id, parent_node, parent_label):
119
- if isinstance(parent_node, dict):
120
- for key, value in parent_node.items():
121
- if key.startswith("Question"):
122
- question_id = f"{node_id}_{key}"
123
- label_text = "\n".join(value[i:i+30] for i in range(0, len(value), 30))
124
- shape = 'diamond' if len(value) > 50 else 'box'
125
- graph.node(question_id, label_text, shape=shape, style='filled', fillcolor='lightblue')
126
- graph.edge(parent_label, question_id, color='black')
127
- json_to_dot(graph, question_id, value, question_id)
128
- elif key in ["Yes", "No"]:
129
- option_label = f"{node_id}_{key}"
130
- graph.node(option_label, key, shape='box', style='filled', fillcolor='lightgreen' if key == "Yes" else 'lightcoral')
131
- graph.edge(parent_label, option_label, label=key, color='black')
132
- json_to_dot(graph, option_label, value, option_label)
133
- elif key == "Result":
134
- result_label = f"{node_id}_{key}"
135
- result_str = f"{key}: {value}\nCouncil regulations: {parent_node['Council regulations']}"
136
- graph.node(result_label, result_str, shape='box', style='filled', fillcolor='lightgrey')
137
- graph.edge(parent_label, result_label, color='black')
138
-
139
- # Create a new graph
140
- dot = graphviz.Digraph(comment='Decision Tree')
141
- # Add the root node
142
- dot.node('Root', 'Start', shape='ellipse', style='filled', fillcolor='lightyellow')
143
- # Build the DOT format
144
- json_to_dot(dot, "Root", response, "Root")
145
- # Render and display the graph using Graphviz engine
146
- dot.format = 'png'
147
- dot.render('decision_tree', view=True)
148
- import streamlit as st
149
- with st.chat_message(""):
150
- st.write("")
151
- st.image('decision_tree.png', caption='tree from json')
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
# Page heading shown at the top of the app.
st.title('Uber pickups in NYC')

# Location of the gzipped CSV that load_data() reads.
DATA_URL = (
    'https://s3-us-west-2.amazonaws.com/streamlit-demo-data/'
    'uber-raw-data-sep14.csv.gz'
)
# Label of the timestamp column (matched after column labels are lowercased).
DATE_COLUMN = 'date/time'
10
+
11
@st.cache_data
def load_data(nrows):
    """Load the first ``nrows`` rows of the CSV at ``DATA_URL``.

    Column labels are lowercased, and the ``DATE_COLUMN`` column is
    converted with ``pd.to_datetime`` before the frame is returned.
    """
    frame = pd.read_csv(DATA_URL, nrows=nrows).rename(
        columns=lambda label: str(label).lower()
    )
    frame[DATE_COLUMN] = pd.to_datetime(frame[DATE_COLUMN])
    return frame
18
+
19
# Show a status line while the data loads, then update that same element.
data_load_state = st.text('Loading data...')
data = load_data(10000)
# load_data is decorated with st.cache_data, so the status text names that API.
data_load_state.text("Done! (using st.cache_data)")

# Optional raw-table view, toggled by the checkbox.
if st.checkbox('Show raw data'):
    st.subheader('Raw data')
    st.write(data)

st.subheader('Number of pickups by hour')
# Hour-of-day for every row; reused for the histogram and the filter below.
pickup_hours = data[DATE_COLUMN].dt.hour
# np.histogram returns (counts, bin_edges); only the counts are charted.
hist_values = np.histogram(pickup_hours, bins=24, range=(0, 24))[0]
st.bar_chart(hist_values)

# Some number in the range 0-23
hour_to_filter = st.slider('hour', 0, 23, 17)
filtered_data = data[pickup_hours == hour_to_filter]

st.subheader(f'Map of all pickups at {hour_to_filter}:00')
st.map(filtered_data)

# Echo the name and size of any uploaded file.
uploaded_file = st.file_uploader("Choose a file")
if uploaded_file is not None:
    st.write(uploaded_file.name)
    bytes_data = uploaded_file.getvalue()
    st.write(len(bytes_data), "bytes")