Update app.py
app.py CHANGED
@@ -1,151 +1,42 @@
-from beyondllm import source,retrieve,embeddings,llms,generator
-import os
-from getpass import getpass
-from beyondllm.vectordb import ChromaVectorDb
-import json
-from graphviz import Digraph
-import graphviz
 import streamlit as st
-st.
-        "Question1": ,
-        "Yes": {
-            "Result": ,
-            "Council regulations":
-        },
-        "No": {
-            "Question2": ,
-            "Yes": {
-                "Result":,
-                "Council regulations":
-            },
-            "No": {
-                "Question3": ,
-                "Yes": {
-                    "Result": ,
-                    "Council regulations":
-                },
-                "No": {
-                    "Result": ,
-                    "Council regulations":
-                }
-            }
-        }
-    }
-Additional Instructions:
-
-Analyze the entire document to identify all relevant rules and exceptions.
-Ensure that the descriptions of rules and exceptions are clear and concise.
-Include relevant dates, jurisdictions, and specific regulations where applicable.
-Structure the questions and answers to facilitate the creation of a logical decision tree or workflow.
-If the regulation mentions specific products, territories, or operations, include them in the appropriate sections.
-Aim to simplify legal language while maintaining accuracy and intent.
-[Provide your answer in JSON form. Reply with only the answer in JSON form and include no other commentary]:
-Provide your answer in JSON form. Reply with only the answer in JSON form and include no other commentary
-
-Return Valid Json to create Tree
-'''
-
-
-if uploaded_file is not None and question:
-
-    save_path = "./uploaded_files"
-    if not os.path.exists(save_path):
-        os.makedirs(save_path)
-    file_path = os.path.join(save_path, uploaded_file.name)
-    with open(file_path, "wb") as f:
-        f.write(uploaded_file.getbuffer())
-
-    data = source.fit(file_path, dtype="pdf", chunk_size=1024, chunk_overlap=0)
-    embed_model = AzureAIEmbeddings(
-        endpoint_url="https://marketplace.openai.azure.com/",
-        azure_key="d6d9522a01c74836907af2f3fd72ff85",
-        api_version="2024-02-01",
-        deployment_name="text-embed-marketplace")
-
-    retriever = retrieve.auto_retriever(data, embed_model, type="normal", top_k=4)
-    # vectordb = ChromaVectorDb(collection_name="my_persistent_collection", persist_directory="./db/chroma/")
-
-    # llm = llms.ChatOpenAIModel()
-    BASE_URL = "https://gpt-res.openai.azure.com/"
-    DEPLOYMENT_NAME = "gpt-4-32k"
-    API_KEY = "a20bc67dbd7c47ed8c978bbcfdacf930"
-    llm = AzureOpenAIModel(model="gpt4", azure_key=API_KEY, deployment_name=DEPLOYMENT_NAME, endpoint_url=BASE_URL, model_kwargs={"max_tokens":512, "temperature":0.1})
-    pipeline = generator.Generate(question=question, system_prompt=system_prompt, retriever=retriever, llm=llm)
-    decision_tree_json = pipeline.call()
-    response = json.loads(decision_tree_json)
-    # Function to recursively create DOT format from JSON
-    def json_to_dot(graph, node_id, parent_node, parent_label):
-        if isinstance(parent_node, dict):
-            for key, value in parent_node.items():
-                if key.startswith("Question"):
-                    question_id = f"{node_id}_{key}"
-                    label_text = "\n".join(value[i:i+30] for i in range(0, len(value), 30))
-                    shape = 'diamond' if len(value) > 50 else 'box'
-                    graph.node(question_id, label_text, shape=shape, style='filled', fillcolor='lightblue')
-                    graph.edge(parent_label, question_id, color='black')
-                    json_to_dot(graph, question_id, value, question_id)
-                elif key in ["Yes", "No"]:
-                    option_label = f"{node_id}_{key}"
-                    graph.node(option_label, key, shape='box', style='filled', fillcolor='lightgreen' if key == "Yes" else 'lightcoral')
-                    graph.edge(parent_label, option_label, label=key, color='black')
-                    json_to_dot(graph, option_label, value, option_label)
-                elif key == "Result":
-                    result_label = f"{node_id}_{key}"
-                    result_str = f"{key}: {value}\nCouncil regulations: {parent_node['Council regulations']}"
-                    graph.node(result_label, result_str, shape='box', style='filled', fillcolor='lightgrey')
-                    graph.edge(parent_label, result_label, color='black')
-
-    # Create a new graph
-    dot = graphviz.Digraph(comment='Decision Tree')
-    # Add the root node
-    dot.node('Root', 'Start', shape='ellipse', style='filled', fillcolor='lightyellow')
-    # Build the DOT format
-    json_to_dot(dot, "Root", response, "Root")
-    # Render and display the graph using Graphviz engine
-    dot.format = 'png'
-    dot.render('decision_tree', view=True)
-    import streamlit as st
-    with st.chat_message(""):
-        st.write("")
-        st.image('decision_tree.png', caption='tree from json')
+import pandas as pd
+import numpy as np
+
+st.title('Uber pickups in NYC')
+
+DATE_COLUMN = 'date/time'
+DATA_URL = ('https://s3-us-west-2.amazonaws.com/'
+            'streamlit-demo-data/uber-raw-data-sep14.csv.gz')
+
+@st.cache_data
+def load_data(nrows):
+    data = pd.read_csv(DATA_URL, nrows=nrows)
+    lowercase = lambda x: str(x).lower()
+    data.rename(lowercase, axis='columns', inplace=True)
+    data[DATE_COLUMN] = pd.to_datetime(data[DATE_COLUMN])
+    return data
+
+data_load_state = st.text('Loading data...')
+data = load_data(10000)
+data_load_state.text("Done! (using st.cache)")
+
+if st.checkbox('Show raw data'):
+    st.subheader('Raw data')
+    st.write(data)
+
+st.subheader('Number of pickups by hour')
+hist_values = np.histogram(data[DATE_COLUMN].dt.hour, bins=24, range=(0,24))[0]
+st.bar_chart(hist_values)
+
+# Some number in the range 0-23
+hour_to_filter = st.slider('hour', 0, 23, 17)
+filtered_data = data[data[DATE_COLUMN].dt.hour == hour_to_filter]
+
+st.subheader('Map of all pickups at %s:00' % hour_to_filter)
+st.map(filtered_data)
+
+uploaded_file = st.file_uploader("Choose a file")
+if uploaded_file is not None:
+    st.write(uploaded_file.name)
+    bytes_data = uploaded_file.getvalue()
+    st.write(len(bytes_data), "bytes")
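
The new app.py leans on @st.cache_data for its data loader. As a point of reference, this is a minimal standalone sketch of that caching pattern, not part of the commit: the decorated function runs once per distinct argument value, and repeat calls with the same argument return the cached result instead of re-downloading the CSV. The file would be run with `streamlit run`; the lowercasing detail is illustrative, the URL is the one used in the commit.

import pandas as pd
import streamlit as st

DATA_URL = ('https://s3-us-west-2.amazonaws.com/'
            'streamlit-demo-data/uber-raw-data-sep14.csv.gz')

@st.cache_data
def load_data(nrows: int) -> pd.DataFrame:
    # Executed only on a cache miss; the result is memoised per `nrows`.
    data = pd.read_csv(DATA_URL, nrows=nrows)
    data.columns = [str(c).lower() for c in data.columns]
    return data

df = load_data(10000)   # first call: downloads and parses the CSV
df = load_data(10000)   # second call: served from the cache
st.dataframe(df.head())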
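
For reference, the removed version rendered an LLM-generated JSON decision tree with Graphviz through a recursive json_to_dot helper. The following is a minimal, self-contained sketch of that idea only; the walk helper and the example tree are hypothetical, and it assumes the graphviz Python package and the Graphviz binaries are installed.

import graphviz

def walk(graph, node, parent_id, prefix="n"):
    # Recursively add one node per key and connect it to its parent.
    for i, (key, value) in enumerate(node.items()):
        node_id = f"{prefix}_{i}"
        label = f"{key}: {value}" if isinstance(value, str) else key
        graph.node(node_id, label, shape="box")
        graph.edge(parent_id, node_id)
        if isinstance(value, dict):
            walk(graph, value, node_id, prefix=node_id)

# Hypothetical tree with the Yes/No shape the old system prompt asked for.
tree = {"Question1": {"Yes": {"Result": "Allowed"}, "No": {"Result": "Not allowed"}}}

dot = graphviz.Digraph(comment="Decision Tree")
dot.node("root", "Start", shape="ellipse")
walk(dot, tree, "root")
dot.format = "png"
dot.render("decision_tree_sketch")  # writes decision_tree_sketch.png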