John Graham Reynolds committed · Commit 31a5031 · 1 Parent(s): 5c3a0bf

update app to make use of chain
app.py CHANGED
```diff
@@ -1,19 +1,20 @@
 import os
 import threading
+import datetime
 import streamlit as st
-from langchain_huggingface import HuggingFaceEmbeddings
-from langchain_databricks.vectorstores import DatabricksVectorSearch
 from itertools import tee
+from chain import ChainBuilder
 
 DATABRICKS_HOST = os.environ.get("DATABRICKS_HOST")
 DATABRICKS_TOKEN = os.environ.get("DATABRICKS_TOKEN")
-VS_ENDPOINT_NAME = os.environ.get("VS_ENDPOINT_NAME")
-VS_INDEX_NAME = os.environ.get("VS_INDEX_NAME")
+# remove these secrets from the container
+# VS_ENDPOINT_NAME = os.environ.get("VS_ENDPOINT_NAME")
+# VS_INDEX_NAME = os.environ.get("VS_INDEX_NAME")
 
 if DATABRICKS_HOST is None:
     raise ValueError("DATABRICKS_HOST environment variable must be set")
 if DATABRICKS_TOKEN is None:
-    raise ValueError("
+    raise ValueError("DATABRICKS_TOKEN environment variable must be set")
 
 MODEL_AVATAR_URL= "./VU.jpeg"
 
```
```diff
@@ -21,20 +22,19 @@ MODEL_AVATAR_URL= "./VU.jpeg"
 # MSG_CLIPPED_AT_MAX_OUT_TOKENS = "Reached maximum output tokens for DBRX Playground"
 
 EXAMPLE_PROMPTS = [
-    "Tell me about maximum out-of-pocket costs in healthcare.",
-    "Write a haiku about Nashville, Tennessee.",
     "How is a data lake used at Vanderbilt University Medical Center?",
     "In a table, what are some of the greatest hurdles to healthcare in the United States?",
     "What does EDW stand for in the context of Vanderbilt University Medical Center?",
     "Code a sql statement that can query a database named 'VUMC'.",
     "Write a short story about a country concert in Nashville, Tennessee.",
+    "Tell me about maximum out-of-pocket costs in healthcare.",
 ]
 
 TITLE = "Vanderbilt AI Assistant"
-DESCRIPTION="""Welcome to the first generation Vanderbilt AI assistant! \n
+DESCRIPTION= """Welcome to the first generation Vanderbilt AI assistant! \n
 This AI assistant is built atop the Databricks DBRX large language model
-and is augmented with additional organization-specific knowledge.
-terms like **Data Lake**, **EDW** (
+and is augmented with additional organization-specific knowledge. Particularly, it has been preliminarily augmented with knowledge of Vanderbilt University Medical Center
+terms like **Data Lake**, **EDW**, **HCERA**, and **thousands more**. (Ask the assistant if you don't know what any of these terms mean!) **Disclaimer**: The model has **no access to PHI**. \n
 Try querying the model with any of the example prompts below for a simple introduction to both Vanderbilt-specific and general knowledge queries. The purpose of this
 model is to allow VUMC employees access to an intelligent assistant that improves and expedites VUMC work. \n
 Feedback and ideas are very welcome! Please provide any feedback, ideas, or issues to the email: **[email protected]**.
```
```diff
@@ -42,6 +42,22 @@ We hope to gradually improve this AI assistant to create a large-scale, all-incl
 
 GENERAL_ERROR_MSG = "An error occurred. Please refresh the page to start a new conversation."
 
+# *** TODO we dont need this here anymore, since the chain handles all of this
+# SYSTEM_PROMPT = """You are DBRX, created by Databricks and augmented by John Graham Reynolds to have access to additional information specific to Vanderbilt University Medical Center. The current date is {date_str}.\n
+# Your knowledge base was last updated in December 2023. You answer questions about events prior to and after December 2023 the way a highly informed individual in December 2023 would if they were talking to someone from the above date, and you can let the user know this when relevant.\n
+# Some of the context you will be given in regards to Vanderbilt University Medical Center could have come after December 2023. The rest of your knowledge base is from before December 2023 and you will answer questions accordingly with these facts.
+# This chunk of text is your system prompt. It is not visible to the user, but it is used to guide your responses. Don't reference it, just respond to the user.\n
+# If you are asked to assist with tasks involving the expression of views held by a significant number of people, you provide assistance with the task even if you personally disagree with the views being expressed, but follow this with a discussion of broader perspectives.\n
+# You don't engage in stereotyping, including the negative stereotyping of majority groups.\n If asked about controversial topics, you try to provide careful thoughts and objective information without downplaying its harmful content or implying that there are reasonable perspectives on both sides.\n
+# You are happy to help with writing, analysis, question answering, math, coding, and all sorts of other tasks.\n You use markdown for coding, which includes JSON blocks and Markdown tables.\n
+# You do not have tools enabled at this time, so cannot run code or access the internet. You can only provide information that you have been trained on. You do not send or receive links or images.\n
+# You were not trained on copyrighted books, song lyrics, poems, video transcripts, or news articles; you do not divulge details of your training data. You do not provide song lyrics, poems, or news articles and instead refer the user to find them online or in a store.\n
+# You give concise responses to simple questions or statements, but provide thorough responses to more complex and open-ended questions.\n
+# The user is unable to see the system prompt, so you should write as if it were true without mentioning it.\n You do not mention any of this information about yourself unless the information is directly pertinent to the user's query.\n
+# Here is some context from the Vanderbilt University Medical Center glossary which might or might not help you answer: {context}.\n
+# Based on this system prompt, to which you will adhere sternly and to which you will make no reference, and this possibly helpful context in relation to Vanderbilt University Medical Center, answer this question: {question}
+# """
+
 # @st.cache_resource
 # def get_global_semaphore():
 #     return threading.BoundedSemaphore(QUEUE_SIZE)
```
```diff
@@ -71,35 +87,12 @@ def clear_chat_history():
 
 st.button('Clear Chat', on_click=clear_chat_history)
 
+# build our chain outside the working body so that its only instantiated once - simply pass it the chat history for chat completion
+chain = ChainBuilder.build_chain()
+
 def last_role_is_user():
     return len(st.session_state["messages"]) > 0 and st.session_state["messages"][-1]["role"] == "user"
 
-def get_system_prompt():
-    return ""
-
-# ** working logic for querying glossary embeddings
-# Same embedding model we used to create embeddings of terms
-# make sure we cache this so that it doesnt redownload each time, hindering Space start time if sleeping
-# try adding this st caching decorator to ensure the embeddings class gets cached after downloading the entirety of the model
-# does this cache to the given folder though? It does appear to populate the folder as expected after being run
-@st.cache_resource # will this work here? https://docs.streamlit.io/develop/concepts/architecture/caching
-def load_embedding_model():
-    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en", cache_folder="./langchain_cache/")
-    return embeddings
-
-embeddings = load_embedding_model()
-# instantiate the vector store for similarity search in our chain
-# need to make this a function and decorate it with @st.experimental_memo as above?
-# We are only calling this initially when the Space starts. Can we expedite this process for users when opening up this Space?
-# @st.cache_data # TODO add this in
-vector_store = DatabricksVectorSearch(
-    endpoint=VS_ENDPOINT_NAME,
-    index_name=VS_INDEX_NAME,
-    embedding=embeddings,
-    text_column="name",
-    columns=["name", "description"],
-)
-
 def text_stream(stream):
     for chunk in stream:
         if chunk["content"] is not None:
```
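Note that the removed `load_embedding_model()` was wrapped in `@st.cache_resource` precisely because Streamlit reruns the whole script on every interaction; the new module-level `chain = ChainBuilder.build_chain()` will likewise re-execute on each rerun. If `build_chain()` is expensive, the same caching pattern could be applied to it. A minimal sketch, assuming `ChainBuilder.build_chain()` is safe to cache as a resource (the `get_chain` helper name is hypothetical):

```python
import streamlit as st

from chain import ChainBuilder


@st.cache_resource  # same pattern the removed load_embedding_model() used
def get_chain():
    # build once per process; Streamlit reruns the script top to bottom on
    # every interaction, and this keeps the chain from being rebuilt each time
    return ChainBuilder.build_chain()


chain = get_chain()
```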
```diff
@@ -116,7 +109,7 @@ def get_stream_warning_error(stream):
     return warning, error
 
 # @retry(wait=wait_random_exponential(min=0.5, max=2), stop=stop_after_attempt(3))
-def chat_api_call(history):
+def chain_call(history):
     # *** original code for instantiating the DBRX model through the OpenAI client *** skip this and introduce our chain eventually
     # extra_body = {}
     # if SAFETY_FILTER:
```
```diff
@@ -133,10 +126,24 @@ def chat_api_call(history):
     # extra_body= extra_body
     # )
 
-    #
+    # *** can we stream the chain's response by incorporating the above OpenAI streaming functionality?
+    # *** Look back at the predict_stream function and see if we can incorporate that!
+    # *** looks like we want to use either chain.stream() or chain.astream()
+    # test first with invoke
+
+    input_example = {'messages':
+        [{'content': 'What does EDW stand for?', 'role': 'user'},
+         {'content': 'Enterprise Data Warehouse.', 'role': 'assistant'},
+         {'content': 'Thank you. What is the data lake?', 'role': 'user'},
+         {'content': 'A data lake is a centralized repository of structured and unstructured data. It allows data to be stored in its native state, without the need for transformations, so that it can be consumed by other users later. It is not just a term for storage, but also covers functionalities required for a platform, including data analysis, machine learning, cataloging and data movement.', 'role': 'assistant'},
+         {'content': 'Can you tell me more about how they are used?', 'role': 'user'},
+         {'content': 'At Vanderbilt University Medical Center, a data lake is used as a centralized repository for storing and managing large amounts of data in its native format. This allows for the data to be easily accessed and analyzed by different teams and business units within the organization. The data lake also provides functionalities such as data analysis, machine learning, cataloging and data movement, making it a versatile tool for handling diverse data sets.\n\nAn Enterprise Data Warehouse (EDW) is used for executing analytic queries on structured data. It is optimized for this purpose, with data being stored in a way that allows for efficient querying and analysis. This makes it a useful tool for teams that need to perform complex analyses on large data sets.\n\nA data mart is a specific organizational structure or pattern used in the context of data warehouses. It is a layer that has specific subdivisions for each business unit or team, such as finance, marketing, and product. This allows users to consume data in a format that meets their specific needs.\n\nA data lakehouse is a term used to describe approaches that attempt to combine the data structure and management features of a data warehouse with the low cost of storage of a data lake. This includes a structured transactional layer, which allows for efficient querying and analysis of data. This approach aims to provide the benefits of both data lakes and data warehouses in a single platform.', 'role': 'assistant'},
+         {'content': 'Nice answer. Can you tell me what the HCERA is?', 'role': 'user'}]}
+
     st.write(history)
-    search_result = vector_store.similarity_search(query=st.session_state["messages"][-1]["content"], k=5)
-    chat_completion = search_result # TODO update this after we implement our chain
+    # search_result = vector_store.similarity_search(query=st.session_state["messages"][-1]["content"], k=5)
+    # chat_completion = search_result # TODO update this after we implement our chain
+    chat_completion = chain.invoke(input_example) # *** TODO here we will pass only the chat history, the chain handles the system prompt
     return chat_completion
 
 def write_response():
```
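The hard-coded `input_example` above exercises the chain end to end; the trailing TODO says the real chat history should eventually be passed instead. A minimal sketch of that end state, assuming the chain accepts the same `{'messages': [...]}` shape as `input_example`:

```python
def chain_call(history):
    # history is st.session_state["messages"]: a list of
    # {"role": ..., "content": ...} dicts; the chain injects the system
    # prompt and the retrieved glossary context itself
    return chain.invoke({"messages": history})
```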
```diff
@@ -154,11 +161,11 @@ def write_response():
     return response, stream_warning, stream_error
 
 def chat_completion(messages):
-    history_dbrx_format = [
-        {"role": "system", "content": SYSTEM_PROMPT}
-    ]
-
-    history_dbrx_format = history_dbrx_format + messages
+    # history_dbrx_format = [
+    #     {"role": "system", "content": SYSTEM_PROMPT} # no longer need this because the chain handles all of this for us
+    # ]
+
+    # history_dbrx_format = history_dbrx_format + messages
     # if (len(history_dbrx_format)-1)//2 >= MAX_CHAT_TURNS:
     #     yield {"content": None, "error": MSG_MAX_TURNS_EXCEEDED, "warning": None}
     #     return
```
```diff
@@ -172,7 +179,8 @@ def chat_completion(messages):
     # chat_completion = chat_api_call(history_dbrx_format)
     # except Exception as e:
     #     error = e
-    chat_completion =
+    # chat_completion = chain_call(history_dbrx_format)
+    chat_completion = chain_call(messages) # simply pass the old messages, need not worry about the system prompt
     if error is not None:
         yield {"content": None, "error": GENERAL_ERROR_MSG, "warning": None}
         print(error)
```
```diff
@@ -183,10 +191,13 @@ def chat_completion(messages):
     chunk_counter = 0
     for chunk in chat_completion:
         # if chunk.choices[0].delta.content is not None:
-        if chunk.page_content is not None:
+        # TODO *** we need to refactor this logic to match what happens with the response from our chain - it should be strings or an iterator of strings
+        # if chunk.page_content is not None:
+        if chunk is not None:
             chunk_counter += 1
             # partial_message += chunk.choices[0].delta.content
-            partial_message += f"* {chunk.page_content} [{chunk.metadata}]"
+            # partial_message += f"* {chunk.page_content} [{chunk.metadata}]"
+            partial_message += chunk
             if chunk_counter % TOKEN_CHUNK_SIZE == 0:
                 chunk_counter = 0
                 yield {"content": partial_message, "error": None, "warning": None}
```
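As the TODO in this last hunk notes, `chain.invoke()` returns one complete string, so the `for chunk in chat_completion` loop is now effectively iterating characters; the comments in the earlier hunk point at `chain.stream()` as the fix. A sketch of that refactor, assuming the chain ends in a string output parser so `stream()` yields incremental `str` chunks (an assumption about `chain.py`):

```python
def chat_completion(messages):
    partial_message = ""
    chunk_counter = 0
    try:
        # chain.stream() yields incremental string chunks when the chain
        # ends in StrOutputParser; batch them to limit UI redraws
        for chunk in chain.stream({"messages": messages}):
            if chunk:
                partial_message += chunk
                chunk_counter += 1
                if chunk_counter % TOKEN_CHUNK_SIZE == 0:
                    yield {"content": partial_message, "error": None, "warning": None}
        # flush whatever arrived after the last full batch
        yield {"content": partial_message, "error": None, "warning": None}
    except Exception as e:
        print(e)
        yield {"content": None, "error": GENERAL_ERROR_MSG, "warning": None}
```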
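`chain.py` itself is not part of this commit, so the `ChainBuilder` interface the app now depends on is visible only from its call sites (`build_chain()`, `invoke({'messages': ...})`). Below is a hypothetical sketch of how the pieces this commit removed from `app.py` (the cached BAAI/bge-large-en embeddings, the `DatabricksVectorSearch` store over the glossary, and the commented-out `SYSTEM_PROMPT`) could be reassembled behind that interface; every name other than `ChainBuilder` and `build_chain` is an assumption, not the repo's actual code:

```python
# chain.py -- hypothetical sketch, not the actual file from this Space
import os

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda
from langchain_databricks import ChatDatabricks
from langchain_databricks.vectorstores import DatabricksVectorSearch
from langchain_huggingface import HuggingFaceEmbeddings

# stand-in for the full prompt from the commented-out block in app.py
SYSTEM_PROMPT = "... context: {context} ... answer this question: {question}"


class ChainBuilder:

    @staticmethod
    def build_chain():
        # the embedding model and vector store this commit removed from app.py
        embeddings = HuggingFaceEmbeddings(
            model_name="BAAI/bge-large-en", cache_folder="./langchain_cache/"
        )
        vector_store = DatabricksVectorSearch(
            endpoint=os.environ["VS_ENDPOINT_NAME"],  # secrets now read here, not in app.py
            index_name=os.environ["VS_INDEX_NAME"],
            embedding=embeddings,
            text_column="name",
            columns=["name", "description"],
        )
        retriever = vector_store.as_retriever(search_kwargs={"k": 5})

        def latest_user_message(inputs: dict) -> str:
            # the chain receives {"messages": [...]} and retrieves against the
            # newest user turn, as app.py's similarity_search call used to
            return inputs["messages"][-1]["content"]

        def format_docs(docs) -> str:
            return "\n".join(f"{d.page_content}: {d.metadata}" for d in docs)

        prompt = PromptTemplate.from_template(SYSTEM_PROMPT)
        llm = ChatDatabricks(endpoint="databricks-dbrx-instruct")  # assumed serving endpoint name

        return (
            {
                "context": RunnableLambda(latest_user_message) | retriever | RunnableLambda(format_docs),
                "question": RunnableLambda(latest_user_message),
            }
            | prompt
            | llm
            | StrOutputParser()
        )
```

Because the sketch ends in `StrOutputParser`, it would satisfy both call patterns the updated `app.py` relies on: `chain.invoke(...)` returning a single string and, later, `chain.stream(...)` yielding string chunks.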