John Graham Reynolds committed
Commit · 81f74ed · 1 Parent(s): b5c44b5
clean up comments and limit chat history
chain.py CHANGED
@@ -57,11 +57,9 @@ class ChainBuilder:
     def load_embedding_model(self):
         model_name = self.retriever_config.get("embedding_model")
 
-        # make sure we cache this so that it doesnt redownload each time
-        # try adding this st caching decorator to ensure the embeddings class gets cached after downloading the entirety of the model
+        # make sure we cache this so that it doesnt redownload each time
         # cannot directly use @st.cache_resource on a method (function within a class) that has a self argument
-        #
-        @st.cache_resource # will this work here? https://docs.streamlit.io/develop/concepts/architecture/caching
+        @st.cache_resource # https://docs.streamlit.io/develop/concepts/architecture/caching
         def load_and_cache_embedding_model(model_name):
             embeddings = HuggingFaceEmbeddings(model_name=model_name, cache_folder="./langchain_cache/") # this cache isnt working because were in the Docker container
             # update this to read from a presaved cache of bge-large
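Assembled from the hunk above, a minimal runnable sketch of the caching pattern this commit keeps: @st.cache_resource cannot decorate a method that takes self, so the cached loader is declared as an inner function keyed only on model_name. The class scaffolding and the shape of retriever_config are assumptions, not taken from the rest of chain.py.

import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings

class ChainBuilder:
    def __init__(self, retriever_config):
        self.retriever_config = retriever_config  # assumed to be a plain dict

    def load_embedding_model(self):
        model_name = self.retriever_config.get("embedding_model")

        @st.cache_resource  # cached across Streamlit reruns, keyed only on model_name
        def load_and_cache_embedding_model(model_name):
            # downloads the model once; the cached object is reused afterwards
            return HuggingFaceEmbeddings(model_name=model_name, cache_folder="./langchain_cache/")

        return load_and_cache_embedding_model(model_name)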
@@ -78,7 +76,7 @@ class ChainBuilder:
     # you cannot directly use @st.cache_resource on a method (function within a class) that has a self argument.
     # This is because Streamlit's caching mechanism relies on hashing the function's code and input parameters, and the self argument represents the instance of the class, which is not hashable by default.
     # 'Cannot hash argument 'embeddings' (of type `langchain_huggingface.embeddings.huggingface.HuggingFaceEmbeddings`) in 'get_and_cache_retriever''
-    # this is fine, we are caching the entire function above for embeddings, so recalling it entirely is fast. We _embeddings to not ignore hashing this argument
+    # this is fine, we are caching the entire function above for 'embeddings', so recalling it entirely is fast. We _embeddings to not ignore hashing this argument
     @st.cache_resource # cache the Databricks vector store retriever
     def get_and_cache_retriever(endpoint, index_name, _embeddings, search_kwargs):
         vector_search_as_retriever = DatabricksVectorSearch(
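The leading underscore in _embeddings is the Streamlit convention the comments above refer to: st.cache_resource builds its cache key by hashing the function's arguments, and any parameter whose name starts with an underscore is skipped, which avoids the "Cannot hash argument 'embeddings'" error. A small self-contained sketch of the convention; the retriever construction is stubbed out because the DatabricksVectorSearch arguments are not shown in this hunk.

import streamlit as st

class FakeEmbeddings:
    """Stand-in for the HuggingFaceEmbeddings instance passed in from chain.py."""

@st.cache_resource  # endpoint, index_name, and search_kwargs form the cache key
def get_and_cache_retriever(endpoint, index_name, _embeddings, search_kwargs):
    # `_embeddings` is excluded from hashing but remains a normal argument here;
    # in chain.py it is handed to DatabricksVectorSearch to build the retriever.
    return {"endpoint": endpoint, "index_name": index_name,
            "embeddings": _embeddings, "search_kwargs": search_kwargs}

retriever = get_and_cache_retriever("vs-endpoint", "catalog.schema.index",
                                    FakeEmbeddings(), {"k": 5})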
@@ -120,8 +118,6 @@ class ChainBuilder:
         prompt = ChatPromptTemplate.from_messages(
             [
                 ("system", self.get_system_prompt()),
-                # *** Note: This chain does not compress the history, so very long converastions can overflow the context window. TODO
-                # We need to at some point chop this history down to fixed amount of recent messages
                 MessagesPlaceholder(variable_name="formatted_chat_history"), # placeholder for var named 'formatted_chat_history' with messages to be passed
                 # User's most current question
                 ("user", "{question}"),
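For reference, a standalone sketch of how the MessagesPlaceholder slot above is filled at prompt time. The system string is a stand-in; in chain.py it comes from self.get_system_prompt().

from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),  # stand-in for get_system_prompt()
        MessagesPlaceholder(variable_name="formatted_chat_history"),
        ("user", "{question}"),
    ]
)

# the placeholder expands to the (now truncated) chat history; the newest question fills {question}
messages = prompt.format_messages(
    formatted_chat_history=[HumanMessage(content="hi"), AIMessage(content="hello!")],
    question="what does the retriever index?",
)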
@@ -130,12 +126,12 @@ class ChainBuilder:
         return prompt # return directly?
 
     # Format the converastion history to fit into the prompt template above.
-    # **** TODO after only a few statements this will likely overflow the context window
     def format_chat_history_for_prompt(self, chat_messages_array):
         history = self.extract_chat_history(chat_messages_array)
         formatted_chat_history = []
         if len(history) > 0:
-            for chat_message in history:
+            # grab at most just the last three sets of queries and respones as chat history for relevant context - limit history so as to not overflow 32k context window
+            for chat_message in history[-6:]:
                 if chat_message["role"] == "user":
                     formatted_chat_history.append(HumanMessage(content=chat_message["content"]))
                 elif chat_message["role"] == "assistant":
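After this change the loop only ever sees the newest six entries, i.e. roughly the last three question/answer exchanges. A standalone sketch of the helper as it reads after the commit (it drops self and extract_chat_history, and the elif branch is cut off in the hunk, so the AIMessage counterpart is an assumption):

from langchain_core.messages import AIMessage, HumanMessage

def format_chat_history_for_prompt(history):
    # keep only the last six messages (~three user/assistant exchanges) so a long
    # conversation cannot overflow the 32k-token context window
    formatted_chat_history = []
    for chat_message in history[-6:]:
        if chat_message["role"] == "user":
            formatted_chat_history.append(HumanMessage(content=chat_message["content"]))
        elif chat_message["role"] == "assistant":
            formatted_chat_history.append(AIMessage(content=chat_message["content"]))  # assumed branch
    return formatted_chat_history

# ten alternating messages in, only the newest six come back out
sample = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
assert len(format_chat_history_for_prompt(sample)) == 6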
@@ -201,5 +197,5 @@ class ChainBuilder:
         )
         return chain
 
-    # ## Tell MLflow logging where to find
+    # ## Tell MLflow logging where to find chain. # TODO can we implement this later for logging?
     # mlflow.models.set_model(model=chain)