Spaces:

maxcembalest
/

ask-arthur

Sleeping

App Files Community

maxcembalest committed on Mar 28, 2023

Commit

41b28fa

1 Parent(s): a4ab17e

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -50

app.py CHANGED Viewed

@@ -1,8 +1,5 @@
 import gradio as gr
 import os
-# import pandas as pd
-# import pickle
 from typing import List
 from langchain.llms import OpenAIChat
@@ -22,28 +19,21 @@ given the following extracted parts of a long document and a question. If the qu
 includes a request for code, provide a code block directly from the documentation. If you don't know the answer, just
 say "Hmm, I'm not sure." Don't try to make up an answer. If the question is not about Arthur, politely inform them that
 you are tuned to only answer questions about Arthur.
 =========
 Example 1:
 Question: What data do I need to send to Arthur?
 =========
 **3. What if my data is proprietary? Can I still use Arthur?**
 Yes! Arthur offers on-premises installation for customers with data security requirements. By integrating Arthur
 into your business's on-premises stack, you can be confident that all security requirements are met while still
 getting the benefits of the computation and analytics Arthur provides.
 ***
 **4. What if I don’t have ground truth labels for my data? Or what if I will have the ground truth labels in the future,
 but they are not available yet?**
 You don't need ground truth labels to log your model's inferences with Arthur.
 If your ground truth labels become available after your model's inferences, whether seconds later or years later,
 Arthur can link these new ground truth values to your model's past predictions, linking the new values by ID to
 their corresponding inferences already in the Arthur system.
 In the meantime, Arthur’s data drift metrics can offer leading indicators of model underperformance to keep you
 covered if your ground truth labels are delayed or never become available.
 ***
@@ -51,7 +41,6 @@ covered if your ground truth labels are delayed or never become available.
 Answer in Markdown:
 The data you need to get into Arthur is only the inference data - no ground truth is needed, since it can be uploaded
 at a later time. Also, if you have proprietary data, you can install Arthur on-premises to keep your own data security protocols.
 =========
 Now the real question:
 Question: {question}
@@ -64,14 +53,7 @@ RESPONSE_PROMPT = PromptTemplate(
 )
-# # load vectorstore of embeddings
-# with open("files/vectorstores/arthur_vectorstore.pkl", "rb") as f:
-#     global arthur_vectorstore
-#     arthur_vectorstore = pickle.load(f)
-arthur_vectorstore = None
-def ingest_docs(dir_name, vectorstore_name):
     loader = DirectoryLoader(dir_name)
     raw_documents = loader.load()
     text_splitter = RecursiveCharacterTextSplitter(
@@ -80,19 +62,13 @@ def ingest_docs(dir_name, vectorstore_name):
     )
     documents = text_splitter.split_documents(raw_documents)
     embeddings = OpenAIEmbeddings()
-    vectorstore = FAISS.from_documents(documents, embeddings)
-    arthur_vectorstore = vectorstore
-    # # Save vectorstore
-    # with open(f"files/vectorstores/{vectorstore_name}_vectorstore.pkl", "wb") as f:
-    #     pickle.dump(vectorstore, f)
 def get_langchain_agent(api_key):
     os.environ["OPENAI_API_KEY"] = api_key
-    ingest_docs("files/arthur-docs-markdown", "arthur")
     manager = CallbackManager([])
     question_manager = CallbackManager([])
@@ -118,7 +94,7 @@ def get_langchain_agent(api_key):
     )
     agent = ChatVectorDBChain(
-        vectorstore=arthur_vectorstore,
         combine_docs_chain=chat_response_generator,
         question_generator=question_generator,
         callback_manager=manager,
@@ -140,16 +116,6 @@ def get_source_doc(output):
     return source_text, source_doc_link
-# def log_inference(chat_history: List[List[str]], llm_feedback: int) -> None:
-#     reference_data = pd.read_csv("files/reference_data.csv", index_col=None)
-#     chat_text = []
-#     for user_text, bot_text in chat_history:
-#         bot_text = bot_text.replace("\n", "").replace("<br>", "")
-#         chat_text.append(f"input:<{user_text}>,output:<{bot_text}>,")
-#     reference_data.loc[len(reference_data)] = {"chat": "".join(chat_text), "llm_feedback": llm_feedback}
-#     reference_data.to_csv("files/reference_data.csv", index=False)
 def chat(inp, history, agent):
     history = history or []
     result = agent({"question": inp, "chat_history": history})
@@ -198,11 +164,7 @@ def launch_ask_arthur(share=False):
                     ],
                     inputs=message,
                 )
-                # # feedback radio button
-                # llm_feedback = gr.Radio(
-                #     ["0","1","2"], value="0", label="How useful was this? (0 = bad, 1 = meh, 2 = good)"
-                # )
-                # submit_feedback_button = gr.Button("Submit feedback")
             with gr.Column():
                 source_link = gr.Markdown()
                 source_page = gr.Markdown()
@@ -217,13 +179,7 @@ def launch_ask_arthur(share=False):
         submit_message.click(chat, inputs=[message, state, agent_state], outputs=[chatbot, state, source_page, source_link])
         message.submit(chat, inputs=[message, state, agent_state], outputs=[chatbot, state, source_page, source_link])
-        # submit_feedback_button.click(
-        #     log_inference,
-        #     [chatbot, llm_feedback],
-        # )
     demo.queue().launch(share=share)
-launch_ask_arthur()

 import gradio as gr
 import os
 from typing import List
 from langchain.llms import OpenAIChat
 includes a request for code, provide a code block directly from the documentation. If you don't know the answer, just
 say "Hmm, I'm not sure." Don't try to make up an answer. If the question is not about Arthur, politely inform them that
 you are tuned to only answer questions about Arthur.
 =========
 Example 1:
 Question: What data do I need to send to Arthur?
 =========
 **3. What if my data is proprietary? Can I still use Arthur?**
 Yes! Arthur offers on-premises installation for customers with data security requirements. By integrating Arthur
 into your business's on-premises stack, you can be confident that all security requirements are met while still
 getting the benefits of the computation and analytics Arthur provides.
 ***
 **4. What if I don’t have ground truth labels for my data? Or what if I will have the ground truth labels in the future,
 but they are not available yet?**
 You don't need ground truth labels to log your model's inferences with Arthur.
 If your ground truth labels become available after your model's inferences, whether seconds later or years later,
 Arthur can link these new ground truth values to your model's past predictions, linking the new values by ID to
 their corresponding inferences already in the Arthur system.
 In the meantime, Arthur’s data drift metrics can offer leading indicators of model underperformance to keep you
 covered if your ground truth labels are delayed or never become available.
 ***
 Answer in Markdown:
 The data you need to get into Arthur is only the inference data - no ground truth is needed, since it can be uploaded
 at a later time. Also, if you have proprietary data, you can install Arthur on-premises to keep your own data security protocols.
 =========
 Now the real question:
 Question: {question}
 )
+def get_docs_vectorstore(dir_name):
     loader = DirectoryLoader(dir_name)
     raw_documents = loader.load()
     text_splitter = RecursiveCharacterTextSplitter(
     )
     documents = text_splitter.split_documents(raw_documents)
     embeddings = OpenAIEmbeddings()
+    return FAISS.from_documents(documents, embeddings)
 def get_langchain_agent(api_key):
     os.environ["OPENAI_API_KEY"] = api_key
+    vectorstore = get_docs_vectorstore("files/arthur-docs-markdown")
     manager = CallbackManager([])
     question_manager = CallbackManager([])
     )
     agent = ChatVectorDBChain(
+        vectorstore=vectorstore,
         combine_docs_chain=chat_response_generator,
         question_generator=question_generator,
         callback_manager=manager,
     return source_text, source_doc_link
 def chat(inp, history, agent):
     history = history or []
     result = agent({"question": inp, "chat_history": history})
                     ],
                     inputs=message,
                 )
             with gr.Column():
                 source_link = gr.Markdown()
                 source_page = gr.Markdown()
         submit_message.click(chat, inputs=[message, state, agent_state], outputs=[chatbot, state, source_page, source_link])
         message.submit(chat, inputs=[message, state, agent_state], outputs=[chatbot, state, source_page, source_link])
     demo.queue().launch(share=share)
+launch_ask_arthur()