Spaces:
Sleeping
Sleeping
Commit
·
41b28fa
1
Parent(s):
a4ab17e
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,5 @@
|
|
1 |
-
|
2 |
import gradio as gr
|
3 |
import os
|
4 |
-
# import pandas as pd
|
5 |
-
# import pickle
|
6 |
from typing import List
|
7 |
|
8 |
from langchain.llms import OpenAIChat
|
@@ -22,28 +19,21 @@ given the following extracted parts of a long document and a question. If the qu
|
|
22 |
includes a request for code, provide a code block directly from the documentation. If you don't know the answer, just
|
23 |
say "Hmm, I'm not sure." Don't try to make up an answer. If the question is not about Arthur, politely inform them that
|
24 |
you are tuned to only answer questions about Arthur.
|
25 |
-
|
26 |
=========
|
27 |
Example 1:
|
28 |
-
|
29 |
Question: What data do I need to send to Arthur?
|
30 |
=========
|
31 |
**3. What if my data is proprietary? Can I still use Arthur?**
|
32 |
-
|
33 |
Yes! Arthur offers on-premises installation for customers with data security requirements. By integrating Arthur
|
34 |
into your business's on-premises stack, you can be confident that all security requirements are met while still
|
35 |
getting the benefits of the computation and analytics Arthur provides.
|
36 |
***
|
37 |
-
|
38 |
**4. What if I don’t have ground truth labels for my data? Or what if I will have the ground truth labels in the future,
|
39 |
but they are not available yet?**
|
40 |
-
|
41 |
You don't need ground truth labels to log your model's inferences with Arthur.
|
42 |
-
|
43 |
If your ground truth labels become available after your model's inferences, whether seconds later or years later,
|
44 |
Arthur can link these new ground truth values to your model's past predictions, linking the new values by ID to
|
45 |
their corresponding inferences already in the Arthur system.
|
46 |
-
|
47 |
In the meantime, Arthur’s data drift metrics can offer leading indicators of model underperformance to keep you
|
48 |
covered if your ground truth labels are delayed or never become available.
|
49 |
***
|
@@ -51,7 +41,6 @@ covered if your ground truth labels are delayed or never become available.
|
|
51 |
Answer in Markdown:
|
52 |
The data you need to get into Arthur is only the inference data - no ground truth is needed, since it can be uploaded
|
53 |
at a later time. Also, if you have proprietary data, you can install Arthur on-premises to keep your own data security protocols.
|
54 |
-
|
55 |
=========
|
56 |
Now the real question:
|
57 |
Question: {question}
|
@@ -64,14 +53,7 @@ RESPONSE_PROMPT = PromptTemplate(
|
|
64 |
)
|
65 |
|
66 |
|
67 |
-
|
68 |
-
# with open("files/vectorstores/arthur_vectorstore.pkl", "rb") as f:
|
69 |
-
# global arthur_vectorstore
|
70 |
-
# arthur_vectorstore = pickle.load(f)
|
71 |
-
|
72 |
-
arthur_vectorstore = None
|
73 |
-
|
74 |
-
def ingest_docs(dir_name, vectorstore_name):
|
75 |
loader = DirectoryLoader(dir_name)
|
76 |
raw_documents = loader.load()
|
77 |
text_splitter = RecursiveCharacterTextSplitter(
|
@@ -80,19 +62,13 @@ def ingest_docs(dir_name, vectorstore_name):
|
|
80 |
)
|
81 |
documents = text_splitter.split_documents(raw_documents)
|
82 |
embeddings = OpenAIEmbeddings()
|
83 |
-
|
84 |
-
|
85 |
-
arthur_vectorstore = vectorstore
|
86 |
-
|
87 |
-
# # Save vectorstore
|
88 |
-
# with open(f"files/vectorstores/{vectorstore_name}_vectorstore.pkl", "wb") as f:
|
89 |
-
# pickle.dump(vectorstore, f)
|
90 |
|
91 |
|
92 |
def get_langchain_agent(api_key):
|
93 |
os.environ["OPENAI_API_KEY"] = api_key
|
94 |
|
95 |
-
|
96 |
|
97 |
manager = CallbackManager([])
|
98 |
question_manager = CallbackManager([])
|
@@ -118,7 +94,7 @@ def get_langchain_agent(api_key):
|
|
118 |
)
|
119 |
|
120 |
agent = ChatVectorDBChain(
|
121 |
-
vectorstore=
|
122 |
combine_docs_chain=chat_response_generator,
|
123 |
question_generator=question_generator,
|
124 |
callback_manager=manager,
|
@@ -140,16 +116,6 @@ def get_source_doc(output):
|
|
140 |
return source_text, source_doc_link
|
141 |
|
142 |
|
143 |
-
# def log_inference(chat_history: List[List[str]], llm_feedback: int) -> None:
|
144 |
-
# reference_data = pd.read_csv("files/reference_data.csv", index_col=None)
|
145 |
-
# chat_text = []
|
146 |
-
# for user_text, bot_text in chat_history:
|
147 |
-
# bot_text = bot_text.replace("\n", "").replace("<br>", "")
|
148 |
-
# chat_text.append(f"input:<{user_text}>,output:<{bot_text}>,")
|
149 |
-
# reference_data.loc[len(reference_data)] = {"chat": "".join(chat_text), "llm_feedback": llm_feedback}
|
150 |
-
# reference_data.to_csv("files/reference_data.csv", index=False)
|
151 |
-
|
152 |
-
|
153 |
def chat(inp, history, agent):
|
154 |
history = history or []
|
155 |
result = agent({"question": inp, "chat_history": history})
|
@@ -198,11 +164,7 @@ def launch_ask_arthur(share=False):
|
|
198 |
],
|
199 |
inputs=message,
|
200 |
)
|
201 |
-
|
202 |
-
# llm_feedback = gr.Radio(
|
203 |
-
# ["0","1","2"], value="0", label="How useful was this? (0 = bad, 1 = meh, 2 = good)"
|
204 |
-
# )
|
205 |
-
# submit_feedback_button = gr.Button("Submit feedback")
|
206 |
with gr.Column():
|
207 |
source_link = gr.Markdown()
|
208 |
source_page = gr.Markdown()
|
@@ -217,13 +179,7 @@ def launch_ask_arthur(share=False):
|
|
217 |
submit_message.click(chat, inputs=[message, state, agent_state], outputs=[chatbot, state, source_page, source_link])
|
218 |
message.submit(chat, inputs=[message, state, agent_state], outputs=[chatbot, state, source_page, source_link])
|
219 |
|
220 |
-
|
221 |
-
# submit_feedback_button.click(
|
222 |
-
# log_inference,
|
223 |
-
# [chatbot, llm_feedback],
|
224 |
-
# )
|
225 |
-
|
226 |
demo.queue().launch(share=share)
|
227 |
|
228 |
|
229 |
-
launch_ask_arthur()
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
|
|
|
|
3 |
from typing import List
|
4 |
|
5 |
from langchain.llms import OpenAIChat
|
|
|
19 |
includes a request for code, provide a code block directly from the documentation. If you don't know the answer, just
|
20 |
say "Hmm, I'm not sure." Don't try to make up an answer. If the question is not about Arthur, politely inform them that
|
21 |
you are tuned to only answer questions about Arthur.
|
|
|
22 |
=========
|
23 |
Example 1:
|
|
|
24 |
Question: What data do I need to send to Arthur?
|
25 |
=========
|
26 |
**3. What if my data is proprietary? Can I still use Arthur?**
|
|
|
27 |
Yes! Arthur offers on-premises installation for customers with data security requirements. By integrating Arthur
|
28 |
into your business's on-premises stack, you can be confident that all security requirements are met while still
|
29 |
getting the benefits of the computation and analytics Arthur provides.
|
30 |
***
|
|
|
31 |
**4. What if I don’t have ground truth labels for my data? Or what if I will have the ground truth labels in the future,
|
32 |
but they are not available yet?**
|
|
|
33 |
You don't need ground truth labels to log your model's inferences with Arthur.
|
|
|
34 |
If your ground truth labels become available after your model's inferences, whether seconds later or years later,
|
35 |
Arthur can link these new ground truth values to your model's past predictions, linking the new values by ID to
|
36 |
their corresponding inferences already in the Arthur system.
|
|
|
37 |
In the meantime, Arthur’s data drift metrics can offer leading indicators of model underperformance to keep you
|
38 |
covered if your ground truth labels are delayed or never become available.
|
39 |
***
|
|
|
41 |
Answer in Markdown:
|
42 |
The data you need to get into Arthur is only the inference data - no ground truth is needed, since it can be uploaded
|
43 |
at a later time. Also, if you have proprietary data, you can install Arthur on-premises to keep your own data security protocols.
|
|
|
44 |
=========
|
45 |
Now the real question:
|
46 |
Question: {question}
|
|
|
53 |
)
|
54 |
|
55 |
|
56 |
+
def get_docs_vectorstore(dir_name):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
loader = DirectoryLoader(dir_name)
|
58 |
raw_documents = loader.load()
|
59 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
|
62 |
)
|
63 |
documents = text_splitter.split_documents(raw_documents)
|
64 |
embeddings = OpenAIEmbeddings()
|
65 |
+
return FAISS.from_documents(documents, embeddings)
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
|
68 |
def get_langchain_agent(api_key):
|
69 |
os.environ["OPENAI_API_KEY"] = api_key
|
70 |
|
71 |
+
vectorstore = get_docs_vectorstore("files/arthur-docs-markdown")
|
72 |
|
73 |
manager = CallbackManager([])
|
74 |
question_manager = CallbackManager([])
|
|
|
94 |
)
|
95 |
|
96 |
agent = ChatVectorDBChain(
|
97 |
+
vectorstore=vectorstore,
|
98 |
combine_docs_chain=chat_response_generator,
|
99 |
question_generator=question_generator,
|
100 |
callback_manager=manager,
|
|
|
116 |
return source_text, source_doc_link
|
117 |
|
118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
def chat(inp, history, agent):
|
120 |
history = history or []
|
121 |
result = agent({"question": inp, "chat_history": history})
|
|
|
164 |
],
|
165 |
inputs=message,
|
166 |
)
|
167 |
+
|
|
|
|
|
|
|
|
|
168 |
with gr.Column():
|
169 |
source_link = gr.Markdown()
|
170 |
source_page = gr.Markdown()
|
|
|
179 |
submit_message.click(chat, inputs=[message, state, agent_state], outputs=[chatbot, state, source_page, source_link])
|
180 |
message.submit(chat, inputs=[message, state, agent_state], outputs=[chatbot, state, source_page, source_link])
|
181 |
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
demo.queue().launch(share=share)
|
183 |
|
184 |
|
185 |
+
launch_ask_arthur()
|