maxcembalest committed on
Commit
41b28fa
·
1 Parent(s): a4ab17e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -50
app.py CHANGED
@@ -1,8 +1,5 @@
1
-
2
  import gradio as gr
3
  import os
4
- # import pandas as pd
5
- # import pickle
6
  from typing import List
7
 
8
  from langchain.llms import OpenAIChat
@@ -22,28 +19,21 @@ given the following extracted parts of a long document and a question. If the qu
22
  includes a request for code, provide a code block directly from the documentation. If you don't know the answer, just
23
  say "Hmm, I'm not sure." Don't try to make up an answer. If the question is not about Arthur, politely inform them that
24
  you are tuned to only answer questions about Arthur.
25
-
26
  =========
27
  Example 1:
28
-
29
  Question: What data do I need to send to Arthur?
30
  =========
31
  **3. What if my data is proprietary? Can I still use Arthur?**
32
-
33
  Yes! Arthur offers on-premises installation for customers with data security requirements. By integrating Arthur
34
  into your business's on-premises stack, you can be confident that all security requirements are met while still
35
  getting the benefits of the computation and analytics Arthur provides.
36
  ***
37
-
38
  **4. What if I don’t have ground truth labels for my data? Or what if I will have the ground truth labels in the future,
39
  but they are not available yet?**
40
-
41
  You don't need ground truth labels to log your model's inferences with Arthur.
42
-
43
  If your ground truth labels become available after your model's inferences, whether seconds later or years later,
44
  Arthur can link these new ground truth values to your model's past predictions, linking the new values by ID to
45
  their corresponding inferences already in the Arthur system.
46
-
47
  In the meantime, Arthur’s data drift metrics can offer leading indicators of model underperformance to keep you
48
  covered if your ground truth labels are delayed or never become available.
49
  ***
@@ -51,7 +41,6 @@ covered if your ground truth labels are delayed or never become available.
51
  Answer in Markdown:
52
  The data you need to get into Arthur is only the inference data - no ground truth is needed, since it can be uploaded
53
  at a later time. Also, if you have proprietary data, you can install Arthur on-premises to keep your own data security protocols.
54
-
55
  =========
56
  Now the real question:
57
  Question: {question}
@@ -64,14 +53,7 @@ RESPONSE_PROMPT = PromptTemplate(
64
  )
65
 
66
 
67
- # # load vectorstore of embeddings
68
- # with open("files/vectorstores/arthur_vectorstore.pkl", "rb") as f:
69
- # global arthur_vectorstore
70
- # arthur_vectorstore = pickle.load(f)
71
-
72
- arthur_vectorstore = None
73
-
74
- def ingest_docs(dir_name, vectorstore_name):
75
  loader = DirectoryLoader(dir_name)
76
  raw_documents = loader.load()
77
  text_splitter = RecursiveCharacterTextSplitter(
@@ -80,19 +62,13 @@ def ingest_docs(dir_name, vectorstore_name):
80
  )
81
  documents = text_splitter.split_documents(raw_documents)
82
  embeddings = OpenAIEmbeddings()
83
- vectorstore = FAISS.from_documents(documents, embeddings)
84
-
85
- arthur_vectorstore = vectorstore
86
-
87
- # # Save vectorstore
88
- # with open(f"files/vectorstores/{vectorstore_name}_vectorstore.pkl", "wb") as f:
89
- # pickle.dump(vectorstore, f)
90
 
91
 
92
  def get_langchain_agent(api_key):
93
  os.environ["OPENAI_API_KEY"] = api_key
94
 
95
- ingest_docs("files/arthur-docs-markdown", "arthur")
96
 
97
  manager = CallbackManager([])
98
  question_manager = CallbackManager([])
@@ -118,7 +94,7 @@ def get_langchain_agent(api_key):
118
  )
119
 
120
  agent = ChatVectorDBChain(
121
- vectorstore=arthur_vectorstore,
122
  combine_docs_chain=chat_response_generator,
123
  question_generator=question_generator,
124
  callback_manager=manager,
@@ -140,16 +116,6 @@ def get_source_doc(output):
140
  return source_text, source_doc_link
141
 
142
 
143
- # def log_inference(chat_history: List[List[str]], llm_feedback: int) -> None:
144
- # reference_data = pd.read_csv("files/reference_data.csv", index_col=None)
145
- # chat_text = []
146
- # for user_text, bot_text in chat_history:
147
- # bot_text = bot_text.replace("\n", "").replace("<br>", "")
148
- # chat_text.append(f"input:<{user_text}>,output:<{bot_text}>,")
149
- # reference_data.loc[len(reference_data)] = {"chat": "".join(chat_text), "llm_feedback": llm_feedback}
150
- # reference_data.to_csv("files/reference_data.csv", index=False)
151
-
152
-
153
  def chat(inp, history, agent):
154
  history = history or []
155
  result = agent({"question": inp, "chat_history": history})
@@ -198,11 +164,7 @@ def launch_ask_arthur(share=False):
198
  ],
199
  inputs=message,
200
  )
201
- # # feedback radio button
202
- # llm_feedback = gr.Radio(
203
- # ["0","1","2"], value="0", label="How useful was this? (0 = bad, 1 = meh, 2 = good)"
204
- # )
205
- # submit_feedback_button = gr.Button("Submit feedback")
206
  with gr.Column():
207
  source_link = gr.Markdown()
208
  source_page = gr.Markdown()
@@ -217,13 +179,7 @@ def launch_ask_arthur(share=False):
217
  submit_message.click(chat, inputs=[message, state, agent_state], outputs=[chatbot, state, source_page, source_link])
218
  message.submit(chat, inputs=[message, state, agent_state], outputs=[chatbot, state, source_page, source_link])
219
 
220
-
221
- # submit_feedback_button.click(
222
- # log_inference,
223
- # [chatbot, llm_feedback],
224
- # )
225
-
226
  demo.queue().launch(share=share)
227
 
228
 
229
- launch_ask_arthur()
 
 
1
  import gradio as gr
2
  import os
 
 
3
  from typing import List
4
 
5
  from langchain.llms import OpenAIChat
 
19
  includes a request for code, provide a code block directly from the documentation. If you don't know the answer, just
20
  say "Hmm, I'm not sure." Don't try to make up an answer. If the question is not about Arthur, politely inform them that
21
  you are tuned to only answer questions about Arthur.
 
22
  =========
23
  Example 1:
 
24
  Question: What data do I need to send to Arthur?
25
  =========
26
  **3. What if my data is proprietary? Can I still use Arthur?**
 
27
  Yes! Arthur offers on-premises installation for customers with data security requirements. By integrating Arthur
28
  into your business's on-premises stack, you can be confident that all security requirements are met while still
29
  getting the benefits of the computation and analytics Arthur provides.
30
  ***
 
31
  **4. What if I don’t have ground truth labels for my data? Or what if I will have the ground truth labels in the future,
32
  but they are not available yet?**
 
33
  You don't need ground truth labels to log your model's inferences with Arthur.
 
34
  If your ground truth labels become available after your model's inferences, whether seconds later or years later,
35
  Arthur can link these new ground truth values to your model's past predictions, linking the new values by ID to
36
  their corresponding inferences already in the Arthur system.
 
37
  In the meantime, Arthur’s data drift metrics can offer leading indicators of model underperformance to keep you
38
  covered if your ground truth labels are delayed or never become available.
39
  ***
 
41
  Answer in Markdown:
42
  The data you need to get into Arthur is only the inference data - no ground truth is needed, since it can be uploaded
43
  at a later time. Also, if you have proprietary data, you can install Arthur on-premises to keep your own data security protocols.
 
44
  =========
45
  Now the real question:
46
  Question: {question}
 
53
  )
54
 
55
 
56
+ def get_docs_vectorstore(dir_name):
 
 
 
 
 
 
 
57
  loader = DirectoryLoader(dir_name)
58
  raw_documents = loader.load()
59
  text_splitter = RecursiveCharacterTextSplitter(
 
62
  )
63
  documents = text_splitter.split_documents(raw_documents)
64
  embeddings = OpenAIEmbeddings()
65
+ return FAISS.from_documents(documents, embeddings)
 
 
 
 
 
 
66
 
67
 
68
  def get_langchain_agent(api_key):
69
  os.environ["OPENAI_API_KEY"] = api_key
70
 
71
+ vectorstore = get_docs_vectorstore("files/arthur-docs-markdown")
72
 
73
  manager = CallbackManager([])
74
  question_manager = CallbackManager([])
 
94
  )
95
 
96
  agent = ChatVectorDBChain(
97
+ vectorstore=vectorstore,
98
  combine_docs_chain=chat_response_generator,
99
  question_generator=question_generator,
100
  callback_manager=manager,
 
116
  return source_text, source_doc_link
117
 
118
 
 
 
 
 
 
 
 
 
 
 
119
  def chat(inp, history, agent):
120
  history = history or []
121
  result = agent({"question": inp, "chat_history": history})
 
164
  ],
165
  inputs=message,
166
  )
167
+
 
 
 
 
168
  with gr.Column():
169
  source_link = gr.Markdown()
170
  source_page = gr.Markdown()
 
179
  submit_message.click(chat, inputs=[message, state, agent_state], outputs=[chatbot, state, source_page, source_link])
180
  message.submit(chat, inputs=[message, state, agent_state], outputs=[chatbot, state, source_page, source_link])
181
 
 
 
 
 
 
 
182
  demo.queue().launch(share=share)
183
 
184
 
185
+ launch_ask_arthur()