Spaces:
Running
Running
gabrielaltay
commited on
Commit
·
df92eb1
1
Parent(s):
c902b64
update
Browse files
app.py
CHANGED
@@ -6,6 +6,10 @@ import re
|
|
6 |
|
7 |
from langchain_core.documents import Document
|
8 |
from langchain_core.prompts import PromptTemplate
|
|
|
|
|
|
|
|
|
9 |
from langchain_core.runnables import RunnableParallel
|
10 |
from langchain_core.runnables import RunnablePassthrough
|
11 |
from langchain_core.output_parsers import StrOutputParser
|
@@ -111,7 +115,7 @@ def load_pinecone_vectorstore():
|
|
111 |
return vectorstore
|
112 |
|
113 |
|
114 |
-
def
|
115 |
nomic_base_url = "https://atlas.nomic.ai/data/gabrielhyperdemocracy"
|
116 |
nomic_map_name = "us-congressional-legislation-s1024o256nomic"
|
117 |
nomic_url = f"{nomic_base_url}/{nomic_map_name}/map"
|
@@ -238,7 +242,7 @@ def get_vectorstore_filter():
|
|
238 |
return vs_filter
|
239 |
|
240 |
|
241 |
-
def
|
242 |
first_doc = doc_grp[0]
|
243 |
|
244 |
congress_gov_url = get_congress_gov_url(
|
@@ -290,7 +294,7 @@ def replace_legis_ids_with_urls(text):
|
|
290 |
return rtext
|
291 |
|
292 |
|
293 |
-
def
|
294 |
|
295 |
st.write(
|
296 |
"""
|
@@ -311,7 +315,7 @@ Use the `Retrieval Config` to change the number of chunks retrieved from our con
|
|
311 |
)
|
312 |
|
313 |
|
314 |
-
def
|
315 |
|
316 |
with st.expander("Example Queries"):
|
317 |
st.write(
|
@@ -326,23 +330,25 @@ Write a well cited 3 paragraph essay on food insecurity.
|
|
326 |
```
|
327 |
|
328 |
```
|
329 |
-
Create a table summarizing
|
330 |
```
|
331 |
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
##################
|
337 |
|
|
|
|
|
|
|
338 |
|
339 |
-
|
|
|
340 |
|
341 |
|
342 |
-
|
343 |
|
344 |
with st.container(border=True):
|
345 |
-
|
346 |
|
347 |
st.checkbox("escape markdown in answer", key="response_escape_markdown")
|
348 |
st.checkbox("add legis urls in answer", value=True, key="response_add_legis_urls")
|
@@ -392,25 +398,9 @@ with st.sidebar:
|
|
392 |
)
|
393 |
|
394 |
|
395 |
-
|
396 |
-
model_name=SS["model_name"],
|
397 |
-
temperature=SS["temperature"],
|
398 |
-
openai_api_key=st.secrets["openai_api_key"],
|
399 |
-
model_kwargs={"top_p": SS["top_p"], "seed": SEED},
|
400 |
-
)
|
401 |
-
vectorstore = load_pinecone_vectorstore()
|
402 |
-
format_docs = DOC_FORMATTERS[SS["prompt_version"]]
|
403 |
-
vs_filter = get_vectorstore_filter()
|
404 |
-
|
405 |
-
query_tab, guide_tab = st.tabs(["query", "guide"])
|
406 |
-
|
407 |
-
with guide_tab:
|
408 |
-
write_guide()
|
409 |
-
|
410 |
-
|
411 |
-
with query_tab:
|
412 |
|
413 |
-
|
414 |
|
415 |
with st.form("my_form"):
|
416 |
st.text_area("Enter query:", key="query")
|
@@ -418,6 +408,7 @@ with query_tab:
|
|
418 |
|
419 |
if query_submitted:
|
420 |
|
|
|
421 |
retriever = vectorstore.as_retriever(
|
422 |
search_kwargs={"k": SS["n_ret_docs"], "filter": vs_filter},
|
423 |
)
|
@@ -457,7 +448,61 @@ with query_tab:
|
|
457 |
"Retrieved Chunks (note that you may need to 'right click' on links in the expanders to follow them)"
|
458 |
)
|
459 |
for legis_id, doc_grp in doc_grps:
|
460 |
-
|
461 |
|
462 |
with st.expander("Debug"):
|
463 |
st.write(SS["out"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
from langchain_core.documents import Document
|
8 |
from langchain_core.prompts import PromptTemplate
|
9 |
+
from langchain_core.prompts import ChatPromptTemplate
|
10 |
+
from langchain_core.prompts import MessagesPlaceholder
|
11 |
+
from langchain_core.messages import AIMessage
|
12 |
+
from langchain_core.messages import HumanMessage
|
13 |
from langchain_core.runnables import RunnableParallel
|
14 |
from langchain_core.runnables import RunnablePassthrough
|
15 |
from langchain_core.output_parsers import StrOutputParser
|
|
|
115 |
return vectorstore
|
116 |
|
117 |
|
118 |
+
def render_outreach_links():
|
119 |
nomic_base_url = "https://atlas.nomic.ai/data/gabrielhyperdemocracy"
|
120 |
nomic_map_name = "us-congressional-legislation-s1024o256nomic"
|
121 |
nomic_url = f"{nomic_base_url}/{nomic_map_name}/map"
|
|
|
242 |
return vs_filter
|
243 |
|
244 |
|
245 |
+
def render_doc_grp(legis_id: str, doc_grp: list[Document]):
|
246 |
first_doc = doc_grp[0]
|
247 |
|
248 |
congress_gov_url = get_congress_gov_url(
|
|
|
294 |
return rtext
|
295 |
|
296 |
|
297 |
+
def render_guide():
|
298 |
|
299 |
st.write(
|
300 |
"""
|
|
|
315 |
)
|
316 |
|
317 |
|
318 |
+
def render_example_queries():
|
319 |
|
320 |
with st.expander("Example Queries"):
|
321 |
st.write(
|
|
|
330 |
```
|
331 |
|
332 |
```
|
333 |
+
Create a table summarizing major climate change ideas with columns legis_id, title, idea.
|
334 |
```
|
335 |
|
336 |
+
```
|
337 |
+
Write an action plan to keep social security solvent.
|
338 |
+
```
|
|
|
|
|
339 |
|
340 |
+
```
|
341 |
+
Suggest reforms that would benefit the Medicaid program.
|
342 |
+
```
|
343 |
|
344 |
+
"""
|
345 |
+
)
|
346 |
|
347 |
|
348 |
+
def render_sidebar():
|
349 |
|
350 |
with st.container(border=True):
|
351 |
+
render_outreach_links()
|
352 |
|
353 |
st.checkbox("escape markdown in answer", key="response_escape_markdown")
|
354 |
st.checkbox("add legis urls in answer", value=True, key="response_add_legis_urls")
|
|
|
398 |
)
|
399 |
|
400 |
|
401 |
+
def render_query_tab():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
402 |
|
403 |
+
render_example_queries()
|
404 |
|
405 |
with st.form("my_form"):
|
406 |
st.text_area("Enter query:", key="query")
|
|
|
408 |
|
409 |
if query_submitted:
|
410 |
|
411 |
+
vs_filter = get_vectorstore_filter()
|
412 |
retriever = vectorstore.as_retriever(
|
413 |
search_kwargs={"k": SS["n_ret_docs"], "filter": vs_filter},
|
414 |
)
|
|
|
448 |
"Retrieved Chunks (note that you may need to 'right click' on links in the expanders to follow them)"
|
449 |
)
|
450 |
for legis_id, doc_grp in doc_grps:
|
451 |
+
render_doc_grp(legis_id, doc_grp)
|
452 |
|
453 |
with st.expander("Debug"):
|
454 |
st.write(SS["out"])
|
455 |
+
|
456 |
+
|
457 |
+
def render_chat_tab():
|
458 |
+
|
459 |
+
contextualize_q_system_prompt = """Given a chat history and the latest user question \
|
460 |
+
which might reference context in the chat history, formulate a standalone question \
|
461 |
+
which can be understood without the chat history. Do NOT answer the question, \
|
462 |
+
just reformulate it if needed and otherwise return it as is."""
|
463 |
+
contextualize_q_prompt = ChatPromptTemplate.from_messages(
|
464 |
+
[
|
465 |
+
("system", contextualize_q_system_prompt),
|
466 |
+
MessagesPlaceholder(variable_name="chat_history"),
|
467 |
+
("human", "{query}"),
|
468 |
+
]
|
469 |
+
)
|
470 |
+
contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()
|
471 |
+
|
472 |
+
st.write("Coming Soon")
|
473 |
+
|
474 |
+
pass
|
475 |
+
|
476 |
+
|
477 |
+
##################
|
478 |
+
|
479 |
+
|
480 |
+
st.title(
|
481 |
+
":classical_building: LegisQA - Chat With Congressional Bills :classical_building:"
|
482 |
+
)
|
483 |
+
|
484 |
+
|
485 |
+
with st.sidebar:
|
486 |
+
render_sidebar()
|
487 |
+
|
488 |
+
|
489 |
+
llm = ChatOpenAI(
|
490 |
+
model_name=SS["model_name"],
|
491 |
+
temperature=SS["temperature"],
|
492 |
+
openai_api_key=st.secrets["openai_api_key"],
|
493 |
+
model_kwargs={"top_p": SS["top_p"], "seed": SEED},
|
494 |
+
)
|
495 |
+
vectorstore = load_pinecone_vectorstore()
|
496 |
+
format_docs = DOC_FORMATTERS[SS["prompt_version"]]
|
497 |
+
|
498 |
+
|
499 |
+
query_tab, chat_tab, guide_tab = st.tabs(["query", "chat", "guide"])
|
500 |
+
|
501 |
+
with guide_tab:
|
502 |
+
render_guide()
|
503 |
+
|
504 |
+
with query_tab:
|
505 |
+
render_query_tab()
|
506 |
+
|
507 |
+
with chat_tab:
|
508 |
+
render_chat_tab()
|