gabrielaltay committed
Commit da0f003
1 Parent(s): 69c42d0

side by side

Files changed (1): app.py (+161 −56)
app.py CHANGED
@@ -304,8 +304,8 @@ def render_generative_config(key_prefix: str):
     )
     st.slider(
         "max_output_tokens",
-        min_value=512,
-        max_value=1024,
+        min_value=1024,
+        max_value=2048,
         key=f"{key_prefix}|max_output_tokens",
     )
     st.slider(
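A note on the pattern in this hunk: Streamlit widgets given a `key` store their value in `st.session_state`, so the composite `f"{key_prefix}|max_output_tokens"` key is what lets several tabs (and, later in this commit, two side-by-side config groups) hold independent copies of the same slider. A minimal sketch of the mechanism, with an illustrative prefix:

```python
import streamlit as st

key_prefix = "demo"  # illustrative; app.py passes its own prefixes in

st.slider(
    "max_output_tokens",
    min_value=1024,
    max_value=2048,
    key=f"{key_prefix}|max_output_tokens",
)

# Any later code can read the widget's current value from session state.
max_tokens = st.session_state[f"{key_prefix}|max_output_tokens"]
```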
@@ -379,6 +379,62 @@ def get_llm(key_prefix: str):
     return llm


+def get_token_usage(key_prefix: str, metadata: dict):
+    if SS[f"{key_prefix}|model_name"] in OPENAI_CHAT_MODELS:
+        model_info = PROVIDER_MODELS["OpenAI"][SS[f"{key_prefix}|model_name"]]
+        return get_openai_token_usage(metadata, model_info)
+    elif SS[f"{key_prefix}|model_name"] in ANTHROPIC_CHAT_MODELS:
+        model_info = PROVIDER_MODELS["Anthropic"][SS[f"{key_prefix}|model_name"]]
+        return get_anthropic_token_usage(metadata, model_info)
+    elif SS[f"{key_prefix}|model_name"] in TOGETHER_CHAT_MODELS:
+        model_info = PROVIDER_MODELS["Together"][SS[f"{key_prefix}|model_name"]]
+        return get_together_token_usage(metadata, model_info)
+    else:
+        raise ValueError()
+
+
+def get_openai_token_usage(metadata: dict, model_info: dict):
+    input_tokens = metadata["token_usage"]["prompt_tokens"]
+    output_tokens = metadata["token_usage"]["completion_tokens"]
+    cost = (
+        input_tokens * 1e-6 * model_info["cost"]["pmi"]
+        + output_tokens * 1e-6 * model_info["cost"]["pmo"]
+    )
+    return {
+        "input_tokens": input_tokens,
+        "output_tokens": output_tokens,
+        "cost": cost,
+    }
+
+
+def get_anthropic_token_usage(metadata: dict, model_info: dict):
+    input_tokens = metadata["usage"]["input_tokens"]
+    output_tokens = metadata["usage"]["output_tokens"]
+    cost = (
+        input_tokens * 1e-6 * model_info["cost"]["pmi"]
+        + output_tokens * 1e-6 * model_info["cost"]["pmo"]
+    )
+    return {
+        "input_tokens": input_tokens,
+        "output_tokens": output_tokens,
+        "cost": cost,
+    }
+
+
+def get_together_token_usage(metadata: dict, model_info: dict):
+    input_tokens = metadata["token_usage"]["prompt_tokens"]
+    output_tokens = metadata["token_usage"]["completion_tokens"]
+    cost = (
+        input_tokens * 1e-6 * model_info["cost"]["pmi"]
+        + output_tokens * 1e-6 * model_info["cost"]["pmo"]
+    )
+    return {
+        "input_tokens": input_tokens,
+        "output_tokens": output_tokens,
+        "cost": cost,
+    }
+
+
 def render_sidebar():

     with st.container(border=True):
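The three provider-specific helpers added here differ only in where the token counts live in the response metadata; the cost formula assumes `model_info["cost"]["pmi"]` and `["pmo"]` are USD prices per million input and output tokens, hence the `1e-6` factor. A worked example with made-up prices:

```python
# Hypothetical pricing: $3 per million input tokens, $15 per million output tokens.
model_info = {"cost": {"pmi": 3.00, "pmo": 15.00}}

input_tokens = 1_200
output_tokens = 400

cost = (
    input_tokens * 1e-6 * model_info["cost"]["pmi"]
    + output_tokens * 1e-6 * model_info["cost"]["pmo"]
)
print(f"${cost:.4f}")  # $0.0096 (= $0.0036 input + $0.0060 output)
```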
@@ -398,7 +454,7 @@ def render_query_rag_tab():
     with st.expander("Retrieval Config"):
         render_retrieval_config(key_prefix)

-    QUERY_TEMPLATE = """You are an expert legislative analyst. Use the following excerpts from US congressional legislation to respond to the user's query. The excerpts are formatted as a JSON list. Each JSON object has "legis_id", "title", "introduced_date", "sponsor", and "snippets" keys. If a snippet is useful in writing part of your response, then cite the "legis_id", "title", "introduced_date", and "sponsor" in the response. If you don't know how to respond, just tell the user.
+    QUERY_RAG_TEMPLATE = """You are an expert legislative analyst. Use the following excerpts from US congressional legislation to respond to the user's query. The excerpts are formatted as a JSON list. Each JSON object has "legis_id", "title", "introduced_date", "sponsor", and "snippets" keys. If a snippet is useful in writing part of your response, then cite the "legis_id", "title", "introduced_date", and "sponsor" in the response. If you don't know how to respond, just tell the user.

---
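The renamed template tells the model to expect `{context}` as a JSON list of objects with `"legis_id"`, `"title"`, `"introduced_date"`, `"sponsor"`, and `"snippets"` keys. The `format_docs` helper that produces that JSON is not part of this diff; a hypothetical sketch of what such a function could look like, assuming each retrieved `Document` carries those fields in its metadata:

```python
import json
from collections import defaultdict

from langchain_core.documents import Document


def format_docs_sketch(docs: list[Document]) -> str:
    """Hypothetical reconstruction of format_docs; app.py's version may differ."""
    snippets = defaultdict(list)
    metadata = {}
    for doc in docs:
        legis_id = doc.metadata["legis_id"]
        snippets[legis_id].append(doc.page_content)
        metadata[legis_id] = doc.metadata
    return json.dumps(
        [
            {
                "legis_id": legis_id,
                "title": metadata[legis_id].get("title"),
                "introduced_date": metadata[legis_id].get("introduced_date"),
                "sponsor": metadata[legis_id].get("sponsor"),
                "snippets": snips,
            }
            for legis_id, snips in snippets.items()
        ],
        indent=2,
    )
```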
@@ -412,11 +468,11 @@ Query: {query}"""

     prompt = ChatPromptTemplate.from_messages(
         [
-            ("human", QUERY_TEMPLATE),
+            ("human", QUERY_RAG_TEMPLATE),
         ]
     )

-    with st.form("query_form"):
+    with st.form(f"{key_prefix}|query_form"):
         st.text_area(
             "Enter a query that can be answered with congressional legislation:",
             key=f"{key_prefix}|query",
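`ChatPromptTemplate.from_messages` with a single `("human", ...)` entry produces one user message whose `{context}` and `{query}` placeholders are filled at invoke time. A self-contained check of that behavior (template text abbreviated):

```python
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "Excerpts:\n{context}\n\n---\n\nQuery: {query}"),
    ]
)

# Prompt templates are Runnables: invoke() fills the placeholders.
value = prompt.invoke({"context": "[]", "query": "Which bills address solar energy?"})
print(value.to_messages()[0].content)
```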
@@ -437,8 +493,8 @@ Query: {query}"""
                 "docs": retriever,  # list of docs
                 "query": RunnablePassthrough(),  # str
             }
-        ).assign(context=(lambda x: format_docs(x["docs"])))
-        # .assign(output=prompt | llm | StrOutputParser())
+        )
+        .assign(context=(lambda x: format_docs(x["docs"])))
         .assign(output=prompt | llm)
     )
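The restructured chain reads as three stages: `RunnableParallel` fans the input query out into a dict, then each `.assign` adds one key computed from the dict so far, so the final output carries `docs`, `query`, `context`, and `output` together. A runnable sketch with stand-ins for the retriever and LLM (illustrative only):

```python
from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough

# Stand-ins so the composition runs without a vector store or an API key.
fake_retriever = RunnableLambda(lambda query: [f"snippet about {query}"])


def format_docs(docs):
    return "\n".join(docs)


chain = (
    RunnableParallel(
        {
            "docs": fake_retriever,  # list of docs
            "query": RunnablePassthrough(),  # str
        }
    )
    .assign(context=lambda x: format_docs(x["docs"]))  # adds "context"
    .assign(output=lambda x: f"model sees: {x['context']}")  # stands in for prompt | llm
)

print(chain.invoke("solar energy"))
# {'docs': [...], 'query': 'solar energy', 'context': '...', 'output': '...'}
```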
 
@@ -482,65 +538,114 @@ Query: {query}"""
482
  st.write(SS[f"{key_prefix}|out"])
483
 
484
 
485
- def get_token_usage(key_prefix: str, metadata: dict):
486
- if SS[f"{key_prefix}|model_name"] in OPENAI_CHAT_MODELS:
487
- model_info = PROVIDER_MODELS["OpenAI"][SS[f"{key_prefix}|model_name"]]
488
- return get_openai_token_usage(metadata, model_info)
489
- elif SS[f"{key_prefix}|model_name"] in ANTHROPIC_CHAT_MODELS:
490
- model_info = PROVIDER_MODELS["Anthropic"][SS[f"{key_prefix}|model_name"]]
491
- return get_anthropic_token_usage(metadata, model_info)
492
- elif SS[f"{key_prefix}|model_name"] in TOGETHER_CHAT_MODELS:
493
- model_info = PROVIDER_MODELS["Together"][SS[f"{key_prefix}|model_name"]]
494
- return get_together_token_usage(metadata, model_info)
495
- else:
496
- raise ValueError()
497
 
 
498
 
499
- def get_openai_token_usage(metadata: dict, model_info: dict):
500
- input_tokens = metadata["token_usage"]["prompt_tokens"]
501
- output_tokens = metadata["token_usage"]["completion_tokens"]
502
- cost = (
503
- input_tokens * 1e-6 * model_info["cost"]["pmi"]
504
- + output_tokens * 1e-6 * model_info["cost"]["pmo"]
505
- )
506
- return {
507
- "input_tokens": input_tokens,
508
- "output_tokens": output_tokens,
509
- "cost": cost,
510
- }
511
 
 
512
 
513
- def get_anthropic_token_usage(metadata: dict, model_info: dict):
514
- input_tokens = metadata["usage"]["input_tokens"]
515
- output_tokens = metadata["usage"]["output_tokens"]
516
- cost = (
517
- input_tokens * 1e-6 * model_info["cost"]["pmi"]
518
- + output_tokens * 1e-6 * model_info["cost"]["pmo"]
519
- )
520
- return {
521
- "input_tokens": input_tokens,
522
- "output_tokens": output_tokens,
523
- "cost": cost,
524
- }
525
 
 
526
 
527
- def get_together_token_usage(metadata: dict, model_info: dict):
528
- input_tokens = metadata["token_usage"]["prompt_tokens"]
529
- output_tokens = metadata["token_usage"]["completion_tokens"]
530
- cost = (
531
- input_tokens * 1e-6 * model_info["cost"]["pmi"]
532
- + output_tokens * 1e-6 * model_info["cost"]["pmo"]
 
 
533
  )
534
- return {
535
- "input_tokens": input_tokens,
536
- "output_tokens": output_tokens,
537
- "cost": cost,
538
- }
539
 
 
 
 
 
 
 
540
 
541
- def render_query_rag_sbs_tab():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
 
543
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
544
 
545
 
546
  ##################
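One pattern worth noting in the new tab: the chain output is written into `st.session_state` on submit and re-rendered from there on every rerun, which is what keeps both columns populated while the user toggles expanders or edits config. A minimal self-contained sketch of that submit-then-render-from-state pattern (keys and work are illustrative):

```python
import streamlit as st

SS = st.session_state

with st.form("demo|query_form"):
    st.text_area("Query:", key="demo|query")
    submitted = st.form_submit_button("Submit")

if submitted:
    # Expensive work happens only on submit; the result is cached in session state.
    SS["demo|out"] = SS["demo|query"].upper()  # stand-in for rag_chain.invoke(...)

if "demo|out" in SS:
    # This renders on every rerun, not just the one triggered by the submit click.
    st.info(SS["demo|out"])
```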
 