HMC-demo

Running

App Files Community

ofermend committed on Dec 16, 2024

Commit

8bdf672

1 Parent(s): d1c53b9

initial

Browse files

Files changed (6) hide show

README.md +2 -2
agent.py +46 -52
app.py +4 -52
requirements.txt +4 -5
st_app.py +6 -6
utils.py +4 -1

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: CFPB Assistant
 emoji: 🐨
 colorFrom: indigo
 colorTo: indigo
@@ -7,7 +7,7 @@ sdk: docker
 app_port: 8501
 pinned: false
 license: apache-2.0
-short_description: CFPB Assistant using vectara-agentic
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: HMC Demo
 emoji: 🐨
 colorFrom: indigo
 colorTo: indigo
 app_port: 8501
 pinned: false
 license: apache-2.0
+short_description: Ask questions about Harvard Management
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

agent.py CHANGED Viewed

@@ -3,44 +3,54 @@ from typing import Optional
 from pydantic import Field, BaseModel
 from omegaconf import OmegaConf
-from llama_index.core.utilities.sql_wrapper import SQLDatabase
-from sqlalchemy import create_engine
 from dotenv import load_dotenv
 load_dotenv(override=True)
-from vectara_agentic.agent import Agent
-from vectara_agentic.tools import ToolsFactory, VectaraToolFactory
-def create_assistant_tools(cfg):
-    class QueryCFPBComplaints(BaseModel):
         query: str = Field(description="The user query.")
-        company: Optional[str] = Field(
             default=None,
-            description="The company that the complaint is about.",
-            examples=['CAPITAL ONE FINANCIAL CORPORATION', 'BANK OF AMERICA, NATIONAL ASSOCIATION', 'CITIBANK, N.A.', 'WELLS FARGO & COMPANY', 'JPMORGAN CHASE & CO.']
         )
-        state: Optional[str] = Field(
             default=None,
-            description="The two-character state code where the consumer lives.",
-            examples=['CA', 'FL', 'NY', 'TX', 'GA']
         )
     vec_factory = VectaraToolFactory(
-        vectara_api_key=cfg.api_keys,
         vectara_customer_id=cfg.customer_id,
-        vectara_corpus_id=cfg.corpus_ids
     )
-    summarizer = 'vectara-experimental-summary-ext-2023-12-11-med-omni'
-    ask_complaints = vec_factory.create_rag_tool(
-        tool_name = "ask_complaints",
         tool_description = """
         Given a user query,
-        returns a response to a user question about customer complaints for bank services.
         """,
-        tool_args_schema = QueryCFPBComplaints,
         reranker = "chain", rerank_k = 100,
         rerank_chain = [
             {
@@ -49,46 +59,30 @@ def create_assistant_tools(cfg):
             },
             {
                 "type": "mmr",
-                "diversity_bias": 0.4,
-                "limit": 30
             }
         ],
         n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.005,
         vectara_summarizer = summarizer,
         include_citations = True,
     )
-    tools_factory = ToolsFactory()
-    db_tools = tools_factory.database_tools(
-                tool_name_prefix = "cfpb",
-                content_description = 'Customer complaints about five banks (Bank of America, Wells Fargo, Capital One, Chase, and CITI Bank) and geographic information (counties and zip codes)',
-                sql_database = SQLDatabase(create_engine('sqlite:///cfpb_database.db')),
-            )
-    return (tools_factory.standard_tools() +
-            tools_factory.guardrail_tools() +
-            db_tools +
-            [ask_complaints]
-    )
 def initialize_agent(_cfg, agent_progress_callback=None):
-    cfpb_complaints_bot_instructions = """
-    - You are a helpful research assistant,
-      with expertise in finance and complaints from the CFPB (Consumer Financial Protection Bureau),
-      in conversation with a user.
-    - For analytical/numeric questions, try to use the cfpb_load_data and other database tools.
-    - For questions about customers' complaints (the text of the complaint), use the ask_complaints tool.
-      You only need the query parameter to use this tool, but you can supply other parameters if provided.
-      Do not include the "References" section in your response.
-    - Never discuss politics, and always respond politely.
     """
     agent = Agent(
         tools=create_assistant_tools(_cfg),
-        topic="Customer complaints from the Consumer Financial Protection Bureau (CFPB)",
-        custom_instructions=cfpb_complaints_bot_instructions,
-        agent_progress_callback=agent_progress_callback
     )
     agent.report()
     return agent
@@ -97,11 +91,11 @@ def initialize_agent(_cfg, agent_progress_callback=None):
 def get_agent_config() -> OmegaConf:
     cfg = OmegaConf.create({
         'customer_id': str(os.environ['VECTARA_CUSTOMER_ID']),
-        'corpus_ids': str(os.environ['VECTARA_CORPUS_IDS']),
-        'api_keys': str(os.environ['VECTARA_API_KEYS']),
         'examples': os.environ.get('QUERY_EXAMPLES', None),
-        'demo_name': "cfpb-assistant",
-        'demo_welcome': "Welcome to the CFPB Customer Complaints demo.",
-        'demo_description': "This assistant can help you gain insights into customer complaints to banks recorded by the Consumer Financial Protection Bureau.",
     })
     return cfg

 from pydantic import Field, BaseModel
 from omegaconf import OmegaConf
+from vectara_agentic.agent import Agent
+from vectara_agentic.tools import VectaraToolFactory
 from dotenv import load_dotenv
 load_dotenv(override=True)
+initial_prompt = "How can I help you today?"
+def create_assistant_tools(cfg):
+    class QueryHMC(BaseModel):
         query: str = Field(description="The user query.")
+        ticker: Optional[str] = Field(
+            default=None,
+            description="The company ticker.",
+            examples=['GOOG', 'META']
+        )
+        year: Optional[str] = Field(
+            default=None,
+            description="The year of the report.",
+            examples=[2020, 2023]
+        )
+        quarter: Optional[int] = Field(
             default=None,
+            description="The quarter of the report.",
+            examples=[1, 2, 3, 4]
         )
+        filing_type: Optional[str] = Field(
             default=None,
+            description="The type of filing.",
+            examples=['10K', '10Q']
         )
     vec_factory = VectaraToolFactory(
+        vectara_api_key=cfg.api_key,
         vectara_customer_id=cfg.customer_id,
+        vectara_corpus_id=cfg.corpus_id
     )
+    #summarizer = 'vectara-experimental-summary-ext-2023-12-11-med-omni'
+    summarizer = 'vectara-summary-ext-24-05-med-omni'
+    ask_hmc = vec_factory.create_rag_tool(
+        tool_name = "ask_hmc",
         tool_description = """
         Given a user query,
+        returns a response to a user question about fund management companies.
         """,
+        tool_args_schema = QueryHMC,
         reranker = "chain", rerank_k = 100,
         rerank_chain = [
             {
             },
             {
                 "type": "mmr",
+                "diversity_bias": 0.05,
+                "limit": 20
             }
         ],
         n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.005,
         vectara_summarizer = summarizer,
+        summary_num_results = 10,
         include_citations = True,
     )
+    return [ask_hmc]
 def initialize_agent(_cfg, agent_progress_callback=None):
+    bot_instructions = """
+    - You are a helpful assistant, with expertise in management of public company stock portfolios.
+    - Use the 'ask_hmc' tool to answer questions about public company performance, risks, and other financial metrics.
+    - Use the year, quarter, filing_type and ticker arguments to the 'ask_hmc' tool to get more specific answers.
+    - Note that 10Q reports exist for quarters 1, 2, 3 and for the 4th quarter there is a 10K report.
     """
     agent = Agent(
         tools=create_assistant_tools(_cfg),
+        topic="Endowment fund management",
+        custom_instructions=bot_instructions,
+        agent_progress_callback=agent_progress_callback,
     )
     agent.report()
     return agent
 def get_agent_config() -> OmegaConf:
     cfg = OmegaConf.create({
         'customer_id': str(os.environ['VECTARA_CUSTOMER_ID']),
+        'corpus_id': str(os.environ['VECTARA_CORPUS_ID']),
+        'api_key': str(os.environ['VECTARA_API_KEY']),
         'examples': os.environ.get('QUERY_EXAMPLES', None),
+        'demo_name': "HMC Demo",
+        'demo_welcome': "HMC Assistant.",
+        'demo_description': "AI assistant For Harvard Management Company.",
     })
     return cfg

app.py CHANGED Viewed

@@ -1,14 +1,9 @@
-import os
 import streamlit as st
 from st_app import launch_bot
 import nest_asyncio
 import asyncio
-import uuid
-import sqlite3
-from datasets import load_dataset
 # Setup for HTTP API Calls to Amplitude Analytics
 if 'device_id' not in st.session_state:
@@ -17,50 +12,7 @@ if 'device_id' not in st.session_state:
 if "feedback_key" not in st.session_state:
     st.session_state.feedback_key = 0
-def setup_db():
-    db_path = 'cfpb_database.db'
-    conn = sqlite3.connect(db_path)
-    cursor = conn.cursor()
-    with st.spinner("Loading data... Please wait..."):
-        def table_populated() -> bool:
-            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='cfpb_complaints'")
-            result = cursor.fetchone()
-            if not result:
-                    return False
-            return True
-        if table_populated():
-            print("Database table already populated, skipping setup")
-            conn.close()
-            return
-        else:
-            print("Populating database table")
-        # Execute the SQL commands to create the database table
-        with open('create_table.sql', 'r') as sql_file:
-            sql_script = sql_file.read()
-            cursor.executescript(sql_script)
-        hf_token = os.getenv('HF_TOKEN')
-        # Load data into cfpb_complaints table
-        df = load_dataset("vectara/cfpb-complaints", data_files="cfpb_complaints.csv", token=hf_token)['train'].to_pandas()
-        df.to_sql('cfpb_complaints', conn, if_exists='replace', index=False)
-        df = load_dataset("vectara/cfpb-complaints", data_files="cfpb_county_populations.csv", token=hf_token)['train'].to_pandas()
-        df.to_sql('cfpb_county_populations', conn, if_exists='replace', index=False)
-        df = load_dataset("vectara/cfpb-complaints", data_files="cfpb_zip_to_county.csv", token=hf_token)['train'].to_pandas()
-        df.to_sql('cfpb_zip_to_county', conn, if_exists='replace', index=False)
-        # Commit changes and close connection
-        conn.commit()
-        conn.close()
 if __name__ == "__main__":
-    st.set_page_config(page_title="CFPB Complaints Assistant", layout="wide")
-    setup_db()
-    nest_asyncio.apply()
-    asyncio.run(launch_bot())

 import streamlit as st
 from st_app import launch_bot
+import uuid
 import nest_asyncio
 import asyncio
 # Setup for HTTP API Calls to Amplitude Analytics
 if 'device_id' not in st.session_state:
 if "feedback_key" not in st.session_state:
     st.session_state.feedback_key = 0
 if __name__ == "__main__":
+   st.set_page_config(page_title="Harvard Management Company Assistant", layout="wide")
+   nest_asyncio.apply()
+   asyncio.run(launch_bot())

requirements.txt CHANGED Viewed

@@ -1,10 +1,9 @@
 omegaconf==2.3.0
 python-dotenv==1.0.1
-streamlit==1.39.0
 streamlit_pills==0.3.0
-streamlit-feedback==0.1.3
 langdetect==1.0.9
 langcodes==3.4.0
-datasets==2.19.2
-uuid==1.30
-vectara-agentic==0.1.19

 omegaconf==2.3.0
 python-dotenv==1.0.1
+streamlit==1.41.1
 streamlit_pills==0.3.0
+streamlit_feedback==0.1.3
+uuid==1.30
 langdetect==1.0.9
 langcodes==3.4.0
+vectara-agentic==0.1.20

st_app.py CHANGED Viewed

@@ -91,12 +91,12 @@ async def launch_bot():
             if st.button('Show Logs'):
                 show_modal()
-        st.divider()
-        st.markdown(
-            "## How this works?\n"
-            "This app was built with [Vectara](https://vectara.com).\n\n"
-            "It demonstrates the use of Agentic RAG functionality with Vectara"
-        )
     if "messages" not in st.session_state.keys():
         reset()

             if st.button('Show Logs'):
                 show_modal()
+        # st.divider()
+        # st.markdown(
+        #     "## How this works?\n"
+        #     "This app was built with [Vectara](https://vectara.com).\n\n"
+        #     "It demonstrates the use of Agentic RAG functionality with Vectara"
+        # )
     if "messages" not in st.session_state.keys():
         reset()

utils.py CHANGED Viewed

@@ -31,8 +31,11 @@ def thumbs_feedback(feedback, **kwargs):
 def send_amplitude_data(user_query, bot_response, demo_name, feedback=None):
     # Send query and response to Amplitude Analytics
     data = {
-        "api_key": os.getenv('AMPLITUDE_TOKEN'),
         "events": [{
             "device_id": st.session_state.device_id,
             "event_type": "submitted_query",

 def send_amplitude_data(user_query, bot_response, demo_name, feedback=None):
     # Send query and response to Amplitude Analytics
+    amplitude_token = os.environ.get('AMPLITUDE_TOKEN', None)
+    if amplitude_token is None:
+        return
     data = {
+        "api_key": amplitude_token,
         "events": [{
             "device_id": st.session_state.device_id,
             "event_type": "submitted_query",