ofermend committed on
Commit
8bdf672
·
1 Parent(s): d1c53b9
Files changed (6) hide show
  1. README.md +2 -2
  2. agent.py +46 -52
  3. app.py +4 -52
  4. requirements.txt +4 -5
  5. st_app.py +6 -6
  6. utils.py +4 -1
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: CFPB Assistant
3
  emoji: 🐨
4
  colorFrom: indigo
5
  colorTo: indigo
@@ -7,7 +7,7 @@ sdk: docker
7
  app_port: 8501
8
  pinned: false
9
  license: apache-2.0
10
- short_description: CFPB Assistant using vectara-agentic
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: HMC Demo
3
  emoji: 🐨
4
  colorFrom: indigo
5
  colorTo: indigo
 
7
  app_port: 8501
8
  pinned: false
9
  license: apache-2.0
10
+ short_description: Ask questions about Harvard Management
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
agent.py CHANGED
@@ -3,44 +3,54 @@ from typing import Optional
3
  from pydantic import Field, BaseModel
4
  from omegaconf import OmegaConf
5
 
6
- from llama_index.core.utilities.sql_wrapper import SQLDatabase
7
- from sqlalchemy import create_engine
8
 
9
  from dotenv import load_dotenv
10
  load_dotenv(override=True)
11
 
12
- from vectara_agentic.agent import Agent
13
- from vectara_agentic.tools import ToolsFactory, VectaraToolFactory
14
 
15
- def create_assistant_tools(cfg):
16
 
17
- class QueryCFPBComplaints(BaseModel):
18
  query: str = Field(description="The user query.")
19
- company: Optional[str] = Field(
 
 
 
 
 
 
 
 
 
 
20
  default=None,
21
- description="The company that the complaint is about.",
22
- examples=['CAPITAL ONE FINANCIAL CORPORATION', 'BANK OF AMERICA, NATIONAL ASSOCIATION', 'CITIBANK, N.A.', 'WELLS FARGO & COMPANY', 'JPMORGAN CHASE & CO.']
23
  )
24
- state: Optional[str] = Field(
25
  default=None,
26
- description="The two-character state code where the consumer lives.",
27
- examples=['CA', 'FL', 'NY', 'TX', 'GA']
28
  )
29
 
30
  vec_factory = VectaraToolFactory(
31
- vectara_api_key=cfg.api_keys,
32
  vectara_customer_id=cfg.customer_id,
33
- vectara_corpus_id=cfg.corpus_ids
34
  )
35
 
36
- summarizer = 'vectara-experimental-summary-ext-2023-12-11-med-omni'
37
- ask_complaints = vec_factory.create_rag_tool(
38
- tool_name = "ask_complaints",
 
39
  tool_description = """
40
  Given a user query,
41
- returns a response to a user question about customer complaints for bank services.
42
  """,
43
- tool_args_schema = QueryCFPBComplaints,
44
  reranker = "chain", rerank_k = 100,
45
  rerank_chain = [
46
  {
@@ -49,46 +59,30 @@ def create_assistant_tools(cfg):
49
  },
50
  {
51
  "type": "mmr",
52
- "diversity_bias": 0.4,
53
- "limit": 30
54
  }
55
  ],
56
  n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.005,
57
  vectara_summarizer = summarizer,
 
58
  include_citations = True,
59
  )
60
-
61
- tools_factory = ToolsFactory()
62
-
63
- db_tools = tools_factory.database_tools(
64
- tool_name_prefix = "cfpb",
65
- content_description = 'Customer complaints about five banks (Bank of America, Wells Fargo, Capital One, Chase, and CITI Bank) and geographic information (counties and zip codes)',
66
- sql_database = SQLDatabase(create_engine('sqlite:///cfpb_database.db')),
67
- )
68
-
69
- return (tools_factory.standard_tools() +
70
- tools_factory.guardrail_tools() +
71
- db_tools +
72
- [ask_complaints]
73
- )
74
 
75
  def initialize_agent(_cfg, agent_progress_callback=None):
76
- cfpb_complaints_bot_instructions = """
77
- - You are a helpful research assistant,
78
- with expertise in finance and complaints from the CFPB (Consumer Financial Protection Bureau),
79
- in conversation with a user.
80
- - For analytical/numeric questions, try to use the cfpb_load_data and other database tools.
81
- - For questions about customers' complaints (the text of the complaint), use the ask_complaints tool.
82
- You only need the query parameter to use this tool, but you can supply other parameters if provided.
83
- Do not include the "References" section in your response.
84
- - Never discuss politics, and always respond politely.
85
  """
86
 
87
  agent = Agent(
88
  tools=create_assistant_tools(_cfg),
89
- topic="Customer complaints from the Consumer Financial Protection Bureau (CFPB)",
90
- custom_instructions=cfpb_complaints_bot_instructions,
91
- agent_progress_callback=agent_progress_callback
92
  )
93
  agent.report()
94
  return agent
@@ -97,11 +91,11 @@ def initialize_agent(_cfg, agent_progress_callback=None):
97
  def get_agent_config() -> OmegaConf:
98
  cfg = OmegaConf.create({
99
  'customer_id': str(os.environ['VECTARA_CUSTOMER_ID']),
100
- 'corpus_ids': str(os.environ['VECTARA_CORPUS_IDS']),
101
- 'api_keys': str(os.environ['VECTARA_API_KEYS']),
102
  'examples': os.environ.get('QUERY_EXAMPLES', None),
103
- 'demo_name': "cfpb-assistant",
104
- 'demo_welcome': "Welcome to the CFPB Customer Complaints demo.",
105
- 'demo_description': "This assistant can help you gain insights into customer complaints to banks recorded by the Consumer Financial Protection Bureau.",
106
  })
107
  return cfg
 
3
  from pydantic import Field, BaseModel
4
  from omegaconf import OmegaConf
5
 
6
+ from vectara_agentic.agent import Agent
7
+ from vectara_agentic.tools import VectaraToolFactory
8
 
9
  from dotenv import load_dotenv
10
  load_dotenv(override=True)
11
 
12
+ initial_prompt = "How can I help you today?"
 
13
 
14
+ def create_assistant_tools(cfg):
15
 
16
+ class QueryHMC(BaseModel):
17
  query: str = Field(description="The user query.")
18
+ ticker: Optional[str] = Field(
19
+ default=None,
20
+ description="The company ticker.",
21
+ examples=['GOOG', 'META']
22
+ )
23
+ year: Optional[str] = Field(
24
+ default=None,
25
+ description="The year of the report.",
26
+ examples=[2020, 2023]
27
+ )
28
+ quarter: Optional[int] = Field(
29
  default=None,
30
+ description="The quarter of the report.",
31
+ examples=[1, 2, 3, 4]
32
  )
33
+ filing_type: Optional[str] = Field(
34
  default=None,
35
+ description="The type of filing.",
36
+ examples=['10K', '10Q']
37
  )
38
 
39
  vec_factory = VectaraToolFactory(
40
+ vectara_api_key=cfg.api_key,
41
  vectara_customer_id=cfg.customer_id,
42
+ vectara_corpus_id=cfg.corpus_id
43
  )
44
 
45
+ #summarizer = 'vectara-experimental-summary-ext-2023-12-11-med-omni'
46
+ summarizer = 'vectara-summary-ext-24-05-med-omni'
47
+ ask_hmc = vec_factory.create_rag_tool(
48
+ tool_name = "ask_hmc",
49
  tool_description = """
50
  Given a user query,
51
+ returns a response to a user question about fund management companies.
52
  """,
53
+ tool_args_schema = QueryHMC,
54
  reranker = "chain", rerank_k = 100,
55
  rerank_chain = [
56
  {
 
59
  },
60
  {
61
  "type": "mmr",
62
+ "diversity_bias": 0.05,
63
+ "limit": 20
64
  }
65
  ],
66
  n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.005,
67
  vectara_summarizer = summarizer,
68
+ summary_num_results = 10,
69
  include_citations = True,
70
  )
71
+ return [ask_hmc]
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  def initialize_agent(_cfg, agent_progress_callback=None):
74
+ bot_instructions = """
75
+ - You are a helpful assistant, with expertise in management of public company stock portfolios.
76
+ - Use the 'ask_hmc' tool to answer questions about public company performance, risks, and other financial metrics.
77
+ - Use the year, quarter, filing_type and ticker arguments to the 'ask_hmc' tool to get more specific answers.
78
+ - Note that 10Q reports exist for quarters 1, 2, 3 and for the 4th quarter there is a 10K report.
 
 
 
 
79
  """
80
 
81
  agent = Agent(
82
  tools=create_assistant_tools(_cfg),
83
+ topic="Endowment fund management",
84
+ custom_instructions=bot_instructions,
85
+ agent_progress_callback=agent_progress_callback,
86
  )
87
  agent.report()
88
  return agent
 
91
  def get_agent_config() -> OmegaConf:
92
  cfg = OmegaConf.create({
93
  'customer_id': str(os.environ['VECTARA_CUSTOMER_ID']),
94
+ 'corpus_id': str(os.environ['VECTARA_CORPUS_ID']),
95
+ 'api_key': str(os.environ['VECTARA_API_KEY']),
96
  'examples': os.environ.get('QUERY_EXAMPLES', None),
97
+ 'demo_name': "HMC Demo",
98
+ 'demo_welcome': "HMC Assistant.",
99
+ 'demo_description': "AI assistant For Harvard Management Company.",
100
  })
101
  return cfg
app.py CHANGED
@@ -1,14 +1,9 @@
1
- import os
2
-
3
  import streamlit as st
4
  from st_app import launch_bot
 
5
 
6
  import nest_asyncio
7
  import asyncio
8
- import uuid
9
-
10
- import sqlite3
11
- from datasets import load_dataset
12
 
13
  # Setup for HTTP API Calls to Amplitude Analytics
14
  if 'device_id' not in st.session_state:
@@ -17,50 +12,7 @@ if 'device_id' not in st.session_state:
17
  if "feedback_key" not in st.session_state:
18
  st.session_state.feedback_key = 0
19
 
20
- def setup_db():
21
- db_path = 'cfpb_database.db'
22
- conn = sqlite3.connect(db_path)
23
- cursor = conn.cursor()
24
-
25
- with st.spinner("Loading data... Please wait..."):
26
- def table_populated() -> bool:
27
- cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='cfpb_complaints'")
28
- result = cursor.fetchone()
29
- if not result:
30
- return False
31
- return True
32
-
33
- if table_populated():
34
- print("Database table already populated, skipping setup")
35
- conn.close()
36
- return
37
- else:
38
- print("Populating database table")
39
-
40
- # Execute the SQL commands to create the database table
41
- with open('create_table.sql', 'r') as sql_file:
42
- sql_script = sql_file.read()
43
- cursor.executescript(sql_script)
44
-
45
- hf_token = os.getenv('HF_TOKEN')
46
-
47
- # Load data into cfpb_complaints table
48
- df = load_dataset("vectara/cfpb-complaints", data_files="cfpb_complaints.csv", token=hf_token)['train'].to_pandas()
49
- df.to_sql('cfpb_complaints', conn, if_exists='replace', index=False)
50
-
51
- df = load_dataset("vectara/cfpb-complaints", data_files="cfpb_county_populations.csv", token=hf_token)['train'].to_pandas()
52
- df.to_sql('cfpb_county_populations', conn, if_exists='replace', index=False)
53
-
54
- df = load_dataset("vectara/cfpb-complaints", data_files="cfpb_zip_to_county.csv", token=hf_token)['train'].to_pandas()
55
- df.to_sql('cfpb_zip_to_county', conn, if_exists='replace', index=False)
56
-
57
- # Commit changes and close connection
58
- conn.commit()
59
- conn.close()
60
-
61
  if __name__ == "__main__":
62
- st.set_page_config(page_title="CFPB Complaints Assistant", layout="wide")
63
- setup_db()
64
-
65
- nest_asyncio.apply()
66
- asyncio.run(launch_bot())
 
 
 
1
  import streamlit as st
2
  from st_app import launch_bot
3
+ import uuid
4
 
5
  import nest_asyncio
6
  import asyncio
 
 
 
 
7
 
8
  # Setup for HTTP API Calls to Amplitude Analytics
9
  if 'device_id' not in st.session_state:
 
12
  if "feedback_key" not in st.session_state:
13
  st.session_state.feedback_key = 0
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  if __name__ == "__main__":
16
+ st.set_page_config(page_title="Harvard Management Company Assistant", layout="wide")
17
+ nest_asyncio.apply()
18
+ asyncio.run(launch_bot())
 
 
requirements.txt CHANGED
@@ -1,10 +1,9 @@
1
  omegaconf==2.3.0
2
  python-dotenv==1.0.1
3
- streamlit==1.39.0
4
  streamlit_pills==0.3.0
5
- streamlit-feedback==0.1.3
 
6
  langdetect==1.0.9
7
  langcodes==3.4.0
8
- datasets==2.19.2
9
- uuid==1.30
10
- vectara-agentic==0.1.19
 
1
  omegaconf==2.3.0
2
  python-dotenv==1.0.1
3
+ streamlit==1.41.1
4
  streamlit_pills==0.3.0
5
+ streamlit_feedback==0.1.3
6
+ uuid==1.30
7
  langdetect==1.0.9
8
  langcodes==3.4.0
9
+ vectara-agentic==0.1.20
 
 
st_app.py CHANGED
@@ -91,12 +91,12 @@ async def launch_bot():
91
  if st.button('Show Logs'):
92
  show_modal()
93
 
94
- st.divider()
95
- st.markdown(
96
- "## How this works?\n"
97
- "This app was built with [Vectara](https://vectara.com).\n\n"
98
- "It demonstrates the use of Agentic RAG functionality with Vectara"
99
- )
100
 
101
  if "messages" not in st.session_state.keys():
102
  reset()
 
91
  if st.button('Show Logs'):
92
  show_modal()
93
 
94
+ # st.divider()
95
+ # st.markdown(
96
+ # "## How this works?\n"
97
+ # "This app was built with [Vectara](https://vectara.com).\n\n"
98
+ # "It demonstrates the use of Agentic RAG functionality with Vectara"
99
+ # )
100
 
101
  if "messages" not in st.session_state.keys():
102
  reset()
utils.py CHANGED
@@ -31,8 +31,11 @@ def thumbs_feedback(feedback, **kwargs):
31
 
32
  def send_amplitude_data(user_query, bot_response, demo_name, feedback=None):
33
  # Send query and response to Amplitude Analytics
 
 
 
34
  data = {
35
- "api_key": os.getenv('AMPLITUDE_TOKEN'),
36
  "events": [{
37
  "device_id": st.session_state.device_id,
38
  "event_type": "submitted_query",
 
31
 
32
  def send_amplitude_data(user_query, bot_response, demo_name, feedback=None):
33
  # Send query and response to Amplitude Analytics
34
+ amplitude_token = os.environ.get('AMPLITUDE_TOKEN', None)
35
+ if amplitude_token is None:
36
+ return
37
  data = {
38
+ "api_key": amplitude_token,
39
  "events": [{
40
  "device_id": st.session_state.device_id,
41
  "event_type": "submitted_query",