ofermend committed on
Commit
4f8926e
·
1 Parent(s): 02b099e
Files changed (4) hide show
  1. agent.py +8 -8
  2. app.py +49 -1
  3. create_tables.sql +67 -0
  4. requirements.txt +1 -1
agent.py CHANGED
@@ -1,8 +1,10 @@
1
- from omegaconf import OmegaConf
2
  import os
3
-
4
  from typing import Optional
5
  from pydantic import Field, BaseModel
 
 
 
 
6
 
7
  from dotenv import load_dotenv
8
  load_dotenv(override=True)
@@ -71,11 +73,7 @@ def create_assistant_tools(cfg):
71
  tools_factory.guardrail_tools() +
72
  tools_factory.database_tools(
73
  content_description = 'Electric Vehicles',
74
- scheme = 'postgresql',
75
- host = 'localhost', port = '5432',
76
- user = 'ofer',
77
- password = 'noanoa',
78
- dbname = 'ev_database'
79
  ) +
80
  [ask_vehicles, ask_policies]
81
  )
@@ -83,9 +81,11 @@ def create_assistant_tools(cfg):
83
  def initialize_agent(_cfg, update_func):
84
  electric_vehicle_bot_instructions = """
85
  - You are a helpful research assistant, with expertise in electric vehicles, in conversation with a user.
 
86
  - For a query with multiple sub-questions, break down the query into the sub-questions,
87
  and make separate calls to the ask_vehicles or ask_policies tool to answer each sub-question,
88
  then combine the answers to provide a complete response.
 
89
  - Never discuss politics, and always respond politely.
90
  """
91
 
@@ -95,6 +95,7 @@ def initialize_agent(_cfg, update_func):
95
  custom_instructions=electric_vehicle_bot_instructions,
96
  update_func=update_func
97
  )
 
98
  return agent
99
 
100
 
@@ -104,7 +105,6 @@ def get_agent_config() -> OmegaConf:
104
  'corpus_ids': str(os.environ['VECTARA_CORPUS_IDS']).split(','),
105
  'api_keys': str(os.environ['VECTARA_API_KEYS']).split(','),
106
  'examples': os.environ.get('QUERY_EXAMPLES', None),
107
- 'title': "Electric Vehicles in the United States",
108
  'demo_welcome': "Welcome to the EV Assistant demo.",
109
  'demo_description': "This assistant can help you learn about electric vehicles in the United States, including how they work, the advantages of purchasing them, and reviews on the top choices.",
110
  })
 
 
1
  import os
 
2
  from typing import Optional
3
  from pydantic import Field, BaseModel
4
+ from omegaconf import OmegaConf
5
+
6
+ from llama_index.core.utilities.sql_wrapper import SQLDatabase
7
+ from sqlalchemy import create_engine
8
 
9
  from dotenv import load_dotenv
10
  load_dotenv(override=True)
 
73
  tools_factory.guardrail_tools() +
74
  tools_factory.database_tools(
75
  content_description = 'Electric Vehicles',
76
+ sql_database = SQLDatabase(create_engine('sqlite:///ev_database.db')),
 
 
 
 
77
  ) +
78
  [ask_vehicles, ask_policies]
79
  )
 
81
  def initialize_agent(_cfg, update_func):
82
  electric_vehicle_bot_instructions = """
83
  - You are a helpful research assistant, with expertise in electric vehicles, in conversation with a user.
84
+ - Before answering any user query, get sample data from each table in the database, so that you can understand NULL and unique values for each column.
85
  - For a query with multiple sub-questions, break down the query into the sub-questions,
86
  and make separate calls to the ask_vehicles or ask_policies tool to answer each sub-question,
87
  then combine the answers to provide a complete response.
88
+ - Use the database tools to answer analytical questions.
89
  - Never discuss politics, and always respond politely.
90
  """
91
 
 
95
  custom_instructions=electric_vehicle_bot_instructions,
96
  update_func=update_func
97
  )
98
+ agent.report()
99
  return agent
100
 
101
 
 
105
  'corpus_ids': str(os.environ['VECTARA_CORPUS_IDS']).split(','),
106
  'api_keys': str(os.environ['VECTARA_API_KEYS']).split(','),
107
  'examples': os.environ.get('QUERY_EXAMPLES', None),
 
108
  'demo_welcome': "Welcome to the EV Assistant demo.",
109
  'demo_description': "This assistant can help you learn about electric vehicles in the United States, including how they work, the advantages of purchasing them, and reviews on the top choices.",
110
  })
app.py CHANGED
@@ -4,6 +4,10 @@ import sys
4
  import streamlit as st
5
  from streamlit_pills import pills
6
 
 
 
 
 
7
  from vectara_agent.agent import AgentStatusType
8
  from agent import initialize_agent, get_agent_config
9
 
@@ -47,7 +51,6 @@ def launch_bot():
47
  reset()
48
 
49
  cfg = st.session_state.cfg
50
- st.set_page_config(page_title=cfg['title'], layout="wide")
51
 
52
  # left side content
53
  with st.sidebar:
@@ -126,5 +129,50 @@ def launch_bot():
126
 
127
  sys.stdout.flush()
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  if __name__ == "__main__":
 
 
130
  launch_bot()
 
4
  import streamlit as st
5
  from streamlit_pills import pills
6
 
7
+ import sqlite3
8
+ import pandas as pd
9
+ from datasets import load_dataset
10
+
11
  from vectara_agent.agent import AgentStatusType
12
  from agent import initialize_agent, get_agent_config
13
 
 
51
  reset()
52
 
53
  cfg = st.session_state.cfg
 
54
 
55
  # left side content
56
  with st.sidebar:
 
129
 
130
  sys.stdout.flush()
131
 
132
+ def setup_db():
133
+ db_path = 'ev_database.db'
134
+ conn = sqlite3.connect(db_path)
135
+ cursor = conn.cursor()
136
+
137
+ with st.spinner("Loading data... Please wait..."):
138
+ def tables_populated() -> bool:
139
+ tables = ['ev_population', 'county_registrations', 'ev_registrations']
140
+ for table in tables:
141
+ cursor.execute(f"SELECT name FROM sqlite_master WHERE type='table' AND name='{table}'")
142
+ result = cursor.fetchone()
143
+ if not result:
144
+ return False
145
+ return True
146
+
147
+ if tables_populated():
148
+ print("Database tables already populated, skipping setup")
149
+ conn.close()
150
+ return
151
+ else:
152
+ print("Populating database tables")
153
+
154
+ # Execute the SQL commands to create tables
155
+ with open('create_tables.sql', 'r') as sql_file:
156
+ sql_script = sql_file.read()
157
+ cursor.executescript(sql_script)
158
+
159
+ # Load data into ev_population table
160
+ df = load_dataset("vectara/ev-dataset", data_files="Electric_Vehicle_Population_Data.csv")['train'].to_pandas()
161
+ df.to_sql('ev_population', conn, if_exists='replace', index=False)
162
+
163
+ # Load data into county_registrations table
164
+ df = load_dataset("vectara/ev-dataset", data_files="Electric_Vehicle_Population_Size_History_By_County.csv")['train'].to_pandas()
165
+ df.to_sql('county_registrations', conn, if_exists='replace', index=False)
166
+
167
+ # Load data into ev_registrations table
168
+ df = load_dataset("vectara/ev-dataset", data_files="Electric_Vehicle_Title_and_Registration_Activity.csv")['train'].to_pandas()
169
+ df.to_sql('ev_registrations', conn, if_exists='replace', index=False)
170
+
171
+ # Commit changes and close connection
172
+ conn.commit()
173
+ conn.close()
174
+
175
  if __name__ == "__main__":
176
+ st.set_page_config(page_title="Electric Vehicles Assistant", layout="wide")
177
+ setup_db()
178
  launch_bot()
create_tables.sql ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CREATE TABLE ev_population (
2
+ vin VARCHAR(10),
3
+ county VARCHAR(20),
4
+ city VARCHAR(24),
5
+ state VARCHAR(2),
6
+ postal_code INTEGER,
7
+ model_year INTEGER,
8
+ make VARCHAR(20),
9
+ model VARCHAR(24),
10
+ ev_type VARCHAR(38),
11
+ cafv_eligibility VARCHAR(60),
12
+ electric_range INTEGER,
13
+ base_msrp INTEGER,
14
+ legislative_district INTEGER,
15
+ dol_vehicle_id INTEGER,
16
+ electric_utility VARCHAR(112)
17
+ );
18
+
19
+ CREATE TABLE county_registrations (
20
+ date DATE,
21
+ county VARCHAR(20),
22
+ state VARCHAR(2),
23
+ primary_use VARCHAR(9),
24
+ battery_evs INTEGER,
25
+ plug_in_hybrids INTEGER,
26
+ total_evs INTEGER,
27
+ total_non_evs INTEGER,
28
+ total_vehicles INTEGER,
29
+ percent_evs REAL
30
+ );
31
+
32
+ CREATE TABLE ev_registrations (
33
+ ev_type VARCHAR(38),
34
+ vin VARCHAR(10),
35
+ dol_vehicle_id INTEGER,
36
+ model_year INTEGER,
37
+ make VARCHAR(20),
38
+ model VARCHAR(24),
39
+ primary_use VARCHAR(34),
40
+ electric_range INTEGER,
41
+ odometer_reading INTEGER,
42
+ odometer_reading_description VARCHAR(55),
43
+ new_or_used VARCHAR(4),
44
+ sale_price INTEGER,
45
+ sale_date DATE,
46
+ base_msrp INTEGER,
47
+ transaction_type VARCHAR(32),
48
+ transaction_date DATE,
49
+ year INTEGER,
50
+ county VARCHAR(20),
51
+ city VARCHAR(24),
52
+ state VARCHAR(2),
53
+ postal_code INTEGER,
54
+ cafv_eligibility VARCHAR(40),
55
+ meets_2019_hb_2042_electric_range_requirement BOOLEAN,
56
+ meets_2019_hb_2042_sale_date_requirement BOOLEAN,
57
+ meets_2019_hb_2042_sale_price_value_requirement BOOLEAN,
58
+ battery_range_requirement_2019_hb_2042 VARCHAR(32),
59
+ purchase_date_requirement_2019_hb_2042 VARCHAR(59),
60
+ sale_price_value_requirement_2019_hb_2042 VARCHAR(59),
61
+ ev_fee_paid VARCHAR(14),
62
+ transportation_electrification_fee_paid VARCHAR(14),
63
+ hybrid_vehicle_electrificatin_fee_paid VARCHAR(14),
64
+ geoid_2020 INTEGER,
65
+ legislative_district INTEGER,
66
+ electric_utility VARCHAR(112)
67
+ );
requirements.txt CHANGED
@@ -3,5 +3,5 @@ pydantic==1.10.15
3
  python-dotenv==1.0.1
4
  streamlit==1.32.2
5
  streamlit_pills==0.3.0
6
- psycopg2-binary==2.9.9
7
  git+https://{GITHUB_TOKEN}@github.com/vectara/vectara-agent.git
 
3
  python-dotenv==1.0.1
4
  streamlit==1.32.2
5
  streamlit_pills==0.3.0
6
+ datasets==2.14.7
7
  git+https://{GITHUB_TOKEN}@github.com/vectara/vectara-agent.git