Robert Castagna committed on
Commit
1fce135
·
1 Parent(s): 70fcc09

adding postgres db as well as fixing API endpoints -- 'yfinance' broke

Browse files
.gitignore CHANGED
@@ -1,4 +1,5 @@
1
  secrets.json
2
  edgar-crawler/
3
  .venv/
4
- .env
 
 
1
  secrets.json
2
  edgar-crawler/
3
  .venv/
4
+ .env
5
+ .streamlit/
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ libpq-dev
pages/1_Fundamentals.py CHANGED
@@ -6,8 +6,9 @@ import streamlit as st
6
  import yfinance as yf
7
  from datetime import timedelta
8
  from dotenv import load_dotenv
 
9
 
10
- load_dotenv()
11
 
12
  def get_finnhub_data(example: str) -> json:
13
  """
@@ -96,8 +97,72 @@ def get_list_of_tickers():
96
  list_of_tickers.append(comp_info[i]['symbol'])
97
  return list_of_tickers
98
 
99
- # ---------------------------------------------------------------------------------------------- #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
 
101
  if 'tickers' not in st.session_state:
102
  tickers = [
103
  "AAPL", "MSFT", "GOOG", "NVDA", "TSLA",
@@ -115,50 +180,85 @@ with st.form(key="selecting columns"):
115
  st.session_state['tickers'] = symbols
116
 
117
  if submit_button and symbols and strategy_selection == 'Value':
118
- gains_data = {}
119
- hash_map = {}
120
-
121
- for ticker in symbols:
122
- # make all the API calls and capture return json
123
- basic_info = get_industry(ticker)
124
- metric_data, annual_series_data, quarterly_series_data = get_company_metrics(ticker)
125
-
126
- # reformat all JSON returns to be flattened dictionaries
127
- roe_dict = {'roe': annual_series_data['roe'][0]['v']*100 if ('roe' in annual_series_data) and (len(annual_series_data['roe'])>1) and ('v' in annual_series_data['roe'][0]) else 0}
128
- eps_dict = {'eps' :annual_series_data['eps'][0]['v'] if ('eps' in annual_series_data) and (len(annual_series_data['eps'])>1) and ('v' in annual_series_data['eps'][0]) else 0}
129
- pe_dict = {'pe': annual_series_data['pe'][0]['v'] if ('pe' in annual_series_data) and (len(annual_series_data['pe'])>1) and ('v' in annual_series_data['pe'][0]) else 0}
130
- ps_dict = {'ps': annual_series_data['ps'][0]['v'] if ('ps' in annual_series_data) and (len(annual_series_data['ps'])>1) and ('v' in annual_series_data['ps'][0]) else 0}
131
- pb_dict = {'pb': annual_series_data['pb'][0]['v'] if ('pb' in annual_series_data) and (len(annual_series_data['pb'])>1) and ('v' in annual_series_data['pb'][0]) else 0}
132
- pfcf_dict = {'pfcf': annual_series_data['pfcf'][0]['v'] if ('pfcf' in annual_series_data) and (len(annual_series_data['pfcf'])>1) and ('v' in annual_series_data['pfcf'][0]) else 0}
133
-
134
- # merge all dictionary keys per ticker
135
- combined_info = basic_info.copy() # Make a copy of the basic info
136
- combined_info = combined_info | metric_data | roe_dict | eps_dict | pe_dict | ps_dict | pb_dict | pfcf_dict
 
137
 
138
- hash_map[ticker] = combined_info
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
- # equity gains
141
- _, div, close_price = get_equity_gains(ticker=ticker, period=1810)
142
- gains_data[ticker] = [div, close_price]
143
-
144
-
145
- # Now, create a DataFrame from the hash_map
146
- df_1 = pd.DataFrame.from_dict(hash_map, orient='index')[['finnhubIndustry','beta','pe','ps','pb','pfcf','eps','roe','roeTTM','dividendPerShareTTM','dividendGrowthRate5Y','epsGrowth5Y']]
147
- df_2 = pd.DataFrame.from_dict(gains_data, orient='index', columns=['Recent Dividend','Price'])
148
-
149
- df_final = df_1.join(df_2)
150
-
151
- # calculate additional columns
152
- df_final['1Y_SP500_growth'], _, _ = get_equity_gains(ticker= '^GSPC', period=720)
153
- df_final['90_day_tbill'] = 4.06
154
- df_final['dividendGrowthRate5Y'] = df_final['dividendGrowthRate5Y']/100
155
- df_final['1Y_CAPM'] = df_final['90_day_tbill']/100 + df_final['beta'] * (df_final['1Y_SP500_growth'] - df_final['90_day_tbill']/100)
156
- df_final['1Y_DDM'] = (df_final['Recent Dividend'] * (1+df_final['dividendGrowthRate5Y'])) / (df_final['1Y_CAPM'] - df_final['dividendGrowthRate5Y'])
157
- df_final = df_final[['finnhubIndustry','Price','eps','roe','roeTTM','pe','ps','pb','pfcf','epsGrowth5Y','beta','Recent Dividend','90_day_tbill','1Y_SP500_growth','dividendPerShareTTM','dividendGrowthRate5Y','1Y_CAPM','1Y_DDM']]
158
- df_final.rename({'finnhubIndustry':'Industry', 'eps':'EPS', 'roe':'ROE','pe':'P/E','ps':'P/S', 'pb':'P/B','pfcf':'P/FCF','beta':'Beta'}, inplace=True, axis=1)
159
- st.write(df_final)
160
-
161
- st.session_state['tickers'] = symbols
162
 
163
  if submit_button and symbols and strategy_selection == 'Growth':
164
 
@@ -168,6 +268,7 @@ with st.form(key="selecting columns"):
168
  basic_info = get_industry(ticker)
169
  metric_data, annual_series_data, quarterly_series_data = get_company_metrics(ticker)
170
 
 
171
  # reformat all JSON returns to be flattened dictionaries
172
  roe_dict = {'roe': annual_series_data['roe'][0]['v'] if ('roe' in annual_series_data) and (len(annual_series_data['roe'])>1) and ('v' in annual_series_data['roe'][0]) else 0}
173
  ev_dict = {'ev' :annual_series_data['ev'][0]['v'] if ('ev' in annual_series_data) and (len(annual_series_data['ev'])>1) and ('v' in annual_series_data['ev'][0]) else 0}
@@ -184,22 +285,22 @@ with st.form(key="selecting columns"):
184
  hash_map[ticker] = combined_info
185
 
186
  # equity gains
187
- _, div, close_price = get_equity_gains(ticker=ticker, period=1810)
188
- gains_data[ticker] = [div, close_price]
189
 
190
 
191
  # Now, create a DataFrame from the hash_map
192
  df_1 = pd.DataFrame.from_dict(hash_map, orient='index')[['finnhubIndustry','roe','marketCapitalization','ebitdPerShareAnnual','pe','ps','pb','salesPerShare','eps','epsGrowth5Y','ev','operatingMarginAnnual', 'ebitdPerShareTTM', 'ebitdaCagr5Y', 'ebitdaInterimCagr5Y']]
193
- df_2 = pd.DataFrame.from_dict(gains_data, orient='index', columns=['Recent Dividend','Price'])
194
- df_final = df_1.join(df_2)
195
 
196
- df_final['PE/G'] = df_final['pe'] / df_final['epsGrowth5Y']
197
- df_final.rename({'finnhubIndustry':'Industry','marketCapitalization':'MarketCap','roe':'ROE', 'ev':'Enterp. Val', 'pe':'P/E', 'ps':'P/S', 'pb':'P/B', 'eps': 'EPS'}, inplace=True, axis=1)
198
- st.write(df_final)
199
 
200
  st.session_state['tickers'] = symbols
201
 
202
  if submit_button and symbols and strategy_selection == 'Bypass':
203
- st.write('You have selected to bypass the metrics calculation for the selected tickers.')
204
  st.session_state['tickers'] = symbols
205
 
 
6
  import yfinance as yf
7
  from datetime import timedelta
8
  from dotenv import load_dotenv
9
+ import psycopg2
10
 
11
+ load_dotenv(override=True)
12
 
13
  def get_finnhub_data(example: str) -> json:
14
  """
 
97
  list_of_tickers.append(comp_info[i]['symbol'])
98
  return list_of_tickers
99
 
100
def handle_json_data(data, ticker) -> tuple[list[pd.DataFrame], int]:
    """
    Convert one API payload for ``ticker`` into DataFrames ready for ``pd.concat``.

    Two payload shapes are recognised, decided per value:

    * Time-series value: a non-empty list of dicts. Each such key becomes its
      own DataFrame whose ``'v'`` column is renamed to the key and which is
      indexed by ``('period', 'ticker')``. These are meant to be concatenated
      column-wise (``axis=1``).
    * Flat value: anything else that is not empty. The *whole* payload is then
      treated as a single record, turned into a one-row DataFrame indexed by
      ``ticker``, and iteration stops. These are meant to be concatenated
      row-wise (``axis=0``).

    Empty values (``None``, ``''``, ``[]``, ``{}``) are reported and skipped.

    Args:
        data: Mapping of field name to value, as returned by the API.
        ticker: Symbol the payload belongs to.

    Returns:
        ``(df_list, axis)`` where ``axis`` is the concat axis matching the
        last shape seen. ``axis`` is 0 when no usable value was found, in
        which case ``df_list`` is empty.
    """
    df_list = []
    # Default keeps the return well-defined for an empty payload; previously
    # `axis` was unbound in that case and the function raised.
    axis = 0

    for key, val in data.items():
        is_time_series = (
            isinstance(val, (list, tuple))
            and len(val) > 0
            and isinstance(val[0], dict)
        )

        if is_time_series:
            temporal_df = pd.DataFrame(val).rename({'v': key}, axis=1)
            temporal_df['ticker'] = ticker
            axis = 1

            # set_index below needs a 'period' column; skip malformed series
            # instead of failing the whole payload.
            if 'period' in temporal_df.columns:
                df_list.append(temporal_df.set_index(['period', 'ticker']))
            else:
                print(f"series '{key}' has no 'period' field; skipped.")

        elif val is None or (hasattr(val, '__len__') and len(val) == 0):
            print("dict key has no data.")

        else:
            # Flat record: scalars (including numbers, which have no len())
            # and any other non-series value. One row for the entire payload.
            non_temporal_df = pd.DataFrame(list(data.items()), columns=['ticker', ticker])
            df_list.append(non_temporal_df.set_index('ticker').T)
            axis = 0
            break

    return df_list, axis
129
+
130
def insert_dataframe(cursor, table_name, df):
    """
    Insert every row of ``df`` into ``table_name``, skipping rows that collide
    with an existing key.

    The table's PRIMARY KEY / UNIQUE columns are looked up in
    ``information_schema`` and used as the ``ON CONFLICT`` target. When the
    table has no such constraint the target is omitted, which PostgreSQL
    accepts for ``DO NOTHING``.

    Args:
        cursor: DB-API cursor (psycopg2) on an open connection.
        table_name: Target table. NOTE(review): this and the DataFrame column
            names are spliced into the INSERT text as identifiers, so they
            must come from trusted code, never from user input.
        df: Rows to insert; column names must match the table's columns.

    Returns:
        None.
    """
    # Nothing to insert (no rows or no columns): avoid building invalid SQL.
    if df.empty:
        return

    columns_str = ', '.join(map(str, df.columns))
    placeholders = ', '.join(['%s'] * len(df.columns))

    # Look up key columns for conflict handling. The table name is bound as a
    # parameter rather than formatted into the query text.
    pkey_query = """
        SELECT c.column_name
        FROM information_schema.table_constraints tc
        JOIN information_schema.constraint_column_usage AS ccu USING (constraint_schema, constraint_name)
        JOIN information_schema.columns AS c ON c.table_schema = tc.constraint_schema
            AND tc.table_name = c.table_name AND ccu.column_name = c.column_name
        WHERE constraint_type in ('UNIQUE', 'PRIMARY KEY') and tc.table_name = %s;
    """
    cursor.execute(pkey_query, (table_name,))
    # A column can appear once per constraint it belongs to; keep first
    # occurrence only so the conflict target has no repeats.
    pkeys_list = list(dict.fromkeys(row[0] for row in cursor.fetchall()))

    conflict_target = f" ({', '.join(pkeys_list)})" if pkeys_list else ""
    insert_query = (
        f"INSERT INTO {table_name} ({columns_str}) VALUES ({placeholders}) "
        f"ON CONFLICT{conflict_target} DO NOTHING"
    )

    # astype(object) boxes NumPy scalars into native Python values; psycopg2
    # cannot adapt types such as numpy.int64.
    rows = list(df.astype(object).itertuples(index=False, name=None))

    # Execute batch insert
    cursor.executemany(insert_query, rows)
    print('df inserted.')
164
 
165
+ # ---------------------------------------------------------------------------------------------- #
166
  if 'tickers' not in st.session_state:
167
  tickers = [
168
  "AAPL", "MSFT", "GOOG", "NVDA", "TSLA",
 
180
  st.session_state['tickers'] = symbols
181
 
182
  if submit_button and symbols and strategy_selection == 'Value':
183
+
184
+ # Mapping variables to target tables. keys are dataframe names, values are (write mode, db table)
185
+ table_mapping = {
186
+ 'quarterly_series_data': ('APPEND', 'quarterly_financials'),
187
+ 'basic_info': ('OVERWRITE', 'companies')
188
+ }
189
+
190
+ dbparams = {
191
+ 'dbname':"hf_fundamentals",
192
+ 'user':"postgres",
193
+ 'password':os.environ['postgres_pwd'],
194
+ 'host':"localhost",
195
+ 'port':"5432"
196
+ }
197
+
198
+ try:
199
+ with psycopg2.connect(**dbparams) as conn:
200
+ conn.autocommit=True
201
+
202
+ with conn.cursor() as cur:
203
 
204
+ query_companies = "SELECT * FROM public.quarterly_financials where ticker = %s;" # works because of foreign key relation
205
+
206
+ all_data = {}
207
+
208
+ for ticker in symbols:
209
+ # first, check if the ticker has a record in the database
210
+ cur.execute(query_companies, (ticker,))
211
+ results = cur.fetchall()
212
+
213
+ if len(results) == 0: # if not in database, call api's and store in db
214
+ print("no data found in db")
215
+ # make all the API calls and capture return json
216
+ basic_info = get_industry(ticker)
217
+ metric_data, annual_series_data, quarterly_series_data = get_company_metrics(ticker)
218
+
219
+ api_data_mapping = {'basic_info':basic_info,
220
+ # 'metric_data':metric_data,
221
+ #'annual_series_data':annual_series_data,
222
+ 'quarterly_series_data':quarterly_series_data}
223
+
224
+ for df_name, df in api_data_mapping.items():
225
+ df_main_list = []
226
+
227
+ # creates a list of dataframes where each index corresponds to a ticker
228
+ df_list, axis = handle_json_data(df, ticker)
229
+ df_main = pd.concat(df_list, axis=axis)
230
+ df_main_list.append(df_main)
231
+
232
+ # craft a dict where key is data source and values are list of datafames for every ticker
233
+ if df_name not in all_data:
234
+ all_data[df_name] = df_main_list
235
+ else:
236
+ print('redundant.')
237
+
238
+ else:
239
+ print("retrieved from db.")
240
+ st.write(results[0])
241
 
242
+ for dataframe_name, data in all_data.items():
243
+
244
+ write_mode, target_table = table_mapping[dataframe_name]
245
+ print(write_mode, dataframe_name, target_table)
246
+
247
+ # combines all tickers into one dataframe for a given data source
248
+ df_all_tickers = pd.concat(data)
249
+
250
+ print(ticker)
251
+ unstacked_df = df_all_tickers.reset_index()
252
+
253
+ if 'index' in unstacked_df.columns: # hack fix for single index dataframes ..
254
+ unstacked_df=unstacked_df.drop('ticker', axis=1)
255
+ unstacked_df.rename(columns={'index':'ticker'}, inplace = True)
256
+
257
+ insert_dataframe(cur, target_table, unstacked_df)
258
+
259
+ st.session_state['tickers'] = symbols
260
+ except:
261
+ print("You do not have an active Postgres instance running. Select 'Bypass' and continue to Portfolio Builder.")
 
 
262
 
263
  if submit_button and symbols and strategy_selection == 'Growth':
264
 
 
268
  basic_info = get_industry(ticker)
269
  metric_data, annual_series_data, quarterly_series_data = get_company_metrics(ticker)
270
 
271
+
272
  # reformat all JSON returns to be flattened dictionaries
273
  roe_dict = {'roe': annual_series_data['roe'][0]['v'] if ('roe' in annual_series_data) and (len(annual_series_data['roe'])>1) and ('v' in annual_series_data['roe'][0]) else 0}
274
  ev_dict = {'ev' :annual_series_data['ev'][0]['v'] if ('ev' in annual_series_data) and (len(annual_series_data['ev'])>1) and ('v' in annual_series_data['ev'][0]) else 0}
 
285
  hash_map[ticker] = combined_info
286
 
287
  # equity gains
288
+ # _, div, close_price = get_equity_gains(ticker=ticker, period=1810)
289
+ # gains_data[ticker] = [div, close_price]
290
 
291
 
292
  # Now, create a DataFrame from the hash_map
293
  df_1 = pd.DataFrame.from_dict(hash_map, orient='index')[['finnhubIndustry','roe','marketCapitalization','ebitdPerShareAnnual','pe','ps','pb','salesPerShare','eps','epsGrowth5Y','ev','operatingMarginAnnual', 'ebitdPerShareTTM', 'ebitdaCagr5Y', 'ebitdaInterimCagr5Y']]
294
+ # df_2 = pd.DataFrame.from_dict(gains_data, orient='index', columns=['Recent Dividend','Price'])
295
+ # df_final = df_1.join(df_2)
296
 
297
+ df_1['PE/G'] = df_1['pe'] / df_1['epsGrowth5Y']
298
+ df_1.rename({'finnhubIndustry':'Industry','marketCapitalization':'MarketCap','roe':'ROE', 'ev':'Enterp. Val', 'pe':'P/E', 'ps':'P/S', 'pb':'P/B', 'eps': 'EPS'}, inplace=True, axis=1)
299
+ st.write(df_1)
300
 
301
  st.session_state['tickers'] = symbols
302
 
303
  if submit_button and symbols and strategy_selection == 'Bypass':
304
+ st.write('You have selected to bypass the metrics calculation for the selected tickers. Proceed to Portfolio Builder.')
305
  st.session_state['tickers'] = symbols
306
 
pages/2_Portfolio_Builder.py CHANGED
@@ -115,7 +115,7 @@ with st.form(key="selecting columns"):
115
  obb
116
  .equity
117
  .price
118
- .historical(tickers, start_date=start_date, end_date=end_date, provider="yfinance")
119
  .to_df()
120
  .pivot(columns="symbol", values="close")
121
  )
 
115
  obb
116
  .equity
117
  .price
118
+ .historical(tickers, start_date=start_date, end_date=end_date, provider="fmp")
119
  .to_df()
120
  .pivot(columns="symbol", values="close")
121
  )
requirements.txt CHANGED
@@ -1,3 +1,5 @@
 
 
1
  openbb==4.1.4
2
  riskfolio-lib==5.0.1
3
  transformers==4.36.2
@@ -8,7 +10,7 @@ beautifulsoup4==4.12.3
8
  click==7.0
9
  cssutils==1.0.2
10
  numpy==1.24.4
11
- lxml==4.9.1
12
  requests>=2.31.0
13
  urllib3>=1.26.7
14
  pandas>=1.5.3
@@ -20,4 +22,4 @@ regex==2024.5.15
20
  yfinance==0.2.28
21
  python-dotenv==1.0.1
22
  asyncio==3.4.3
23
- nest_asyncio==1.6.0
 
1
+ sqlalchemy
2
+ psycopg2-binary
3
  openbb==4.1.4
4
  riskfolio-lib==5.0.1
5
  transformers==4.36.2
 
10
  click==7.0
11
  cssutils==1.0.2
12
  numpy==1.24.4
13
+ #lxml==4.9.1
14
  requests>=2.31.0
15
  urllib3>=1.26.7
16
  pandas>=1.5.3
 
22
  yfinance==0.2.28
23
  python-dotenv==1.0.1
24
  asyncio==3.4.3
25
+ nest_asyncio==1.6.0
test.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Smoke test: connect to the local Postgres instance and list its tables."""
import os
from contextlib import closing

import psycopg2


def main():
    """Connect to ``hf_fundamentals`` and print every row of ``pg_catalog.pg_tables``."""
    # The password is read from the environment (same variable name the
    # Fundamentals page uses) so that no credential is stored in the repo.
    connection = psycopg2.connect(
        database="hf_fundamentals",
        user="postgres",
        password=os.environ["postgres_pwd"],
        host="localhost",
        port=5432,
    )

    # psycopg2's connection context manager only scopes a transaction; it does
    # not close the connection, hence the explicit closing().
    with closing(connection), connection.cursor() as cursor:
        cursor.execute("SELECT * FROM pg_catalog.pg_tables;")

        # Fetch all rows from database
        record = cursor.fetchall()

    print("Data from Database:- ", record)


if __name__ == "__main__":
    main()