Spaces:
Running
Running
Robert Castagna
committed on
Commit
·
1fce135
1
Parent(s):
70fcc09
adding postgres db as well as fixing API endpoints -- 'yfinance' broke
Browse files- .gitignore +2 -1
- packages.txt +1 -0
- pages/1_Fundamentals.py +153 -52
- pages/2_Portfolio_Builder.py +1 -1
- requirements.txt +4 -2
- test.py +12 -0
.gitignore
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
secrets.json
|
2 |
edgar-crawler/
|
3 |
.venv/
|
4 |
-
.env
|
|
|
|
1 |
secrets.json
|
2 |
edgar-crawler/
|
3 |
.venv/
|
4 |
+
.env
|
5 |
+
.streamlit/
|
packages.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
libpq-dev
|
pages/1_Fundamentals.py
CHANGED
@@ -6,8 +6,9 @@ import streamlit as st
|
|
6 |
import yfinance as yf
|
7 |
from datetime import timedelta
|
8 |
from dotenv import load_dotenv
|
|
|
9 |
|
10 |
-
load_dotenv()
|
11 |
|
12 |
def get_finnhub_data(example: str) -> json:
|
13 |
"""
|
@@ -96,8 +97,72 @@ def get_list_of_tickers():
|
|
96 |
list_of_tickers.append(comp_info[i]['symbol'])
|
97 |
return list_of_tickers
|
98 |
|
99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
|
|
|
101 |
if 'tickers' not in st.session_state:
|
102 |
tickers = [
|
103 |
"AAPL", "MSFT", "GOOG", "NVDA", "TSLA",
|
@@ -115,50 +180,85 @@ with st.form(key="selecting columns"):
|
|
115 |
st.session_state['tickers'] = symbols
|
116 |
|
117 |
if submit_button and symbols and strategy_selection == 'Value':
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
|
|
137 |
|
138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
st.session_state['tickers'] = symbols
|
162 |
|
163 |
if submit_button and symbols and strategy_selection == 'Growth':
|
164 |
|
@@ -168,6 +268,7 @@ with st.form(key="selecting columns"):
|
|
168 |
basic_info = get_industry(ticker)
|
169 |
metric_data, annual_series_data, quarterly_series_data = get_company_metrics(ticker)
|
170 |
|
|
|
171 |
# reformat all JSON returns to be flattened dictionaries
|
172 |
roe_dict = {'roe': annual_series_data['roe'][0]['v'] if ('roe' in annual_series_data) and (len(annual_series_data['roe'])>1) and ('v' in annual_series_data['roe'][0]) else 0}
|
173 |
ev_dict = {'ev' :annual_series_data['ev'][0]['v'] if ('ev' in annual_series_data) and (len(annual_series_data['ev'])>1) and ('v' in annual_series_data['ev'][0]) else 0}
|
@@ -184,22 +285,22 @@ with st.form(key="selecting columns"):
|
|
184 |
hash_map[ticker] = combined_info
|
185 |
|
186 |
# equity gains
|
187 |
-
_, div, close_price = get_equity_gains(ticker=ticker, period=1810)
|
188 |
-
gains_data[ticker] = [div, close_price]
|
189 |
|
190 |
|
191 |
# Now, create a DataFrame from the hash_map
|
192 |
df_1 = pd.DataFrame.from_dict(hash_map, orient='index')[['finnhubIndustry','roe','marketCapitalization','ebitdPerShareAnnual','pe','ps','pb','salesPerShare','eps','epsGrowth5Y','ev','operatingMarginAnnual', 'ebitdPerShareTTM', 'ebitdaCagr5Y', 'ebitdaInterimCagr5Y']]
|
193 |
-
df_2 = pd.DataFrame.from_dict(gains_data, orient='index', columns=['Recent Dividend','Price'])
|
194 |
-
df_final = df_1.join(df_2)
|
195 |
|
196 |
-
|
197 |
-
|
198 |
-
st.write(
|
199 |
|
200 |
st.session_state['tickers'] = symbols
|
201 |
|
202 |
if submit_button and symbols and strategy_selection == 'Bypass':
|
203 |
-
st.write('You have selected to bypass the metrics calculation for the selected tickers.')
|
204 |
st.session_state['tickers'] = symbols
|
205 |
|
|
|
6 |
import yfinance as yf
|
7 |
from datetime import timedelta
|
8 |
from dotenv import load_dotenv
|
9 |
+
import psycopg2
|
10 |
|
11 |
+
load_dotenv(override=True)
|
12 |
|
13 |
def get_finnhub_data(example: str) -> json:
|
14 |
"""
|
|
|
97 |
list_of_tickers.append(comp_info[i]['symbol'])
|
98 |
return list_of_tickers
|
99 |
|
100 |
+
def _payload_value_has_data(val) -> bool:
    """Return True when a payload value carries information (a non-empty container or any scalar)."""
    if val is None:
        return False
    try:
        return len(val) > 0
    except TypeError:
        # Numbers and booleans have no length; they always count as data (0 is a valid metric value)
        return True


def handle_json_data(data, ticker) -> tuple[list[pd.DataFrame], int]:
    """
    Convert one JSON payload (a dict) for ``ticker`` into DataFrames ready for ``pd.concat``.

    Two payload shapes are recognised, decided entry by entry:

    * Time series - the value is a non-empty list/tuple of dicts. Each such entry becomes its own
      DataFrame: the 'v' column is renamed to the entry's key and the frame is indexed by
      ('period', 'ticker').
    * Flat record - the first entry holding any other non-empty value (string, number, ...) makes
      the WHOLE payload be treated as a single record: one one-row DataFrame indexed by the ticker
      with one column per payload key. Processing stops at that entry.

    Entries with no data (None, empty list, empty string) are reported and skipped.

    Returns:
        (df_list, axis): the DataFrames built, and the axis the caller should pass to ``pd.concat``
        (1 to place time series side by side, 0 for a flat record). ``axis`` is 0 when the payload
        is empty or holds no usable data, in which case ``df_list`` is empty.
    """
    df_list = []
    # Default keeps the return value well-defined even if the loop never assigns an axis
    axis = 0

    for key, val in data.items():
        is_time_series = (
            isinstance(val, (list, tuple))
            and len(val) > 0
            and isinstance(val[0], dict)
        )

        if is_time_series:
            temporal_df = pd.DataFrame(val).rename({'v': key}, axis=1)
            temporal_df['ticker'] = ticker
            axis = 1

            if not temporal_df.empty:
                df_list.append(temporal_df.set_index(['period', 'ticker']))

        elif _payload_value_has_data(val):
            # A scalar value means the payload is a flat record: build a single row from every
            # key/value pair and stop, since the remaining entries are already included in it.
            non_temporal_df = pd.DataFrame(list(data.items()), columns=['ticker', ticker])
            df_list.append(non_temporal_df.set_index('ticker').T)
            axis = 0
            break

        else:
            print("dict key has no data.")

    return df_list, axis
|
129 |
+
|
130 |
+
def insert_dataframe(cursor, table_name, df):
    """
    Insert a DataFrame into the specified table, skipping rows that collide with existing keys.

    Args:
        cursor: an open DB-API cursor using '%s' placeholders (psycopg2 style).
        table_name: target table. It is interpolated into the INSERT text as an identifier, so it
            must come from trusted code (e.g. a hard-coded mapping), never from user input.
        df: rows to insert; its column names are used verbatim as the table's column names.

    The table's PRIMARY KEY / UNIQUE columns are looked up in information_schema and used as the
    ON CONFLICT target. If none are found, a bare ``ON CONFLICT DO NOTHING`` is emitted instead.
    """
    if df.empty:
        # No rows or no columns: an INSERT with an empty column list would be invalid SQL
        print('df is empty, nothing inserted.')
        return

    # Prepare the INSERT statement dynamically
    columns_str = ', '.join(str(col) for col in df.columns)
    placeholders = ', '.join(['%s'] * len(df.columns))

    # Get key columns for insert collision handling. The table name is passed as a bound
    # parameter here because it is used as a VALUE in the WHERE clause.
    pkey_query = """
        SELECT c.column_name
        FROM information_schema.table_constraints tc
        JOIN information_schema.constraint_column_usage AS ccu USING (constraint_schema, constraint_name)
        JOIN information_schema.columns AS c ON c.table_schema = tc.constraint_schema
            AND tc.table_name = c.table_name AND ccu.column_name = c.column_name
        WHERE constraint_type in ('UNIQUE', 'PRIMARY KEY') and tc.table_name = %s;
    """
    cursor.execute(pkey_query, (table_name,))
    # The join can return a column once per constraint; de-duplicate while keeping order
    pkeys = list(dict.fromkeys(row[0] for row in cursor.fetchall()))

    # ON CONFLICT accepts an optional target; an empty "()" is a syntax error, so omit it entirely
    conflict_target = f" ({', '.join(pkeys)})" if pkeys else ""
    insert_query = (
        f"INSERT INTO {table_name} ({columns_str}) VALUES ({placeholders}) "
        f"ON CONFLICT{conflict_target} DO NOTHING"
    )

    # Convert DataFrame to list of plain tuples, one per row
    rows = list(df.itertuples(index=False, name=None))

    # Execute batch insert
    cursor.executemany(insert_query, rows)
    print('df inserted.')
|
164 |
|
165 |
+
# ---------------------------------------------------------------------------------------------- #
|
166 |
if 'tickers' not in st.session_state:
|
167 |
tickers = [
|
168 |
"AAPL", "MSFT", "GOOG", "NVDA", "TSLA",
|
|
|
180 |
st.session_state['tickers'] = symbols
|
181 |
|
182 |
if submit_button and symbols and strategy_selection == 'Value':
|
183 |
+
|
184 |
+
# Mapping variables to target tables. keys are dataframe names, values are (write mode, db table)
|
185 |
+
table_mapping = {
|
186 |
+
'quarterly_series_data': ('APPEND', 'quarterly_financials'),
|
187 |
+
'basic_info': ('OVERWRITE', 'companies')
|
188 |
+
}
|
189 |
+
|
190 |
+
dbparams = {
|
191 |
+
'dbname':"hf_fundamentals",
|
192 |
+
'user':"postgres",
|
193 |
+
'password':os.environ['postgres_pwd'],
|
194 |
+
'host':"localhost",
|
195 |
+
'port':"5432"
|
196 |
+
}
|
197 |
+
|
198 |
+
try:
|
199 |
+
with psycopg2.connect(**dbparams) as conn:
|
200 |
+
conn.autocommit=True
|
201 |
+
|
202 |
+
with conn.cursor() as cur:
|
203 |
|
204 |
+
query_companies = "SELECT * FROM public.quarterly_financials where ticker = %s;" # works because of foreign key relation
|
205 |
+
|
206 |
+
all_data = {}
|
207 |
+
|
208 |
+
for ticker in symbols:
|
209 |
+
# first, check if the ticker has a record in the database
|
210 |
+
cur.execute(query_companies, (ticker,))
|
211 |
+
results = cur.fetchall()
|
212 |
+
|
213 |
+
if len(results) == 0: # if not in database, call api's and store in db
|
214 |
+
print("no data found in db")
|
215 |
+
# make all the API calls and capture return json
|
216 |
+
basic_info = get_industry(ticker)
|
217 |
+
metric_data, annual_series_data, quarterly_series_data = get_company_metrics(ticker)
|
218 |
+
|
219 |
+
api_data_mapping = {'basic_info':basic_info,
|
220 |
+
# 'metric_data':metric_data,
|
221 |
+
#'annual_series_data':annual_series_data,
|
222 |
+
'quarterly_series_data':quarterly_series_data}
|
223 |
+
|
224 |
+
for df_name, df in api_data_mapping.items():
|
225 |
+
df_main_list = []
|
226 |
+
|
227 |
+
# creates a list of dataframes where each index corresponds to a ticker
|
228 |
+
df_list, axis = handle_json_data(df, ticker)
|
229 |
+
df_main = pd.concat(df_list, axis=axis)
|
230 |
+
df_main_list.append(df_main)
|
231 |
+
|
232 |
+
# craft a dict where key is data source and values are list of dataframes for every ticker
|
233 |
+
if df_name not in all_data:
|
234 |
+
all_data[df_name] = df_main_list
|
235 |
+
else:
|
236 |
+
print('redundant.')
|
237 |
+
|
238 |
+
else:
|
239 |
+
print("retrieved from db.")
|
240 |
+
st.write(results[0])
|
241 |
|
242 |
+
for dataframe_name, data in all_data.items():
|
243 |
+
|
244 |
+
write_mode, target_table = table_mapping[dataframe_name]
|
245 |
+
print(write_mode, dataframe_name, target_table)
|
246 |
+
|
247 |
+
# combines all tickers into one dataframe for a given data source
|
248 |
+
df_all_tickers = pd.concat(data)
|
249 |
+
|
250 |
+
print(ticker)
|
251 |
+
unstacked_df = df_all_tickers.reset_index()
|
252 |
+
|
253 |
+
if 'index' in unstacked_df.columns: # hack fix for single index dataframes ..
|
254 |
+
unstacked_df=unstacked_df.drop('ticker', axis=1)
|
255 |
+
unstacked_df.rename(columns={'index':'ticker'}, inplace = True)
|
256 |
+
|
257 |
+
insert_dataframe(cur, target_table, unstacked_df)
|
258 |
+
|
259 |
+
st.session_state['tickers'] = symbols
|
260 |
+
except:
|
261 |
+
print("You do not have an active Postgres instance running. Select 'Bypass' and continue to Portfolio Builder.")
|
|
|
|
|
262 |
|
263 |
if submit_button and symbols and strategy_selection == 'Growth':
|
264 |
|
|
|
268 |
basic_info = get_industry(ticker)
|
269 |
metric_data, annual_series_data, quarterly_series_data = get_company_metrics(ticker)
|
270 |
|
271 |
+
|
272 |
# reformat all JSON returns to be flattened dictionaries
|
273 |
roe_dict = {'roe': annual_series_data['roe'][0]['v'] if ('roe' in annual_series_data) and (len(annual_series_data['roe'])>1) and ('v' in annual_series_data['roe'][0]) else 0}
|
274 |
ev_dict = {'ev' :annual_series_data['ev'][0]['v'] if ('ev' in annual_series_data) and (len(annual_series_data['ev'])>1) and ('v' in annual_series_data['ev'][0]) else 0}
|
|
|
285 |
hash_map[ticker] = combined_info
|
286 |
|
287 |
# equity gains
|
288 |
+
# _, div, close_price = get_equity_gains(ticker=ticker, period=1810)
|
289 |
+
# gains_data[ticker] = [div, close_price]
|
290 |
|
291 |
|
292 |
# Now, create a DataFrame from the hash_map
|
293 |
df_1 = pd.DataFrame.from_dict(hash_map, orient='index')[['finnhubIndustry','roe','marketCapitalization','ebitdPerShareAnnual','pe','ps','pb','salesPerShare','eps','epsGrowth5Y','ev','operatingMarginAnnual', 'ebitdPerShareTTM', 'ebitdaCagr5Y', 'ebitdaInterimCagr5Y']]
|
294 |
+
# df_2 = pd.DataFrame.from_dict(gains_data, orient='index', columns=['Recent Dividend','Price'])
|
295 |
+
# df_final = df_1.join(df_2)
|
296 |
|
297 |
+
df_1['PE/G'] = df_1['pe'] / df_1['epsGrowth5Y']
|
298 |
+
df_1.rename({'finnhubIndustry':'Industry','marketCapitalization':'MarketCap','roe':'ROE', 'ev':'Enterp. Val', 'pe':'P/E', 'ps':'P/S', 'pb':'P/B', 'eps': 'EPS'}, inplace=True, axis=1)
|
299 |
+
st.write(df_1)
|
300 |
|
301 |
st.session_state['tickers'] = symbols
|
302 |
|
303 |
if submit_button and symbols and strategy_selection == 'Bypass':
|
304 |
+
st.write('You have selected to bypass the metrics calculation for the selected tickers. Proceed to Portfolio Builder.')
|
305 |
st.session_state['tickers'] = symbols
|
306 |
|
pages/2_Portfolio_Builder.py
CHANGED
@@ -115,7 +115,7 @@ with st.form(key="selecting columns"):
|
|
115 |
obb
|
116 |
.equity
|
117 |
.price
|
118 |
-
.historical(tickers, start_date=start_date, end_date=end_date, provider="
|
119 |
.to_df()
|
120 |
.pivot(columns="symbol", values="close")
|
121 |
)
|
|
|
115 |
obb
|
116 |
.equity
|
117 |
.price
|
118 |
+
.historical(tickers, start_date=start_date, end_date=end_date, provider="fmp")
|
119 |
.to_df()
|
120 |
.pivot(columns="symbol", values="close")
|
121 |
)
|
requirements.txt
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
openbb==4.1.4
|
2 |
riskfolio-lib==5.0.1
|
3 |
transformers==4.36.2
|
@@ -8,7 +10,7 @@ beautifulsoup4==4.12.3
|
|
8 |
click==7.0
|
9 |
cssutils==1.0.2
|
10 |
numpy==1.24.4
|
11 |
-
lxml==4.9.1
|
12 |
requests>=2.31.0
|
13 |
urllib3>=1.26.7
|
14 |
pandas>=1.5.3
|
@@ -20,4 +22,4 @@ regex==2024.5.15
|
|
20 |
yfinance==0.2.28
|
21 |
python-dotenv==1.0.1
|
22 |
asyncio==3.4.3
|
23 |
-
nest_asyncio==1.6.0
|
|
|
1 |
+
sqlalchemy
|
2 |
+
psycopg2-binary
|
3 |
openbb==4.1.4
|
4 |
riskfolio-lib==5.0.1
|
5 |
transformers==4.36.2
|
|
|
10 |
click==7.0
|
11 |
cssutils==1.0.2
|
12 |
numpy==1.24.4
|
13 |
+
#lxml==4.9.1
|
14 |
requests>=2.31.0
|
15 |
urllib3>=1.26.7
|
16 |
pandas>=1.5.3
|
|
|
22 |
yfinance==0.2.28
|
23 |
python-dotenv==1.0.1
|
24 |
asyncio==3.4.3
|
25 |
+
nest_asyncio==1.6.0
|
test.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Smoke test: connect to the local Postgres instance and print the catalog of tables."""
import os
from contextlib import closing

import psycopg2

# The password is read from the 'postgres_pwd' environment variable (the same variable
# pages/1_Fundamentals.py reads) so that no credential is stored in source control.
# A missing variable raises KeyError immediately rather than attempting an unauthenticated login.
db_password = os.environ["postgres_pwd"]

# psycopg2's own `with connection:` only ends the transaction; closing() is what actually
# releases the connection when the block exits, including on error.
with closing(
    psycopg2.connect(
        database="hf_fundamentals",
        user="postgres",
        password=db_password,
        host="localhost",
        port=5432,
    )
) as connection:
    with connection.cursor() as cursor:
        cursor.execute("SELECT * FROM pg_catalog.pg_tables;")

        # Fetch all rows from database
        record = cursor.fetchall()

print("Data from Database:- ", record)
|