Tonic committed on
Commit
e115d61
·
1 Parent(s): 65bf228

attempt to improve yfinance functionalities

Browse files
Files changed (1) hide show
  1. app.py +122 -25
app.py CHANGED
@@ -107,7 +107,7 @@ def get_next_trading_day() -> datetime:
107
 
108
  def get_historical_data(symbol: str, timeframe: str = "1d", lookback_days: int = 365) -> pd.DataFrame:
109
  """
110
- Fetch historical data using yfinance.
111
 
112
  Args:
113
  symbol (str): The stock symbol (e.g., 'AAPL')
@@ -131,11 +131,11 @@ def get_historical_data(symbol: str, timeframe: str = "1d", lookback_days: int =
131
  }
132
  interval = tf_map.get(timeframe, "1d")
133
 
134
- # Adjust lookback period based on timeframe
135
  if timeframe == "1h":
136
- lookback_days = min(lookback_days, 30) # Yahoo limits hourly data to 30 days
137
  elif timeframe == "15m":
138
- lookback_days = min(lookback_days, 5) # Yahoo limits 15m data to 5 days
139
 
140
  # Calculate date range
141
  end_date = datetime.now()
@@ -145,7 +145,16 @@ def get_historical_data(symbol: str, timeframe: str = "1d", lookback_days: int =
145
  ticker = yf.Ticker(symbol)
146
 
147
  def fetch_history():
148
- return ticker.history(start=start_date, end=end_date, interval=interval)
 
 
 
 
 
 
 
 
 
149
 
150
  df = retry_yfinance_request(fetch_history)
151
 
@@ -172,6 +181,23 @@ def get_historical_data(symbol: str, timeframe: str = "1d", lookback_days: int =
172
  df['Sector'] = info.get('sector', 'Unknown')
173
  df['Industry'] = info.get('industry', 'Unknown')
174
  df['Dividend_Yield'] = float(info.get('dividendYield', 0))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  except Exception as e:
176
  print(f"Warning: Could not fetch company info for {symbol}: {str(e)}")
177
  # Set default values for missing info
@@ -179,6 +205,20 @@ def get_historical_data(symbol: str, timeframe: str = "1d", lookback_days: int =
179
  df['Sector'] = 'Unknown'
180
  df['Industry'] = 'Unknown'
181
  df['Dividend_Yield'] = 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
  # Calculate technical indicators with adjusted windows based on timeframe
184
  if timeframe == "1d":
@@ -219,6 +259,21 @@ def get_historical_data(symbol: str, timeframe: str = "1d", lookback_days: int =
219
  df['Avg_Daily_Volume'] = df['Volume'].rolling(window=vol_window, min_periods=1).mean()
220
  df['Volume_Volatility'] = df['Volume'].rolling(window=vol_window, min_periods=1).std()
221
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  # Fill NaN values using forward fill then backward fill
223
  df = df.ffill().bfill()
224
 
@@ -229,7 +284,16 @@ def get_historical_data(symbol: str, timeframe: str = "1d", lookback_days: int =
229
  extended_start_date = start_date - timedelta(days=min_required_points - len(df))
230
 
231
  def fetch_extended_history():
232
- return ticker.history(start=extended_start_date, end=start_date, interval=interval)
 
 
 
 
 
 
 
 
 
233
 
234
  extended_df = retry_yfinance_request(fetch_extended_history)
235
  if not extended_df.empty:
@@ -985,7 +1049,7 @@ def create_interface():
985
  )
986
  hourly_lookback_days = gr.Slider(
987
  minimum=1,
988
- maximum=30, # Limited to 30 days for hourly data
989
  value=14,
990
  step=1,
991
  label="Historical Lookback (Days)"
@@ -997,10 +1061,14 @@ def create_interface():
997
  )
998
  hourly_predict_btn = gr.Button("Analyze Stock")
999
  gr.Markdown("""
1000
- **Note for Hourly Analysis:**
1001
- - Maximum lookback period: 30 days (Yahoo Finance limit)
1002
- - Maximum prediction period: 7 days
1003
- - Data is only available during market hours
 
 
 
 
1004
  """)
1005
 
1006
  with gr.Column():
@@ -1012,10 +1080,10 @@ def create_interface():
1012
  gr.Markdown("### Structured Product Metrics")
1013
  hourly_metrics = gr.JSON(label="Product Metrics")
1014
 
1015
- gr.Markdown("### Risk Analysis")
1016
  hourly_risk_metrics = gr.JSON(label="Risk Metrics")
1017
 
1018
- gr.Markdown("### Sector Analysis")
1019
  hourly_sector_metrics = gr.JSON(label="Sector Metrics")
1020
 
1021
  # 15-Minute Analysis Tab
@@ -1032,7 +1100,7 @@ def create_interface():
1032
  )
1033
  min15_lookback_days = gr.Slider(
1034
  minimum=1,
1035
- maximum=5, # Yahoo Finance limit for 15-minute data
1036
  value=3,
1037
  step=1,
1038
  label="Historical Lookback (Days)"
@@ -1044,11 +1112,15 @@ def create_interface():
1044
  )
1045
  min15_predict_btn = gr.Button("Analyze Stock")
1046
  gr.Markdown("""
1047
- **Note for 15-Minute Analysis:**
1048
- - Maximum lookback period: 5 days (Yahoo Finance limit)
1049
- - Maximum prediction period: 2 days
1050
- - Data is only available during market hours
1051
- - Requires at least 64 data points for Chronos predictions
 
 
 
 
1052
  """)
1053
 
1054
  with gr.Column():
@@ -1063,7 +1135,7 @@ def create_interface():
1063
  gr.Markdown("### Risk Analysis")
1064
  min15_risk_metrics = gr.JSON(label="Risk Metrics")
1065
 
1066
- gr.Markdown("### Sector Analysis")
1067
  min15_sector_metrics = gr.JSON(label="Sector Metrics")
1068
 
1069
  def analyze_stock(symbol, timeframe, prediction_days, lookback_days, strategy):
@@ -1080,7 +1152,13 @@ def create_interface():
1080
  "Industry": df['Industry'].iloc[-1],
1081
  "Dividend_Yield": df['Dividend_Yield'].iloc[-1],
1082
  "Avg_Daily_Volume": df['Avg_Daily_Volume'].iloc[-1],
1083
- "Volume_Volatility": df['Volume_Volatility'].iloc[-1]
 
 
 
 
 
 
1084
  }
1085
 
1086
  # Calculate risk metrics
@@ -1089,7 +1167,12 @@ def create_interface():
1089
  "Max_Drawdown": df['Max_Drawdown'].iloc[-1],
1090
  "Current_Drawdown": df['Drawdown'].iloc[-1],
1091
  "Sharpe_Ratio": (df['Returns'].mean() * 252) / (df['Returns'].std() * np.sqrt(252)),
1092
- "Sortino_Ratio": (df['Returns'].mean() * 252) / (df['Returns'][df['Returns'] < 0].std() * np.sqrt(252))
 
 
 
 
 
1093
  }
1094
 
1095
  # Calculate sector metrics
@@ -1097,9 +1180,23 @@ def create_interface():
1097
  "Sector": df['Sector'].iloc[-1],
1098
  "Industry": df['Industry'].iloc[-1],
1099
  "Market_Cap_Rank": "Large" if df['Market_Cap'].iloc[-1] > 1e10 else "Mid" if df['Market_Cap'].iloc[-1] > 1e9 else "Small",
1100
- "Liquidity_Score": "High" if df['Avg_Daily_Volume'].iloc[-1] > 1e6 else "Medium" if df['Avg_Daily_Volume'].iloc[-1] > 1e5 else "Low"
 
 
 
1101
  }
1102
 
 
 
 
 
 
 
 
 
 
 
 
1103
  return signals, fig, product_metrics, risk_metrics, sector_metrics
1104
  except Exception as e:
1105
  error_message = str(e)
@@ -1150,7 +1247,7 @@ def create_interface():
1150
  Args:
1151
  s (str): Stock symbol (e.g., "AAPL", "MSFT", "GOOGL")
1152
  pd (int): Number of days to predict (1-7)
1153
- ld (int): Historical lookback period in days (1-30)
1154
  st (str): Prediction strategy to use ("chronos" or "technical")
1155
 
1156
  Returns:
@@ -1181,7 +1278,7 @@ def create_interface():
1181
  Args:
1182
  s (str): Stock symbol (e.g., "AAPL", "MSFT", "GOOGL")
1183
  pd (int): Number of days to predict (1-2)
1184
- ld (int): Historical lookback period in days (1-5)
1185
  st (str): Prediction strategy to use ("chronos" or "technical")
1186
 
1187
  Returns:
 
107
 
108
  def get_historical_data(symbol: str, timeframe: str = "1d", lookback_days: int = 365) -> pd.DataFrame:
109
  """
110
+ Fetch historical data using yfinance with enhanced support for intraday data.
111
 
112
  Args:
113
  symbol (str): The stock symbol (e.g., 'AAPL')
 
131
  }
132
  interval = tf_map.get(timeframe, "1d")
133
 
134
+ # Adjust lookback period based on timeframe and yfinance limits
135
  if timeframe == "1h":
136
+ lookback_days = min(lookback_days, 60) # Yahoo allows up to 60 days for hourly data
137
  elif timeframe == "15m":
138
+ lookback_days = min(lookback_days, 7) # Yahoo allows up to 7 days for 15m data
139
 
140
  # Calculate date range
141
  end_date = datetime.now()
 
145
  ticker = yf.Ticker(symbol)
146
 
147
  def fetch_history():
148
+ return ticker.history(
149
+ start=start_date,
150
+ end=end_date,
151
+ interval=interval,
152
+ prepost=True, # Include pre/post market data for intraday
153
+ actions=True, # Include dividends and splits
154
+ auto_adjust=True, # Automatically adjust OHLC prices for splits and dividends
155
+ back_adjust=True, # Back-adjust data for splits
156
+ repair=True # Repair missing data points
157
+ )
158
 
159
  df = retry_yfinance_request(fetch_history)
160
 
 
181
  df['Sector'] = info.get('sector', 'Unknown')
182
  df['Industry'] = info.get('industry', 'Unknown')
183
  df['Dividend_Yield'] = float(info.get('dividendYield', 0))
184
+
185
+ # Add additional company metrics
186
+ df['Enterprise_Value'] = float(info.get('enterpriseValue', 0))
187
+ df['P/E_Ratio'] = float(info.get('trailingPE', 0))
188
+ df['Forward_P/E'] = float(info.get('forwardPE', 0))
189
+ df['PEG_Ratio'] = float(info.get('pegRatio', 0))
190
+ df['Price_to_Book'] = float(info.get('priceToBook', 0))
191
+ df['Price_to_Sales'] = float(info.get('priceToSalesTrailing12Months', 0))
192
+ df['Return_on_Equity'] = float(info.get('returnOnEquity', 0))
193
+ df['Return_on_Assets'] = float(info.get('returnOnAssets', 0))
194
+ df['Debt_to_Equity'] = float(info.get('debtToEquity', 0))
195
+ df['Current_Ratio'] = float(info.get('currentRatio', 0))
196
+ df['Quick_Ratio'] = float(info.get('quickRatio', 0))
197
+ df['Gross_Margin'] = float(info.get('grossMargins', 0))
198
+ df['Operating_Margin'] = float(info.get('operatingMargins', 0))
199
+ df['Net_Margin'] = float(info.get('netIncomeToCommon', 0))
200
+
201
  except Exception as e:
202
  print(f"Warning: Could not fetch company info for {symbol}: {str(e)}")
203
  # Set default values for missing info
 
205
  df['Sector'] = 'Unknown'
206
  df['Industry'] = 'Unknown'
207
  df['Dividend_Yield'] = 0.0
208
+ df['Enterprise_Value'] = 0.0
209
+ df['P/E_Ratio'] = 0.0
210
+ df['Forward_P/E'] = 0.0
211
+ df['PEG_Ratio'] = 0.0
212
+ df['Price_to_Book'] = 0.0
213
+ df['Price_to_Sales'] = 0.0
214
+ df['Return_on_Equity'] = 0.0
215
+ df['Return_on_Assets'] = 0.0
216
+ df['Debt_to_Equity'] = 0.0
217
+ df['Current_Ratio'] = 0.0
218
+ df['Quick_Ratio'] = 0.0
219
+ df['Gross_Margin'] = 0.0
220
+ df['Operating_Margin'] = 0.0
221
+ df['Net_Margin'] = 0.0
222
 
223
  # Calculate technical indicators with adjusted windows based on timeframe
224
  if timeframe == "1d":
 
259
  df['Avg_Daily_Volume'] = df['Volume'].rolling(window=vol_window, min_periods=1).mean()
260
  df['Volume_Volatility'] = df['Volume'].rolling(window=vol_window, min_periods=1).std()
261
 
262
+ # Calculate additional intraday metrics for shorter timeframes
263
+ if timeframe in ["1h", "15m"]:
264
+ # Intraday volatility
265
+ df['Intraday_High_Low'] = (df['High'] - df['Low']) / df['Close']
266
+ df['Intraday_Volatility'] = df['Intraday_High_Low'].rolling(window=vol_window, min_periods=1).mean()
267
+
268
+ # Volume analysis
269
+ df['Volume_Price_Trend'] = (df['Volume'] * df['Returns']).rolling(window=vol_window, min_periods=1).sum()
270
+ df['Volume_SMA'] = df['Volume'].rolling(window=vol_window, min_periods=1).mean()
271
+ df['Volume_Ratio'] = df['Volume'] / df['Volume_SMA']
272
+
273
+ # Price momentum
274
+ df['Price_Momentum'] = df['Close'].pct_change(periods=5)
275
+ df['Volume_Momentum'] = df['Volume'].pct_change(periods=5)
276
+
277
  # Fill NaN values using forward fill then backward fill
278
  df = df.ffill().bfill()
279
 
 
284
  extended_start_date = start_date - timedelta(days=min_required_points - len(df))
285
 
286
  def fetch_extended_history():
287
+ return ticker.history(
288
+ start=extended_start_date,
289
+ end=start_date,
290
+ interval=interval,
291
+ prepost=True,
292
+ actions=True,
293
+ auto_adjust=True,
294
+ back_adjust=True,
295
+ repair=True
296
+ )
297
 
298
  extended_df = retry_yfinance_request(fetch_extended_history)
299
  if not extended_df.empty:
 
1049
  )
1050
  hourly_lookback_days = gr.Slider(
1051
  minimum=1,
1052
+ maximum=60, # Enhanced to 60 days for hourly data
1053
  value=14,
1054
  step=1,
1055
  label="Historical Lookback (Days)"
 
1061
  )
1062
  hourly_predict_btn = gr.Button("Analyze Stock")
1063
  gr.Markdown("""
1064
+ **Hourly Analysis Features:**
1065
+ - **Extended Data Range**: Up to 60 days of historical data
1066
+ - **Pre/Post Market Data**: Includes extended hours trading data
1067
+ - **Auto-Adjusted Data**: Automatically adjusted for splits and dividends
1068
+ - **Metrics**: Intraday volatility, volume analysis, and momentum indicators
1069
+ - **Comprehensive Financial Ratios**: P/E, PEG, Price-to-Book, and more
1070
+ - **Maximum prediction period**: 7 days
1071
+ - **Data available during market hours only**
1072
  """)
1073
 
1074
  with gr.Column():
 
1080
  gr.Markdown("### Structured Product Metrics")
1081
  hourly_metrics = gr.JSON(label="Product Metrics")
1082
 
1083
+ gr.Markdown("### Comprehensive Risk Analysis")
1084
  hourly_risk_metrics = gr.JSON(label="Risk Metrics")
1085
 
1086
+ gr.Markdown("### Sector & Financial Analysis")
1087
  hourly_sector_metrics = gr.JSON(label="Sector Metrics")
1088
 
1089
  # 15-Minute Analysis Tab
 
1100
  )
1101
  min15_lookback_days = gr.Slider(
1102
  minimum=1,
1103
+ maximum=7, # 7 days for 15-minute data
1104
  value=3,
1105
  step=1,
1106
  label="Historical Lookback (Days)"
 
1112
  )
1113
  min15_predict_btn = gr.Button("Analyze Stock")
1114
  gr.Markdown("""
1115
+ **15-Minute Analysis Features:**
1116
+ - **Data Range**: Up to 7 days of historical data (vs 5 days previously)
1117
+ - **High-Frequency Metrics**: Intraday volatility, volume-price trends, momentum analysis
1118
+ - **Pre/Post Market Data**: Includes extended hours trading data
1119
+ - **Auto-Adjusted Data**: Automatically adjusted for splits and dividends
1120
+ - **Enhanced Technical Indicators**: Optimized for short-term trading
1121
+ - **Maximum prediction period**: 2 days
1122
+ - **Requires at least 64 data points for Chronos predictions**
1123
+ - **Data available during market hours only**
1124
  """)
1125
 
1126
  with gr.Column():
 
1135
  gr.Markdown("### Risk Analysis")
1136
  min15_risk_metrics = gr.JSON(label="Risk Metrics")
1137
 
1138
+ gr.Markdown("### Sector & Financial Analysis")
1139
  min15_sector_metrics = gr.JSON(label="Sector Metrics")
1140
 
1141
  def analyze_stock(symbol, timeframe, prediction_days, lookback_days, strategy):
 
1152
  "Industry": df['Industry'].iloc[-1],
1153
  "Dividend_Yield": df['Dividend_Yield'].iloc[-1],
1154
  "Avg_Daily_Volume": df['Avg_Daily_Volume'].iloc[-1],
1155
+ "Volume_Volatility": df['Volume_Volatility'].iloc[-1],
1156
+ "Enterprise_Value": df['Enterprise_Value'].iloc[-1],
1157
+ "P/E_Ratio": df['P/E_Ratio'].iloc[-1],
1158
+ "Forward_P/E": df['Forward_P/E'].iloc[-1],
1159
+ "PEG_Ratio": df['PEG_Ratio'].iloc[-1],
1160
+ "Price_to_Book": df['Price_to_Book'].iloc[-1],
1161
+ "Price_to_Sales": df['Price_to_Sales'].iloc[-1]
1162
  }
1163
 
1164
  # Calculate risk metrics
 
1167
  "Max_Drawdown": df['Max_Drawdown'].iloc[-1],
1168
  "Current_Drawdown": df['Drawdown'].iloc[-1],
1169
  "Sharpe_Ratio": (df['Returns'].mean() * 252) / (df['Returns'].std() * np.sqrt(252)),
1170
+ "Sortino_Ratio": (df['Returns'].mean() * 252) / (df['Returns'][df['Returns'] < 0].std() * np.sqrt(252)),
1171
+ "Return_on_Equity": df['Return_on_Equity'].iloc[-1],
1172
+ "Return_on_Assets": df['Return_on_Assets'].iloc[-1],
1173
+ "Debt_to_Equity": df['Debt_to_Equity'].iloc[-1],
1174
+ "Current_Ratio": df['Current_Ratio'].iloc[-1],
1175
+ "Quick_Ratio": df['Quick_Ratio'].iloc[-1]
1176
  }
1177
 
1178
  # Calculate sector metrics
 
1180
  "Sector": df['Sector'].iloc[-1],
1181
  "Industry": df['Industry'].iloc[-1],
1182
  "Market_Cap_Rank": "Large" if df['Market_Cap'].iloc[-1] > 1e10 else "Mid" if df['Market_Cap'].iloc[-1] > 1e9 else "Small",
1183
+ "Liquidity_Score": "High" if df['Avg_Daily_Volume'].iloc[-1] > 1e6 else "Medium" if df['Avg_Daily_Volume'].iloc[-1] > 1e5 else "Low",
1184
+ "Gross_Margin": df['Gross_Margin'].iloc[-1],
1185
+ "Operating_Margin": df['Operating_Margin'].iloc[-1],
1186
+ "Net_Margin": df['Net_Margin'].iloc[-1]
1187
  }
1188
 
1189
+ # Add intraday-specific metrics for shorter timeframes
1190
+ if timeframe in ["1h", "15m"]:
1191
+ intraday_metrics = {
1192
+ "Intraday_Volatility": df['Intraday_Volatility'].iloc[-1] if 'Intraday_Volatility' in df.columns else 0,
1193
+ "Volume_Ratio": df['Volume_Ratio'].iloc[-1] if 'Volume_Ratio' in df.columns else 0,
1194
+ "Price_Momentum": df['Price_Momentum'].iloc[-1] if 'Price_Momentum' in df.columns else 0,
1195
+ "Volume_Momentum": df['Volume_Momentum'].iloc[-1] if 'Volume_Momentum' in df.columns else 0,
1196
+ "Volume_Price_Trend": df['Volume_Price_Trend'].iloc[-1] if 'Volume_Price_Trend' in df.columns else 0
1197
+ }
1198
+ product_metrics.update(intraday_metrics)
1199
+
1200
  return signals, fig, product_metrics, risk_metrics, sector_metrics
1201
  except Exception as e:
1202
  error_message = str(e)
 
1247
  Args:
1248
  s (str): Stock symbol (e.g., "AAPL", "MSFT", "GOOGL")
1249
  pd (int): Number of days to predict (1-7)
1250
+ ld (int): Historical lookback period in days (1-60)
1251
  st (str): Prediction strategy to use ("chronos" or "technical")
1252
 
1253
  Returns:
 
1278
  Args:
1279
  s (str): Stock symbol (e.g., "AAPL", "MSFT", "GOOGL")
1280
  pd (int): Number of days to predict (1-2)
1281
+ ld (int): Historical lookback period in days (1-7)
1282
  st (str): Prediction strategy to use ("chronos" or "technical")
1283
 
1284
  Returns: