# NOTE: page-extraction residue from a web file viewer, kept only as a comment
# so the module parses: "Spaces: Running" status banner, "File size: 15,096 Bytes",
# revision id "382f27a", and a 1-404 line-number gutter.
import pandas as pd
import plotly.graph_objects as go
import streamlit as st
import yfinance as yf
from plotly.subplots import make_subplots
from scipy.stats import norm
import numpy as np
def calculate_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Add MACD and Signal Line columns to a price DataFrame.

    The MACD is the difference between a short-span and a long-span exponential
    moving average (EMA) of the 'Close' column; the Signal Line is an EMA of the
    MACD itself. All EMAs use the recursive form (``adjust=False``).

    Parameters:
        data (pd.DataFrame): Price data; must contain a 'Close' column.
        short_window (int): Span of the shorter EMA. Default is 12.
        long_window (int): Span of the longer EMA. Default is 26.
        signal_window (int): Span of the EMA applied to the MACD. Default is 9.

    Returns:
        pd.DataFrame: The same DataFrame object that was passed in, with the
        'MACD' and 'Signal_Line' columns written (or overwritten) in place.
    """
    closing = data["Close"]

    # Short-span EMA first, long-span EMA second.
    fast_avg, slow_avg = (
        closing.ewm(span=span, adjust=False).mean()
        for span in (short_window, long_window)
    )

    macd_line = fast_avg - slow_avg
    data["MACD"] = macd_line
    # The signal line smooths the MACD with its own EMA.
    data["Signal_Line"] = macd_line.ewm(span=signal_window, adjust=False).mean()
    return data
def calculate_normalized_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Calculate the z-score-normalized MACD and Signal Line.

    The MACD is the short-span EMA of 'Close' minus the long-span EMA; the
    Signal Line is an EMA of the MACD. Each of the two series is then
    standardized independently: its own mean is subtracted and the result is
    divided by its own sample standard deviation.

    Parameters:
        data (pd.DataFrame): Price data; must contain a 'Close' column.
        short_window (int): Span of the shorter EMA. Default is 12.
        long_window (int): Span of the longer EMA. Default is 26.
        signal_window (int): Span of the signal-line EMA. Default is 9.

    Returns:
        pd.DataFrame: The same DataFrame object that was passed in, with the
        'MACD' and 'Signal_Line' columns written (or overwritten) in place
        with the normalized values. A series with no spread (for example a
        flat price history, or a single row) is reported as all zeros rather
        than NaN.
    """
    close = data["Close"]
    short_ema = close.ewm(span=short_window, adjust=False).mean()
    long_ema = close.ewm(span=long_window, adjust=False).mean()

    macd = short_ema - long_ema
    signal_line = macd.ewm(span=signal_window, adjust=False).mean()

    for column, series in (("MACD", macd), ("Signal_Line", signal_line)):
        spread = series.std()
        # A zero (constant series) or NaN (fewer than two rows) standard
        # deviation would turn the whole column into NaN via 0/0; dividing
        # by 1 instead leaves the centered values, which are 0 in that case.
        if not spread > 0:
            spread = 1.0
        data[column] = (series - series.mean()) / spread
    return data
def calculate_percentile_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Calculate the percentile-based MACD and Signal Line.

    The MACD (short-span EMA of 'Close' minus long-span EMA) and its Signal
    Line (EMA of the MACD) are each z-score normalized, passed through the
    standard normal CDF to obtain a percentile in [0, 1], and finally rescaled
    linearly to the range -100 to +100.

    Parameters:
        data (pd.DataFrame): Price data; must contain a 'Close' column.
        short_window (int): Span of the shorter EMA. Default is 12.
        long_window (int): Span of the longer EMA. Default is 26.
        signal_window (int): Span of the signal-line EMA. Default is 9.

    Returns:
        pd.DataFrame: The same DataFrame object that was passed in, with the
        'MACD' and 'Signal_Line' columns written (or overwritten) in place
        with the rescaled percentile values. A series with no spread (flat
        prices, or a single row) maps to 0 everywhere instead of NaN.
    """
    close = data["Close"]
    short_ema = close.ewm(span=short_window, adjust=False).mean()
    long_ema = close.ewm(span=long_window, adjust=False).mean()

    macd = short_ema - long_ema
    signal_line = macd.ewm(span=signal_window, adjust=False).mean()

    for column, series in (("MACD", macd), ("Signal_Line", signal_line)):
        spread = series.std()
        # Guard the z-score against a zero or NaN standard deviation, which
        # would otherwise propagate NaN through the CDF for the whole column.
        if not spread > 0:
            spread = 1.0
        z_scores = (series - series.mean()) / spread
        # CDF gives a percentile in [0, 1]; * 200 - 100 maps it to [-100, 100]
        # so that the median of the distribution sits at 0.
        data[column] = norm.cdf(z_scores.to_numpy()) * 200 - 100
    return data
def find_crossovers(
    df: pd.DataFrame, bullish_threshold: float, bearish_threshold: float
) -> pd.DataFrame:
    """
    Mark bullish and bearish crossovers between the MACD and the Signal Line.

    A row is a bullish crossover (+1) when the MACD is above the Signal Line
    on that row, was strictly below it on the previous row, and the Signal
    Line is below ``bullish_threshold``. A row is a bearish crossover (-1)
    when the MACD is below the Signal Line, was strictly above it on the
    previous row, and the Signal Line is above ``bearish_threshold``. Every
    other row is 0. The first row has no previous row and is therefore never
    marked.

    Parameters:
        df (pd.DataFrame): Must contain the columns 'MACD' and 'Signal_Line'.
        bullish_threshold (float): Upper bound on the Signal Line; a bullish
            crossover only counts while the Signal Line is below this value.
        bearish_threshold (float): Lower bound on the Signal Line; a bearish
            crossover only counts while the Signal Line is above this value.

    Returns:
        pd.DataFrame: The same DataFrame object that was passed in, with a
        'Crossover' column written (or overwritten) in place.
    """
    macd = df["MACD"]
    signal_line = df["Signal_Line"]
    previous_macd = macd.shift()
    previous_signal = signal_line.shift()

    # Positional boolean masks are used for the writes below. Selecting by
    # index *label* instead would also mark unrelated rows whenever the index
    # contains duplicate labels.
    bullish = (
        (macd > signal_line)
        & (previous_macd < previous_signal)
        & (signal_line < bullish_threshold)
    ).to_numpy()
    bearish = (
        (macd < signal_line)
        & (previous_macd > previous_signal)
        & (signal_line > bearish_threshold)
    ).to_numpy()

    # Default: no crossover. The two masks are mutually exclusive, so the
    # order of the two assignments does not matter.
    df["Crossover"] = 0
    df.loc[bullish, "Crossover"] = 1
    df.loc[bearish, "Crossover"] = -1
    return df
def get_fundamentals(ticker: str):
    """
    Look up the fundamental financial statements for a stock symbol.

    The data is read from a ``yfinance.Ticker`` object built for the symbol.

    Parameters:
        ticker (str): The stock symbol to query.

    Returns:
        tuple: A 3-tuple ``(income_stmt, balance_sheet, cashflow)`` holding the
        values of the Ticker attributes of the same names, in that order.
    """
    company = yf.Ticker(ticker)

    # Read the three statements in the same order in which they are returned.
    income_statement = company.income_stmt
    balance_sheet = company.balance_sheet
    cash_flow = company.cashflow
    return income_statement, balance_sheet, cash_flow
def create_fig(data: pd.DataFrame, ticker: str) -> go.Figure:
    """
    Build a two-row Plotly figure: price candlesticks on top, MACD below.

    The top row shows the candlestick chart, four simple moving averages of
    the close (12, 26, 50 and 200 rows) and crossover markers placed at the
    closing price. The bottom row shows the MACD, the Signal Line and the same
    crossover markers placed at the MACD value.

    Parameters:
        data (pandas.DataFrame): Must contain the columns 'Open', 'High',
            'Low', 'Close', 'MACD', 'Signal_Line' and 'Crossover'. The columns
            'MA12', 'MA26', 'MA50' and 'MA200' are added to it in place.
        ticker (str): Stock symbol shown in the title of the top subplot.

    Returns:
        plotly.graph_objs._figure.Figure: The assembled figure.
    """
    price_row, macd_row = 1, 2

    # Moving-average window -> line colour; dict order fixes the trace order.
    ma_colors = {12: "magenta", 26: "cyan", 50: "yellow", 200: "black"}
    for window in ma_colors:
        data[f"MA{window}"] = data["Close"].rolling(window=window).mean()

    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.02,
        subplot_titles=(f"{ticker} Candlestick", "MACD"),
        row_width=[0.2, 0.7],
    )

    # Price candles.
    candles = go.Candlestick(
        x=data.index,
        open=data["Open"],
        high=data["High"],
        low=data["Low"],
        close=data["Close"],
        name="Candlestick",
    )
    fig.add_trace(candles, row=price_row, col=1)

    # Moving-average overlays on the price chart.
    for window, color in ma_colors.items():
        ma_column = f"MA{window}"
        ma_trace = go.Scatter(
            x=data.index,
            y=data[ma_column],
            line=dict(color=color, width=1.5),
            name=f"{ma_column} days MA",
        )
        fig.add_trace(ma_trace, row=price_row, col=1)

    # MACD and Signal Line on the lower chart.
    indicator_lines = (
        ("MACD", "blue", "MACD"),
        ("Signal_Line", "orange", "Signal Line"),
    )
    for column, color, label in indicator_lines:
        line_trace = go.Scatter(
            x=data.index,
            y=data[column],
            line=dict(color=color, width=2),
            name=label,
        )
        fig.add_trace(line_trace, row=macd_row, col=1)

    # Crossover markers: first on the MACD chart, then on the price chart.
    bullish_rows = data[data["Crossover"] == 1]
    bearish_rows = data[data["Crossover"] == -1]
    marker_specs = (
        (bullish_rows, "MACD", "triangle-up", "green", 20,
         "Bullish Crossover (MACD) ✅", macd_row),
        (bearish_rows, "MACD", "triangle-down", "red", 20,
         "Bearish Crossover (MACD) 🈲", macd_row),
        (bullish_rows, "Close", "triangle-up", "green", 25,
         "Bullish Crossover (Close) ✅", price_row),
        (bearish_rows, "Close", "triangle-down", "red", 25,
         "Bearish Crossover (Close) 🈲", price_row),
    )
    for rows, column, symbol, color, size, label, target_row in marker_specs:
        marker_trace = go.Scatter(
            mode="markers",
            x=rows.index,
            y=rows[column],
            marker_symbol=symbol,
            marker_color=color,
            marker_size=size,
            name=label,
        )
        fig.add_trace(marker_trace, row=target_row, col=1)

    # Hide the range slider under the candlesticks and fix the figure height.
    fig.update_layout(xaxis_rangeslider_visible=False, height=800)
    return fig
def generate_simulated_data(data: pd.DataFrame, num_days: int) -> pd.DataFrame:
    """
    Append simulated future rows to a historical DataFrame.

    For every column, the period-over-period simple returns of the historical
    values are computed, and their mean and standard deviation are used to draw
    ``num_days`` independent, normally distributed returns. The cumulative
    product of ``1 + return`` is then applied to the last observed value of the
    column, so the simulated path continues from where the history ends.

    Random numbers come from NumPy's global generator (``np.random.normal``),
    so ``np.random.seed`` controls reproducibility.

    Parameters:
        data (pandas.DataFrame): Historical data with a date-based index and at
            least one row. Columns are assumed to be numeric.
        num_days (int): Number of future rows to simulate. Future dates are
            consecutive calendar days following the last index entry.

    Returns:
        pandas.DataFrame: The historical rows followed by the simulated rows.
        The input DataFrame is not modified.

    Note: returns are drawn from an unbounded normal distribution, so nothing
    prevents an extreme draw below -100% from flipping the sign of a path.
    """
    # Simple returns between consecutive rows. Division by a zero value gives
    # +/-inf, which is discarded so it cannot poison the statistics.
    returns = (data / data.shift(1) - 1).replace([np.inf, -np.inf], np.nan)

    # With too little history the statistics are NaN; fall back to a zero
    # mean / zero volatility, i.e. a flat projection of the last value.
    means = returns.mean().fillna(0.0)
    stds = returns.std().fillna(0.0)

    # One column of draws per data column; loc/scale broadcast across rows.
    random_returns = np.random.normal(
        loc=means.to_numpy(dtype=float),
        scale=stds.to_numpy(dtype=float),
        size=(num_days, len(data.columns)),
    )

    # Compound the growth factors and anchor them on the last observed row.
    growth_factors = (1 + random_returns).cumprod(axis=0)
    last_values = data.iloc[-1].to_numpy(dtype=float)
    future_values = growth_factors * last_values

    last_date = data.index[-1]
    future_dates = pd.date_range(
        start=last_date + pd.DateOffset(days=1), periods=num_days
    )
    future_data = pd.DataFrame(
        future_values, index=future_dates, columns=data.columns
    )

    return pd.concat([data, future_data])