# Spaces: Running  (NOTE(review): non-code page residue, kept as a comment so the module can parse)
import pandas as pd | |
import plotly.graph_objects as go | |
import streamlit as st | |
import yfinance as yf | |
from plotly.subplots import make_subplots | |
from scipy.stats import norm | |
import numpy as np | |
def calculate_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Add MACD and signal-line columns to a price DataFrame.

    The MACD is the difference between a short-span and a long-span exponential
    moving average (EMA) of the 'Close' column; the signal line is an EMA of
    the MACD itself.

    Parameters:
    data (pd.DataFrame): Price data; must expose a 'Close' column.
    short_window (int): Span of the faster EMA. Default is 12.
    long_window (int): Span of the slower EMA. Default is 26.
    signal_window (int): Span of the EMA applied to the MACD. Default is 9.

    Returns:
    pd.DataFrame: The same DataFrame object that was passed in, modified in
    place with the columns 'MACD' and 'Signal_Line' added (or overwritten).
    """

    def smooth(series, span):
        # adjust=False selects the recursive form of the exponential average.
        return series.ewm(span=span, adjust=False).mean()

    closing_prices = data.Close

    # Fast EMA minus slow EMA.
    data["MACD"] = smooth(closing_prices, short_window) - smooth(
        closing_prices, long_window
    )
    # The signal line smooths the MACD that was just stored.
    data["Signal_Line"] = smooth(data.MACD, signal_window)
    return data
def calculate_normalized_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Add z-score-normalized MACD and signal-line columns to a price DataFrame.

    The raw MACD (short-span EMA of 'Close' minus long-span EMA of 'Close') and
    its signal line (an EMA of the raw MACD) are computed first. Each of the
    two series is then standardized independently by subtracting its own mean
    and dividing by its own standard deviation.

    Parameters:
    data (pd.DataFrame): Price data; must expose a 'Close' column.
    short_window (int): Span of the faster EMA. Default is 12.
    long_window (int): Span of the slower EMA. Default is 26.
    signal_window (int): Span of the EMA applied to the MACD. Default is 9.

    Returns:
    pd.DataFrame: The same DataFrame object that was passed in, modified in
    place with the standardized columns 'MACD' and 'Signal_Line'.
    """

    def standardize(series):
        # Classic z-score using the series' own mean and standard deviation.
        return (series - series.mean()) / series.std()

    closing_prices = data.Close
    fast_ema = closing_prices.ewm(span=short_window, adjust=False).mean()
    slow_ema = closing_prices.ewm(span=long_window, adjust=False).mean()

    # The signal line is derived from the raw (not yet normalized) MACD.
    raw_macd = fast_ema - slow_ema
    raw_signal = raw_macd.ewm(span=signal_window, adjust=False).mean()

    data["MACD"] = standardize(raw_macd)
    data["Signal_Line"] = standardize(raw_signal)
    return data
def calculate_percentile_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Add percentile-scaled MACD and signal-line columns to a price DataFrame.

    Processing steps:
    1. MACD = short-span EMA of 'Close' minus long-span EMA of 'Close'.
    2. Signal line = EMA of the MACD from step 1.
    3. Each series is z-scored with its own mean and standard deviation.
    4. Each z-score is passed through the standard normal CDF and rescaled
       linearly so the result lies between -100 and +100.

    Parameters:
    data (pd.DataFrame): Price data; must expose a 'Close' column.
    short_window (int): Span of the faster EMA. Default is 12.
    long_window (int): Span of the slower EMA. Default is 26.
    signal_window (int): Span of the EMA applied to the MACD. Default is 9.

    Returns:
    pd.DataFrame: The same DataFrame object that was passed in, modified in
    place with the rescaled columns 'MACD' and 'Signal_Line'.
    """
    closing_prices = data.Close
    fast_ema = closing_prices.ewm(span=short_window, adjust=False).mean()
    slow_ema = closing_prices.ewm(span=long_window, adjust=False).mean()

    # Store the raw indicator values first; both are transformed below.
    data["MACD"] = fast_ema - slow_ema
    data["Signal_Line"] = data.MACD.ewm(span=signal_window, adjust=False).mean()

    for column in ("MACD", "Signal_Line"):
        raw = data[column]
        z_scores = (raw - raw.mean()) / raw.std()
        # CDF gives a value in [0, 1]; stretch it to [-100, +100].
        data[column] = norm.cdf(z_scores) * 200 - 100
    return data
def find_crossovers(
    df: pd.DataFrame, bullish_threshold: float, bearish_threshold: float
) -> pd.DataFrame:
    """
    Flag bullish and bearish crossovers between the MACD and its signal line.

    A row is a bullish crossover (+1) when the MACD is above the signal line on
    that row, was below it on the previous row, and the signal line is strictly
    below `bullish_threshold`. A row is a bearish crossover (-1) when the MACD
    is below the signal line, was above it on the previous row, and the signal
    line is strictly above `bearish_threshold`. All other rows get 0.

    The first row has no predecessor (the shifted values are NaN, and
    comparisons against NaN are False), so it is never flagged. Rows where the
    two lines were exactly equal on the previous row are not flagged either,
    because both "previous row" comparisons are strict.

    Parameters:
    df (pd.DataFrame): Must contain the columns 'MACD' and 'Signal_Line'.
    bullish_threshold (float): Upper bound on the signal line for a bullish
                               crossover to be reported.
    bearish_threshold (float): Lower bound on the signal line for a bearish
                               crossover to be reported.

    Returns:
    pd.DataFrame: The same DataFrame object that was passed in, modified in
    place with an integer 'Crossover' column holding +1, -1 or 0.
    """
    macd = df["MACD"]
    signal = df["Signal_Line"]
    previous_macd = macd.shift()
    previous_signal = signal.shift()

    crossed_up = (
        (macd > signal)
        & (previous_macd < previous_signal)
        & (signal < bullish_threshold)
    )
    crossed_down = (
        (macd < signal)
        & (previous_macd > previous_signal)
        & (signal > bearish_threshold)
    )

    # Default: no crossover.
    df["Crossover"] = 0
    # Assign through positional boolean masks rather than index labels: with a
    # non-unique index, label-based assignment would also mark every other row
    # that happens to share a label with a crossover row.
    df.loc[crossed_up.to_numpy(), "Crossover"] = 1
    df.loc[crossed_down.to_numpy(), "Crossover"] = -1
    return df
def get_fundamentals(ticker: str):
    """
    Look up the fundamental financial statements for a stock symbol.

    A yfinance Ticker object is created for the symbol and its `income_stmt`,
    `balance_sheet` and `cashflow` attributes are read, in that order.

    Parameters:
    ticker (str): The stock symbol to query.

    Returns:
    tuple: A 3-tuple of (income statement, balance sheet, cash flow statement)
    exactly as provided by the yfinance Ticker object.
    """
    company = yf.Ticker(ticker)

    # Read each statement into a named local so the returned order is explicit.
    income_statement = company.income_stmt
    balance_sheet = company.balance_sheet
    cash_flow = company.cashflow
    return income_statement, balance_sheet, cash_flow
def create_fig(data: pd.DataFrame, ticker: str) -> go.Figure:
    """
    Build a two-panel Plotly figure: price candlesticks on top, MACD below.

    The top panel holds the candlestick trace, four rolling-mean lines of the
    'Close' column (12, 26, 50 and 200 rows) and crossover markers placed at
    the closing price. The bottom panel holds the MACD line, the signal line
    and crossover markers placed at the MACD value.

    Parameters:
    data (pandas.DataFrame): Must contain 'Open', 'High', 'Low', 'Close',
                             'MACD', 'Signal_Line' and 'Crossover' columns.
                             The columns 'MA12', 'MA26', 'MA50' and 'MA200'
                             are added to this DataFrame in place.
    ticker (str): Stock symbol shown in the title of the top panel.

    Returns:
    plotly.graph_objs._figure.Figure: The assembled figure.
    """
    # Rolling means of the close, keyed by the column name they are stored under.
    ma_colors = {"MA12": "magenta", "MA26": "cyan", "MA50": "yellow", "MA200": "black"}
    for window in (12, 26, 50, 200):
        data[f"MA{window}"] = data["Close"].rolling(window=window).mean()

    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.02,
        subplot_titles=(f"{ticker} Candlestick", "MACD"),
        row_width=[0.2, 0.7],
    )

    # Subplot coordinates reused by every add_trace call below.
    price_panel = dict(row=1, col=1)
    macd_panel = dict(row=2, col=1)

    # Price candles.
    candles = go.Candlestick(
        x=data.index,
        open=data["Open"],
        high=data["High"],
        low=data["Low"],
        close=data["Close"],
        name="Candlestick",
    )
    fig.add_trace(candles, **price_panel)

    # Moving-average overlays on the price panel.
    for column, color in ma_colors.items():
        fig.add_trace(
            go.Scatter(
                x=data.index,
                y=data[column],
                line=dict(color=color, width=1.5),
                name=f"{column} days MA",
            ),
            **price_panel,
        )

    # MACD and signal line on the lower panel.
    for column, color, label in (
        ("MACD", "blue", "MACD"),
        ("Signal_Line", "orange", "Signal Line"),
    ):
        fig.add_trace(
            go.Scatter(
                x=data.index,
                y=data[column],
                line=dict(color=color, width=2),
                name=label,
            ),
            **macd_panel,
        )

    # Crossover markers: first on the MACD panel, then on the price panel;
    # within each panel the bullish trace is added before the bearish one.
    bullish_rows = data[data["Crossover"] == 1]
    bearish_rows = data[data["Crossover"] == -1]
    for column, size, panel in (("MACD", 20, macd_panel), ("Close", 25, price_panel)):
        for rows, symbol, color, label in (
            (bullish_rows, "triangle-up", "green", f"Bullish Crossover ({column}) ✅"),
            (bearish_rows, "triangle-down", "red", f"Bearish Crossover ({column}) 🈲"),
        ):
            fig.add_trace(
                go.Scatter(
                    mode="markers",
                    x=rows.index,
                    y=rows[column],
                    marker_symbol=symbol,
                    marker_color=color,
                    marker_size=size,
                    name=label,
                ),
                **panel,
            )

    # Hide the range slider and fix the overall figure height.
    fig.update_layout(xaxis_rangeslider_visible=False, height=800)
    return fig
def generate_simulated_data(data: pd.DataFrame, num_days: int) -> pd.DataFrame:
    """
    Append simulated future rows to a DataFrame of historical values.

    For every column, the period-over-period simple returns of the historical
    values are computed, and their mean and standard deviation parameterize a
    normal distribution. `num_days` random returns are drawn per column from
    that distribution, compounded with a cumulative product, and applied to the
    last observed value of the column, so the simulated path continues from
    where the history ends.

    Earlier revisions drew the "returns" from the mean and standard deviation
    of the raw column values and appended the bare cumulative factors, which
    produced values unrelated to the history (and exploding for price-like
    inputs).

    Notes:
    - Columns are simulated independently of each other, so relationships
      between columns (e.g. High >= Low) are not preserved in the future rows.
    - Returns that are undefined or infinite (e.g. after a zero value) are
      ignored when estimating the statistics. If a column has too few usable
      returns to estimate a mean or standard deviation, 0 is used instead.
    - Randomness comes from NumPy's global generator, so `np.random.seed`
      controls reproducibility.

    Parameters:
    data (pandas.DataFrame): Historical numeric data with a date-based index.
                             Must contain at least one row.
    num_days (int): Number of future calendar days to simulate.

    Returns:
    pandas.DataFrame: The original rows followed by `num_days` simulated rows
    indexed by consecutive calendar days after the last historical date.

    Raises:
    IndexError: If `data` has no rows.
    """
    # Fail early on empty input (same exception type as the original lookup).
    last_date = data.index[-1]
    last_values = data.iloc[-1].to_numpy(dtype=float)

    # Simple returns r_t = x_t / x_{t-1} - 1; the first row has no predecessor.
    returns = (data / data.shift(1) - 1).iloc[1:]
    # Division by a zero predecessor yields +/-inf; treat those as missing.
    returns = returns.replace([np.inf, -np.inf], np.nan)

    # Fall back to 0 where a statistic cannot be estimated (too few returns).
    means = returns.mean().fillna(0.0).to_numpy(dtype=float)
    stds = returns.std().fillna(0.0).to_numpy(dtype=float)

    # One draw per (future day, column); loc/scale broadcast across columns.
    random_returns = np.random.normal(
        loc=means, scale=stds, size=(num_days, len(data.columns))
    )

    # Compound the returns and anchor the path at the last observed values.
    growth_factors = (1 + random_returns).cumprod(axis=0)
    future_values = growth_factors * last_values

    future_dates = pd.date_range(
        start=last_date + pd.DateOffset(days=1), periods=num_days
    )
    future_data = pd.DataFrame(
        future_values, index=future_dates, columns=data.columns
    )

    return pd.concat([data, future_data])