import numpy as np
import pandas as pd
import plotly.graph_objects as go
import streamlit as st
import yfinance as yf
from plotly.subplots import make_subplots
from scipy.stats import norm


def _zscore(series: pd.Series) -> pd.Series:
    """
    Standardize a series to zero mean and unit (sample) standard deviation.

    Note:
        A series with zero variance, or with fewer than two observations,
        has no usable standard deviation, so the result contains NaN/inf in
        that case. This is left untouched so callers can detect it.
    """
    return (series - series.mean()) / series.std()


def calculate_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Calculate the Moving Average Convergence Divergence (MACD) and Signal line indicators.

    Parameters:
        data (pd.DataFrame): The dataframe containing stock price information.
            It must contain a 'Close' column.
        short_window (int): The number of periods for the shorter exponential
            moving average (EMA). Default is 12.
        long_window (int): The number of periods for the longer EMA. Default is 26.
        signal_window (int): The number of periods for the signal line EMA. Default is 9.

    Returns:
        pd.DataFrame: The same DataFrame object that was passed in, with the
        columns 'MACD' and 'Signal_Line' added (or overwritten) in place.
    """
    close = data["Close"]

    # adjust=False selects the recursive EMA definition used by charting tools.
    short_ema = close.ewm(span=short_window, adjust=False).mean()
    long_ema = close.ewm(span=long_window, adjust=False).mean()

    # MACD is the spread between the fast and slow EMAs; the signal line is
    # an EMA of that spread.
    data["MACD"] = short_ema - long_ema
    data["Signal_Line"] = data["MACD"].ewm(span=signal_window, adjust=False).mean()

    return data


def calculate_normalized_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Calculate the z-score normalized MACD and Signal line.

    The raw MACD and Signal line are computed exactly as in `calculate_macd`
    and then each column is standardized independently over the whole
    DataFrame (subtract its mean, divide by its sample standard deviation).

    Parameters:
        data (pd.DataFrame): The dataframe containing stock price information
            with a 'Close' column.
        short_window (int): The number of periods for the shorter EMA. Default is 12.
        long_window (int): The number of periods for the longer EMA. Default is 26.
        signal_window (int): The number of periods for the signal line EMA. Default is 9.

    Returns:
        pd.DataFrame: The same DataFrame object that was passed in, with the
        columns 'MACD' and 'Signal_Line' holding the normalized values.
    """
    data = calculate_macd(data, short_window, long_window, signal_window)

    # The signal line is derived from the raw MACD above, so normalizing the
    # two columns afterwards keeps them independent of each other.
    data["MACD"] = _zscore(data["MACD"])
    data["Signal_Line"] = _zscore(data["Signal_Line"])

    return data


def calculate_percentile_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Calculate the percentile-based MACD and Signal line.

    The z-score normalized MACD and Signal line (see
    `calculate_normalized_macd`) are mapped through the standard normal CDF
    and rescaled from [0, 1] to [-100, +100], so a value of 0 corresponds to
    the mean of the series.

    Parameters:
        data (pd.DataFrame): The dataframe containing stock price information
            with a 'Close' column.
        short_window (int): The number of periods for the shorter EMA. Default is 12.
        long_window (int): The number of periods for the longer EMA. Default is 26.
        signal_window (int): The number of periods for the signal line EMA. Default is 9.

    Returns:
        pd.DataFrame: The same DataFrame object that was passed in, with the
        columns 'MACD' and 'Signal_Line' holding the rescaled percentile values.
    """
    data = calculate_normalized_macd(data, short_window, long_window, signal_window)

    # CDF gives a value in [0, 1]; "* 200 - 100" stretches it to [-100, +100].
    data["MACD"] = norm.cdf(data["MACD"]) * 200 - 100
    data["Signal_Line"] = norm.cdf(data["Signal_Line"]) * 200 - 100

    return data


def find_crossovers(
    df: pd.DataFrame, bullish_threshold: float, bearish_threshold: float
) -> pd.DataFrame:
    """
    Identify bullish and bearish crossover points between MACD and Signal Line.

    A bullish crossover is a row where the MACD moves from below the Signal
    Line (previous row) to above it (current row); a bearish crossover is the
    opposite move. Each kind is additionally filtered by a threshold on the
    current Signal Line value.

    Parameters:
        df (pd.DataFrame): The dataframe containing the columns 'MACD' and 'Signal_Line'.
        bullish_threshold (float): A bullish crossover is only recorded when
            the Signal Line is strictly below this value.
        bearish_threshold (float): A bearish crossover is only recorded when
            the Signal Line is strictly above this value.

    Returns:
        pd.DataFrame: The same DataFrame object that was passed in, with a
        'Crossover' column added in place: 1 for bullish, -1 for bearish and
        0 everywhere else.
    """
    macd = df["MACD"]
    signal = df["Signal_Line"]
    previous_macd = macd.shift()
    previous_signal = signal.shift()

    # The first row has no predecessor; comparisons against NaN are False,
    # so it can never be flagged as a crossover.
    bullish = (
        (macd > signal)
        & (previous_macd < previous_signal)
        & (signal < bullish_threshold)
    )
    bearish = (
        (macd < signal)
        & (previous_macd > previous_signal)
        & (signal > bearish_threshold)
    )

    # Boolean masks address rows by position, which stays correct even when
    # the index contains duplicate labels.
    df["Crossover"] = 0
    df.loc[bullish, "Crossover"] = 1
    df.loc[bearish, "Crossover"] = -1

    return df


def get_fundamentals(ticker: str) -> tuple:
    """
    Fetch the income statement, balance sheet and cash flow statement for a stock ticker.

    The data is read from the `income_stmt`, `balance_sheet` and `cashflow`
    attributes of a `yfinance.Ticker` object, which queries Yahoo Finance.

    Parameters:
        ticker (str): The stock symbol to query.

    Returns:
        tuple: A 3-tuple `(income_statement, balance_sheet, cash_flow)` in
        that order, as returned by yfinance.
    """
    stock = yf.Ticker(ticker)
    return stock.income_stmt, stock.balance_sheet, stock.cashflow


def _crossover_marker_trace(
    data: pd.DataFrame, direction: int, y_column: str, size: int
) -> go.Scatter:
    """
    Build the marker trace for one crossover direction on one value column.

    Parameters:
        data (pd.DataFrame): Data with a 'Crossover' column and `y_column`.
        direction (int): 1 for bullish crossovers, -1 for bearish ones.
        y_column (str): Column supplying the y position of the markers; it is
            also used as the label inside the legend entry.
        size (int): Marker size.

    Returns:
        go.Scatter: A markers-only trace (green up-triangles for bullish,
        red down-triangles for bearish).
    """
    is_bullish = direction == 1
    points = data[data["Crossover"] == direction]

    if is_bullish:
        name = f"Bullish Crossover ({y_column}) ✅"
    else:
        name = f"Bearish Crossover ({y_column}) 🈲"

    return go.Scatter(
        mode="markers",
        x=points.index,
        y=points[y_column],
        marker_symbol="triangle-up" if is_bullish else "triangle-down",
        marker_color="green" if is_bullish else "red",
        marker_size=size,
        name=name,
    )


def create_fig(data: pd.DataFrame, ticker: str) -> go.Figure:
    """
    Create a two-row Plotly figure: candlesticks with moving averages on top
    and the MACD chart below, both annotated with crossover markers.

    Parameters:
        data (pandas.DataFrame): The input data containing the stock price
            information. It must include 'Close', 'Open', 'High', 'Low'
            columns and 'MACD', 'Signal_Line', 'Crossover' values calculated
            externally.
        ticker (str): The stock symbol used in the subplot title.

    Returns:
        plotly.graph_objs._figure.Figure: The assembled figure.

    Note:
        The columns 'MA12', 'MA26', 'MA50' and 'MA200' (simple rolling means
        of 'Close') are added to `data` in place.
    """
    ma_colors = {12: "magenta", 26: "cyan", 50: "yellow", 200: "black"}
    for window in ma_colors:
        data[f"MA{window}"] = data["Close"].rolling(window=window).mean()

    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.02,
        subplot_titles=(f"{ticker} Candlestick", "MACD"),
        row_width=[0.2, 0.7],
    )

    # Row 1: price candlesticks.
    fig.add_trace(
        go.Candlestick(
            x=data.index,
            open=data["Open"],
            high=data["High"],
            low=data["Low"],
            close=data["Close"],
            name="Candlestick",
        ),
        row=1,
        col=1,
    )

    # Row 1: moving-average overlays.
    for window, color in ma_colors.items():
        column = f"MA{window}"
        fig.add_trace(
            go.Scatter(
                x=data.index,
                y=data[column],
                line=dict(color=color, width=1.5),
                name=f"{column} days MA",
            ),
            row=1,
            col=1,
        )

    # Row 2: MACD and its signal line.
    fig.add_trace(
        go.Scatter(
            x=data.index,
            y=data["MACD"],
            line=dict(color="blue", width=2),
            name="MACD",
        ),
        row=2,
        col=1,
    )
    fig.add_trace(
        go.Scatter(
            x=data.index,
            y=data["Signal_Line"],
            line=dict(color="orange", width=2),
            name="Signal Line",
        ),
        row=2,
        col=1,
    )

    # Crossover markers: first on the MACD chart, then on the price chart.
    # Traces are added in this fixed order so the legend order is stable.
    for y_column, row, size in (("MACD", 2, 20), ("Close", 1, 25)):
        for direction in (1, -1):
            fig.add_trace(
                _crossover_marker_trace(data, direction, y_column, size),
                row=row,
                col=1,
            )

    fig.update_layout(
        xaxis_rangeslider_visible=False,
        height=800,
    )

    return fig


def generate_simulated_data(data: pd.DataFrame, num_days: int) -> pd.DataFrame:
    """
    Extend a historical DataFrame with simulated future rows.

    For every column, period-over-period returns are computed from the
    history and their mean and standard deviation are estimated. Future
    returns are drawn from a normal distribution with those moments,
    compounded with a cumulative product and applied to the column's last
    valid observed value.

    Parameters:
        data (pandas.DataFrame): The historical data on which the simulation
            is based. All columns must be numeric and the index must be
            date-based and non-empty.
        num_days (int): The number of calendar days to simulate after the
            last index entry. Must be non-negative.

    Returns:
        pandas.DataFrame: A new DataFrame containing the original rows
        followed by `num_days` simulated rows with the same columns.

    Raises:
        ValueError: If `num_days` is negative.

    Note:
        Columns are simulated independently of each other, so relationships
        such as High >= Low are not guaranteed in the simulated rows. Random
        numbers come from NumPy's global generator; seed it with
        `np.random.seed` for reproducible output.
    """
    if num_days < 0:
        raise ValueError("num_days must be non-negative")
    if num_days == 0:
        return data.copy()

    # Estimate the moments from returns, not from price levels: a mean level
    # of e.g. 150 used as a "return" would compound to astronomically large
    # values within a few steps.
    returns = (data / data.shift(1) - 1).replace([np.inf, -np.inf], np.nan)

    # With fewer than two usable observations the moments are undefined;
    # falling back to 0 keeps that column flat instead of producing NaN.
    means = returns.mean().fillna(0.0).to_numpy(dtype=float)
    stds = returns.std().fillna(0.0).to_numpy(dtype=float)

    # One draw per (day, column); loc/scale broadcast across the day axis.
    random_returns = np.random.normal(
        loc=means, scale=stds, size=(num_days, len(data.columns))
    )
    # A return below -100% would flip the sign of the series.
    random_returns = np.maximum(random_returns, -1.0)

    # Compound the returns and anchor them to the last known value of each
    # column so the simulated path continues from where the history ends.
    growth_factors = np.cumprod(1.0 + random_returns, axis=0)
    last_values = data.ffill().iloc[-1].to_numpy(dtype=float)
    future_values = growth_factors * last_values

    last_date = data.index[-1]
    future_dates = pd.date_range(
        start=last_date + pd.DateOffset(days=1), periods=num_days
    )
    future_data = pd.DataFrame(
        future_values, index=future_dates, columns=data.columns
    )

    return pd.concat([data, future_data])