File size: 15,096 Bytes
382f27a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
import pandas as pd
import plotly.graph_objects as go
import streamlit as st
import yfinance as yf
from plotly.subplots import make_subplots
from scipy.stats import norm
import numpy as np


def calculate_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Add MACD and Signal line columns to a price DataFrame.

    The MACD is the difference between a short-span and a long-span exponential
    moving average (EMA) of the 'Close' column; the Signal line is an EMA of the
    MACD itself. All EMAs use the recursive form (``adjust=False``).

    Parameters:
        data (pd.DataFrame): Price data; must contain a 'Close' column.
        short_window (int): Span of the shorter EMA. Default is 12.
        long_window (int): Span of the longer EMA. Default is 26.
        signal_window (int): Span of the Signal line EMA. Default is 9.

    Returns:
        pd.DataFrame: The same DataFrame object that was passed in, with the
                      columns 'MACD' and 'Signal_Line' added (or overwritten).

    Note: The input DataFrame is modified in place.
    """

    def _ema(series, span):
        # Recursive EMA: each value depends only on the previous EMA value.
        return series.ewm(span=span, adjust=False).mean()

    close_prices = data["Close"]

    # MACD line: short-term EMA minus long-term EMA
    data["MACD"] = _ema(close_prices, short_window) - _ema(close_prices, long_window)

    # Signal line: EMA of the MACD line
    data["Signal_Line"] = _ema(data["MACD"], signal_window)

    return data


def calculate_normalized_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Add z-score-normalized MACD and Signal line columns to a price DataFrame.

    The raw MACD is the short-span EMA of 'Close' minus the long-span EMA, and
    the raw Signal line is an EMA of that MACD. Each of the two series is then
    standardized independently: its own mean is subtracted and the result is
    divided by its own (sample) standard deviation.

    Parameters:
        data (pd.DataFrame): Price data; must contain a 'Close' column.
        short_window (int): Span of the shorter EMA. Default is 12.
        long_window (int): Span of the longer EMA. Default is 26.
        signal_window (int): Span of the Signal line EMA. Default is 9.

    Returns:
        pd.DataFrame: The same DataFrame object that was passed in, with the
                      columns 'MACD' and 'Signal_Line' holding the normalized values.

    Note: The input DataFrame is modified in place.
    """
    prices = data["Close"]

    # Raw indicator values
    fast_ema = prices.ewm(span=short_window, adjust=False).mean()
    slow_ema = prices.ewm(span=long_window, adjust=False).mean()
    data["MACD"] = fast_ema - slow_ema
    data["Signal_Line"] = data["MACD"].ewm(span=signal_window, adjust=False).mean()

    # Standardize each series against its own mean and standard deviation.
    # The Signal line above was already derived from the *raw* MACD, so the
    # order of these two normalizations does not matter.
    for column in ("MACD", "Signal_Line"):
        raw_values = data[column]
        data[column] = (raw_values - raw_values.mean()) / raw_values.std()

    return data


def calculate_percentile_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Add percentile-style MACD and Signal line columns to a price DataFrame.

    The raw MACD (short-span EMA of 'Close' minus long-span EMA) and the raw
    Signal line (EMA of the MACD) are each z-score normalized against their own
    mean and sample standard deviation. The z-scores are then mapped through the
    standard normal CDF and linearly rescaled from [0, 1] to [-100, +100].

    Parameters:
        data (pd.DataFrame): Price data; must contain a 'Close' column.
        short_window (int): Span of the shorter EMA. Default is 12.
        long_window (int): Span of the longer EMA. Default is 26.
        signal_window (int): Span of the Signal line EMA. Default is 9.

    Returns:
        pd.DataFrame: The same DataFrame object that was passed in, with the
                      columns 'MACD' and 'Signal_Line' holding the rescaled values.

    Note: The input DataFrame is modified in place.
    """
    prices = data["Close"]

    # Raw indicator values
    fast_ema = prices.ewm(span=short_window, adjust=False).mean()
    slow_ema = prices.ewm(span=long_window, adjust=False).mean()
    data["MACD"] = fast_ema - slow_ema
    data["Signal_Line"] = data["MACD"].ewm(span=signal_window, adjust=False).mean()

    # Each column is transformed independently: z-score first, then the normal
    # CDF turns the z-score into a [0, 1] position, which is stretched to
    # [-100, +100] so that 0 corresponds to the series mean.
    for column in ("MACD", "Signal_Line"):
        raw_values = data[column]
        z_scores = (raw_values - raw_values.mean()) / raw_values.std()
        data[column] = norm.cdf(z_scores) * 200 - 100

    return data


def find_crossovers(
    df: pd.DataFrame, bullish_threshold: float, bearish_threshold: float
) -> pd.DataFrame:
    """
    Identifies the bullish and bearish crossover points between MACD and Signal Line.

    A row is a bullish crossover (+1) when the MACD is above the Signal Line on
    that row, was below it on the previous row, and the Signal Line on that row
    is below ``bullish_threshold``. A row is a bearish crossover (-1) when the
    MACD is below the Signal Line on that row, was above it on the previous row,
    and the Signal Line on that row is above ``bearish_threshold``. Every other
    row (including the first, which has no predecessor) is marked 0.

    Parameters:
        df (pd.DataFrame): The dataframe containing the columns 'MACD' and 'Signal_Line'.
        bullish_threshold (float): Upper bound on the Signal Line for a bullish
                                   crossover to count (Signal_Line < bullish_threshold).
        bearish_threshold (float): Lower bound on the Signal Line for a bearish
                                   crossover to count (Signal_Line > bearish_threshold).

    Returns:
        pd.DataFrame: The input DataFrame (modified in place) with an additional
                      'Crossover' column indicating the bullish (+1) and bearish (-1)
                      crossovers.
    """
    macd = df["MACD"]
    signal = df["Signal_Line"]
    previous_macd = macd.shift()
    previous_signal = signal.shift()

    # Comparisons against the NaN produced by shift() on the first row are
    # False, so the first row can never be flagged.
    bullish = (
        (macd > signal)
        & (previous_macd < previous_signal)
        & (signal < bullish_threshold)
    )
    bearish = (
        (macd < signal)
        & (previous_macd > previous_signal)
        & (signal > bearish_threshold)
    )

    # Initialize 'Crossover' column to zero, indicating no crossover by default
    df["Crossover"] = 0

    # Assign through positional boolean arrays rather than index labels: with a
    # non-unique index, label-based assignment would also flag every other row
    # that happens to share a label with a crossover row.
    df.loc[bullish.to_numpy(), "Crossover"] = 1
    df.loc[bearish.to_numpy(), "Crossover"] = -1

    return df


def get_fundamentals(ticker: str):
    """
    Retrieve the three core financial statements for a stock ticker via yfinance.

    Parameters:
        ticker (str): The stock symbol to query.

    Returns:
        tuple: A 3-tuple ``(income_statement, balance_sheet, cash_flow)`` holding,
               in that order, the ``income_stmt``, ``balance_sheet`` and ``cashflow``
               attributes of the ``yfinance.Ticker`` object for the symbol.
    """
    company = yf.Ticker(ticker)

    # The statements are read in the same order in which they are returned.
    income_statement = company.income_stmt
    balance_sheet = company.balance_sheet
    cash_flow = company.cashflow

    return income_statement, balance_sheet, cash_flow


def create_fig(data: pd.DataFrame, ticker: str) -> go.Figure:
    """
    Build a two-row Plotly figure: a candlestick chart with moving averages on top
    and a MACD chart below, both annotated with crossover markers.

    Parameters:
        data (pandas.DataFrame): Stock price data. It must include the columns 'Open',
                                 'High', 'Low', 'Close' as well as 'MACD', 'Signal_Line'
                                 and 'Crossover' (1 = bullish, -1 = bearish) computed
                                 beforehand. The columns 'MA12', 'MA26', 'MA50' and
                                 'MA200' are added to this DataFrame in place.
        ticker (str): The stock symbol, used in the title of the candlestick subplot.

    Returns:
        plotly.graph_objs._figure.Figure: The assembled figure with shared x-axes,
                                          the range slider hidden and a height of 800.
    """
    # Simple rolling means of the closing price, stored back on the input frame
    for window in (12, 26, 50, 200):
        data[f"MA{window}"] = data["Close"].rolling(window=window).mean()

    # Two stacked subplots sharing the x-axis: prices on top, MACD underneath
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.02,
        subplot_titles=(f"{ticker} Candlestick", "MACD"),
        row_width=[0.2, 0.7],
    )

    # --- Row 1: candlesticks -------------------------------------------------
    candles = go.Candlestick(
        x=data.index,
        open=data["Open"],
        high=data["High"],
        low=data["Low"],
        close=data["Close"],
        name="Candlestick",
    )
    fig.add_trace(candles, row=1, col=1)

    # --- Row 1: moving averages ----------------------------------------------
    ma_colors = {
        "MA12": "magenta",
        "MA26": "cyan",
        "MA50": "yellow",
        "MA200": "black",
    }
    for ma_column, ma_color in ma_colors.items():
        ma_trace = go.Scatter(
            x=data.index,
            y=data[ma_column],
            line=dict(color=ma_color, width=1.5),
            name=f"{ma_column} days MA",
        )
        fig.add_trace(ma_trace, row=1, col=1)

    # --- Row 2: MACD and Signal line -----------------------------------------
    indicator_lines = (
        ("MACD", "blue", "MACD"),
        ("Signal_Line", "orange", "Signal Line"),
    )
    for column, line_color, label in indicator_lines:
        indicator_trace = go.Scatter(
            x=data.index,
            y=data[column],
            line=dict(color=line_color, width=2),
            name=label,
        )
        fig.add_trace(indicator_trace, row=2, col=1)

    # --- Crossover markers ---------------------------------------------------
    bullish_rows = data[data["Crossover"] == 1]
    bearish_rows = data[data["Crossover"] == -1]

    # (rows, y column, symbol, color, size, legend name, subplot row); the MACD
    # markers are added before the price markers so the trace order is stable.
    marker_specs = (
        (bullish_rows, "MACD", "triangle-up", "green", 20, "Bullish Crossover (MACD) ✅", 2),
        (bearish_rows, "MACD", "triangle-down", "red", 20, "Bearish Crossover (MACD) 🈲", 2),
        (bullish_rows, "Close", "triangle-up", "green", 25, "Bullish Crossover (Close) ✅", 1),
        (bearish_rows, "Close", "triangle-down", "red", 25, "Bearish Crossover (Close) 🈲", 1),
    )
    for rows, y_column, symbol, color, size, label, subplot_row in marker_specs:
        marker_trace = go.Scatter(
            mode="markers",
            x=rows.index,
            y=rows[y_column],
            marker_symbol=symbol,
            marker_color=color,
            marker_size=size,
            name=label,
        )
        fig.add_trace(marker_trace, row=subplot_row, col=1)

    # Hide the candlestick range slider and fix the overall figure height
    fig.update_layout(
        xaxis_rangeslider_visible=False,
        height=800,
    )

    return fig


def generate_simulated_data(data: pd.DataFrame, num_days: int) -> pd.DataFrame:
    """
    Generates simulated future data for a given DataFrame based on the statistical
    characteristics (mean and standard deviation) of its historical returns.

    For every column, period-over-period simple returns are computed from the
    historical values. Future returns are then drawn from a normal distribution
    with that column's mean and sample standard deviation (via ``np.random.normal``,
    i.e. NumPy's global random state), compounded with a cumulative product, and
    applied to the column's last observed value. The simulated rows are therefore
    expressed in the same units as the historical data.

    Parameters:
        data (pandas.DataFrame): The historical data on which the simulation will be based.
                                 The index must be date-based and all columns numeric.
        num_days (int): The number of days into the future for which data should be simulated.
                        Future rows are dated on consecutive calendar days after the last
                        index entry.

    Returns:
        pandas.DataFrame: A new DataFrame containing the original historical data followed
                          by ``num_days`` simulated rows. When ``num_days`` is 0 a copy of
                          the input is returned.

    Raises:
        ValueError: If ``data`` has no rows or ``num_days`` is negative.
    """
    if len(data.index) == 0:
        raise ValueError("data must contain at least one row to simulate from")
    if num_days < 0:
        raise ValueError("num_days must be non-negative")
    if num_days == 0:
        return data.copy()

    # Simple returns between consecutive rows. Divisions by zero produce +/-inf,
    # which would poison the mean and std, so they are treated as missing.
    returns = data / data.shift(1) - 1
    returns = returns.replace([np.inf, -np.inf], np.nan)

    # With fewer than two usable returns the statistics are NaN; fall back to a
    # zero-drift / zero-volatility projection instead of emitting NaN rows.
    mean_returns = returns.mean().fillna(0.0).to_numpy(dtype=float)
    return_stds = returns.std().fillna(0.0).to_numpy(dtype=float)

    # One draw per (future day, column); loc/scale broadcast across the rows.
    random_returns = np.random.normal(
        loc=mean_returns, scale=return_stds, size=(num_days, data.shape[1])
    )

    # Compound the returns into cumulative growth factors
    factors = np.cumprod(1.0 + random_returns, axis=0)

    # Anchor each simulated path at the column's most recent observed value
    last_values = data.ffill().iloc[-1].to_numpy(dtype=float)

    # Generate future dates
    last_date = data.index[-1]
    future_dates = pd.date_range(
        start=last_date + pd.DateOffset(days=1), periods=num_days
    )

    future_data = pd.DataFrame(
        factors * last_values, index=future_dates, columns=data.columns
    )

    # Concatenate original data and future data
    return pd.concat([data, future_data])