eagle0504 committed on
Commit
382f27a
·
verified ·
1 Parent(s): ff4e8fe

Create utils/helper.py

Browse files
Files changed (1) hide show
  1. utils/helper.py +404 -0
utils/helper.py ADDED
@@ -0,0 +1,404 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import plotly.graph_objects as go
3
+ import streamlit as st
4
+ import yfinance as yf
5
+ from plotly.subplots import make_subplots
6
+ from scipy.stats import norm
7
+ import numpy as np
8
+
9
+
10
def calculate_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Add MACD and Signal Line columns to a price DataFrame.

    The MACD is the difference between a short-span and a long-span exponential
    moving average (EMA) of the 'Close' column; the Signal Line is an EMA of the
    MACD itself. All EMAs are computed with ``adjust=False``.

    Parameters:
    data (pd.DataFrame): Price data; must contain a 'Close' column. It is modified in place.
    short_window (int): Span of the shorter EMA. Default is 12.
    long_window (int): Span of the longer EMA. Default is 26.
    signal_window (int): Span of the EMA applied to the MACD. Default is 9.

    Returns:
    pd.DataFrame: The same DataFrame object that was passed in, now carrying the
    'MACD' and 'Signal_Line' columns.
    """
    close = data["Close"]

    # Fast and slow EMAs of the closing price
    fast_ema = close.ewm(span=short_window, adjust=False).mean()
    slow_ema = close.ewm(span=long_window, adjust=False).mean()

    # MACD line and its smoothed signal line
    macd_line = fast_ema - slow_ema
    data["MACD"] = macd_line
    data["Signal_Line"] = macd_line.ewm(span=signal_window, adjust=False).mean()

    return data
45
+
46
+
47
def calculate_normalized_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Add z-score-normalized MACD and Signal Line columns to a price DataFrame.

    The raw MACD (short EMA minus long EMA of 'Close') and its Signal Line (an EMA
    of the raw MACD) are computed first. Each series is then standardized
    independently by subtracting its own mean and dividing by its own standard
    deviation, both taken over the whole series.

    Parameters:
    data (pd.DataFrame): Price data; must contain a 'Close' column. It is modified in place.
    short_window (int): Span of the shorter EMA. Default is 12.
    long_window (int): Span of the longer EMA. Default is 26.
    signal_window (int): Span of the EMA applied to the MACD. Default is 9.

    Returns:
    pd.DataFrame: The same DataFrame object that was passed in, with 'MACD' and
    'Signal_Line' holding the normalized values.
    """

    def _zscore(series: pd.Series) -> pd.Series:
        # Standardize a series against its own mean and standard deviation
        return (series - series.mean()) / series.std()

    close = data["Close"]
    fast_ema = close.ewm(span=short_window, adjust=False).mean()
    slow_ema = close.ewm(span=long_window, adjust=False).mean()

    # The signal line is derived from the raw (un-normalized) MACD
    raw_macd = fast_ema - slow_ema
    raw_signal = raw_macd.ewm(span=signal_window, adjust=False).mean()

    data["MACD"] = _zscore(raw_macd)
    data["Signal_Line"] = _zscore(raw_signal)

    return data
96
+
97
+
98
def calculate_percentile_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Add percentile-scaled MACD and Signal Line columns to a price DataFrame.

    The raw MACD (short EMA minus long EMA of 'Close') and its Signal Line (an EMA
    of the raw MACD) are each z-score normalized, passed through the standard
    normal CDF, and linearly rescaled from [0, 1] to [-100, +100].

    Parameters:
    data (pd.DataFrame): Price data; must contain a 'Close' column. It is modified in place.
    short_window (int): Span of the shorter EMA. Default is 12.
    long_window (int): Span of the longer EMA. Default is 26.
    signal_window (int): Span of the EMA applied to the MACD. Default is 9.

    Returns:
    pd.DataFrame: The same DataFrame object that was passed in, with 'MACD' and
    'Signal_Line' holding the rescaled values.
    """

    def _to_scaled_percentile(series: pd.Series):
        # z-score -> standard normal CDF -> stretch [0, 1] onto [-100, 100]
        standardized = (series - series.mean()) / series.std()
        return norm.cdf(standardized) * 200 - 100

    close = data["Close"]
    fast_ema = close.ewm(span=short_window, adjust=False).mean()
    slow_ema = close.ewm(span=long_window, adjust=False).mean()

    # The signal line is derived from the raw (un-normalized) MACD
    raw_macd = fast_ema - slow_ema
    raw_signal = raw_macd.ewm(span=signal_window, adjust=False).mean()

    data["MACD"] = _to_scaled_percentile(raw_macd)
    data["Signal_Line"] = _to_scaled_percentile(raw_signal)

    return data
148
+
149
+
150
def find_crossovers(
    df: pd.DataFrame, bullish_threshold: float, bearish_threshold: float
) -> pd.DataFrame:
    """
    Identifies the bullish and bearish crossover points between MACD and Signal Line.

    A row is a bullish crossover (+1) when the MACD is above the Signal Line on that
    row, was below it on the previous row, and the Signal Line is below
    `bullish_threshold`. A row is a bearish crossover (-1) when the MACD is below the
    Signal Line, was above it on the previous row, and the Signal Line is above
    `bearish_threshold`. All other rows are marked 0. The first row can never be a
    crossover because it has no previous row to compare against.

    Rows are selected by position, so the result is correct even when the index
    contains duplicate labels.

    Parameters:
    df (pd.DataFrame): The dataframe containing the columns 'MACD' and 'Signal_Line'.
                       It is modified in place.
    bullish_threshold (float): A bullish crossover is only recorded where the
                               Signal Line is strictly below this value.
    bearish_threshold (float): A bearish crossover is only recorded where the
                               Signal Line is strictly above this value.

    Returns:
    pd.DataFrame: The input DataFrame with an additional 'Crossover' column indicating
                  the bullish (+1) and bearish (-1) crossovers.
    """
    macd = df["MACD"]
    signal = df["Signal_Line"]
    prev_macd = macd.shift()
    prev_signal = signal.shift()

    # MACD moved from below the Signal Line to above it
    bullish = (
        (macd > signal) & (prev_macd < prev_signal) & (signal < bullish_threshold)
    )
    # MACD moved from above the Signal Line to below it
    bearish = (
        (macd < signal) & (prev_macd > prev_signal) & (signal > bearish_threshold)
    )

    # Default to "no crossover", then flag rows positionally; passing plain
    # boolean arrays avoids label lookups that would over-select rows when
    # index labels repeat.
    df["Crossover"] = 0
    df.loc[bullish.to_numpy(), "Crossover"] = 1
    df.loc[bearish.to_numpy(), "Crossover"] = -1

    return df
194
+
195
+
196
def get_fundamentals(ticker: str):
    """
    Look up the three core financial statements for a stock ticker via yfinance.

    Parameters:
    ticker (str): The stock symbol to query.

    Returns:
    tuple: A 3-tuple of the ``income_stmt``, ``balance_sheet`` and ``cashflow``
           attributes of the ``yf.Ticker`` object, in that order (presumably
           pandas DataFrames of annual figures - confirm against the yfinance docs).
    """
    ticker_handle = yf.Ticker(ticker)

    # Read the statements in the order they are returned
    income_statement = ticker_handle.income_stmt
    balance_sheet = ticker_handle.balance_sheet
    cash_flow = ticker_handle.cashflow

    return income_statement, balance_sheet, cash_flow
216
+
217
+
218
def create_fig(data: pd.DataFrame, ticker: str) -> go.Figure:
    """
    Build a two-row Plotly figure: a candlestick chart with moving averages on top
    and a MACD chart underneath, both annotated with crossover markers.

    Parameters:
    data (pandas.DataFrame): The input data containing the stock price information.
                             It must include 'Close', 'Open', 'High', 'Low' columns and
                             'MACD', 'Signal_Line', 'Crossover' values calculated externally.
                             The columns 'MA12', 'MA26', 'MA50' and 'MA200' are added to it
                             in place.
    ticker (str): The stock symbol used in the title of the candlestick subplot.

    Returns:
    plotly.graph_objs._figure.Figure: A figure object which includes the visualization of
                                      the stock prices with moving averages and a MACD chart.
    """
    # (column name, rolling window, line colour) for each simple moving average
    moving_averages = (
        ("MA12", 12, "magenta"),
        ("MA26", 26, "cyan"),
        ("MA50", 50, "yellow"),
        ("MA200", 200, "black"),
    )
    for ma, window, _ in moving_averages:
        data[ma] = data["Close"].rolling(window=window).mean()

    # Two stacked panels sharing the x axis
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.02,
        subplot_titles=(f"{ticker} Candlestick", "MACD"),
        row_width=[0.2, 0.7],
    )

    # Price panel: candlesticks first, then the moving-average overlays
    candles = go.Candlestick(
        x=data.index,
        open=data["Open"],
        high=data["High"],
        low=data["Low"],
        close=data["Close"],
        name="Candlestick",
    )
    fig.add_trace(candles, row=1, col=1)

    for ma, _, color in moving_averages:
        ma_trace = go.Scatter(
            x=data.index,
            y=data[ma],
            line=dict(color=color, width=1.5),
            name=f"{ma} days MA",
        )
        fig.add_trace(ma_trace, row=1, col=1)

    # MACD panel: MACD line followed by the signal line
    for column, color, label in (
        ("MACD", "blue", "MACD"),
        ("Signal_Line", "orange", "Signal Line"),
    ):
        line_trace = go.Scatter(
            x=data.index,
            y=data[column],
            line=dict(color=color, width=2),
            name=label,
        )
        fig.add_trace(line_trace, row=2, col=1)

    # Crossover markers: MACD panel first, then the price panel
    bullish = data[data["Crossover"] == 1]
    bearish = data[data["Crossover"] == -1]
    marker_specs = (
        (bullish, "MACD", "triangle-up", "green", 20, "Bullish Crossover (MACD) ✅", 2),
        (bearish, "MACD", "triangle-down", "red", 20, "Bearish Crossover (MACD) 🈲", 2),
        (bullish, "Close", "triangle-up", "green", 25, "Bullish Crossover (Close) ✅", 1),
        (bearish, "Close", "triangle-down", "red", 25, "Bearish Crossover (Close) 🈲", 1),
    )
    for rows, column, symbol, color, size, label, target_row in marker_specs:
        marker_trace = go.Scatter(
            mode="markers",
            x=rows.index,
            y=rows[column],
            marker_symbol=symbol,
            marker_color=color,
            marker_size=size,
            name=label,
        )
        fig.add_trace(marker_trace, row=target_row, col=1)

    # Hide the range slider and fix the overall figure height
    fig.update_layout(
        xaxis_rangeslider_visible=False,
        height=800,
    )

    return fig
361
+
362
+
363
def generate_simulated_data(data: pd.DataFrame, num_days: int) -> pd.DataFrame:
    """
    Generates simulated future data for a given DataFrame based on the statistical
    characteristics (mean and standard deviation) of its historical period-over-period
    returns.

    For every column, simple returns are derived from the historical values, a normal
    distribution is fitted to them, and `num_days` random returns are drawn (from
    NumPy's global random state). The simulated path is the last observed value
    multiplied by the cumulative product of (1 + return).

    Parameters:
    data (pandas.DataFrame): The historical data on which the simulation will be based.
                             The index must be date-based and the columns are assumed
                             to hold numeric levels (e.g. prices), not returns.
    num_days (int): The number of calendar days into the future for which data should
                    be simulated. Must be non-negative.

    Returns:
    pandas.DataFrame: A new DataFrame containing the original historical data appended
                      with the simulated future data. The input is not modified.

    Raises:
    ValueError: If `num_days` is negative.
    IndexError: If `data` has no rows.
    """
    if num_days < 0:
        raise ValueError("num_days must be non-negative")

    # Anchor of the simulated path; raises IndexError on an empty frame
    last_date = data.index[-1]
    last_values = data.iloc[-1].to_numpy(dtype=float)

    if num_days == 0:
        return data.copy()

    # Estimate the return distribution from period-over-period changes of the
    # levels. Infinite returns (division by a zero level) are ignored, and columns
    # with too little history fall back to a zero-mean, zero-volatility return.
    returns = data.pct_change(fill_method=None).replace([np.inf, -np.inf], np.nan)
    means = returns.mean().fillna(0.0).to_numpy(dtype=float)
    stds = returns.std().fillna(0.0).to_numpy(dtype=float)

    # One draw per (day, column); loc/scale broadcast across the day axis
    random_returns = np.random.normal(
        loc=means, scale=stds, size=(num_days, data.shape[1])
    )

    # Compound the growth factors and apply them to the last observed values
    factors = np.cumprod(1.0 + random_returns, axis=0)
    future_values = factors * last_values

    # Future rows start the day after the last historical date
    future_dates = pd.date_range(
        start=last_date + pd.DateOffset(days=1), periods=num_days
    )
    future_data = pd.DataFrame(
        future_values, index=future_dates, columns=data.columns
    )

    return pd.concat([data, future_data])