|
""" |
|
Demo is based on https://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html |
|
""" |
|
|
|
import sys |
|
import numpy as np |
|
import pandas as pd |
|
|
|
# Ticker symbol -> company display name.
# The keys are substituted into the CSV download URL further down; the values
# are used as the text labels in the plot and in the cluster listing.
symbol_dict = {
    "TOT": "Total",
    "XOM": "Exxon",
    "CVX": "Chevron",
    "COP": "ConocoPhillips",
    "VLO": "Valero Energy",
    "MSFT": "Microsoft",
    "IBM": "IBM",
    "TWX": "Time Warner",
    "CMCSA": "Comcast",
    "CVC": "Cablevision",
    "YHOO": "Yahoo",
    "DELL": "Dell",
    "HPQ": "HP",
    "AMZN": "Amazon",
    "TM": "Toyota",
    "CAJ": "Canon",
    "SNE": "Sony",
    "F": "Ford",
    "HMC": "Honda",
    "NAV": "Navistar",
    "NOC": "Northrop Grumman",
    "BA": "Boeing",
    "KO": "Coca Cola",
    "MMM": "3M",
    "MCD": "McDonald's",
    "PEP": "Pepsi",
    "K": "Kellogg",
    "UN": "Unilever",
    "MAR": "Marriott",
    "PG": "Procter Gamble",
    "CL": "Colgate-Palmolive",
    "GE": "General Electrics",
    "WFC": "Wells Fargo",
    "JPM": "JPMorgan Chase",
    "AIG": "AIG",
    "AXP": "American express",
    "BAC": "Bank of America",
    "GS": "Goldman Sachs",
    "AAPL": "Apple",
    "SAP": "SAP",
    "CSCO": "Cisco",
    "TXN": "Texas Instruments",
    "XRX": "Xerox",
    "WMT": "Wal-Mart",
    "HD": "Home Depot",
    "GSK": "GlaxoSmithKline",
    "PFE": "Pfizer",
    "SNY": "Sanofi-Aventis",
    "NVS": "Novartis",
    "KMB": "Kimberly-Clark",
    "R": "Ryder",
    "GD": "General Dynamics",
    "RTN": "Raytheon",
    "CVS": "CVS",
    "CAT": "Caterpillar",
    "DD": "DuPont de Nemours",
}
|
|
|
|
|
# Sort the (symbol, name) pairs by symbol and split them into two parallel
# arrays so that index i refers to the same company in both.
symbols, names = np.array(sorted(symbol_dict.items())).T

# URL template for one CSV of quotes per ticker; "{}" is filled with the symbol.
url = (
    "https://raw.githubusercontent.com/scikit-learn/examples-data/"
    "master/financial-data/{}.csv"
)

# Download one DataFrame per symbol, in the same order as `symbols`.
quotes = []
for symbol in symbols:
    print("Fetching quote history for %r" % symbol, file=sys.stderr)
    quotes.append(pd.read_csv(url.format(symbol)))

# Stack the per-symbol columns into 2-D arrays with one row per symbol.
close_prices = np.vstack([frame["close"] for frame in quotes])
open_prices = np.vstack([frame["open"] for frame in quotes])

# Price change between open and close for every CSV row
# (presumably one row per trading day -- confirm against the data files).
variation = close_prices - open_prices
|
|
|
|
|
from sklearn import covariance |
|
|
|
# Candidate regularisation strengths: 10 log-spaced values in [10**-1.5, 10**1].
alphas = np.logspace(-1.5, 1, num=10)
edge_model = covariance.GraphicalLassoCV(alphas=alphas)

# Work on a transposed copy so `variation` itself is left untouched:
# rows of X are observations, columns are symbols.
X = variation.copy().T

# Scale every column (symbol) to unit standard deviation, in place.
X /= X.std(axis=0)

edge_model.fit(X)
|
|
|
|
|
|
|
from sklearn import cluster |
|
|
|
# Cluster the symbols using the estimated covariance as the similarity matrix.
# Only the per-symbol cluster labels are used; the first return value
# is discarded.
_, labels = cluster.affinity_propagation(
    edge_model.covariance_,
    random_state=0,
)

# Highest label index, i.e. (number of clusters - 1) for labels 0..n_labels.
n_labels = labels.max()
|
|
|
|
|
|
|
|
|
|
|
from sklearn import manifold |
|
|
|
# 2-D layout model used to position the symbols on the plane.
node_position_model = manifold.LocallyLinearEmbedding(
    n_components=2,
    eigen_solver="dense",
    n_neighbors=6,
)

# Fit on X.T (one row per symbol) and transpose the result so that
# embedding[0] holds all x coordinates and embedding[1] all y coordinates.
embedding = node_position_model.fit_transform(X.T).T
|
|
|
import matplotlib.pyplot as plt |
|
from matplotlib.collections import LineCollection |
|
|
|
def visualize_stocks():
    """Draw the stock-structure graph and return the Matplotlib figure.

    Reads the module-level results computed at import time:
    ``edge_model`` (fitted covariance model), ``embedding`` (2 x n array of
    node coordinates), ``labels`` / ``n_labels`` (cluster assignment) and
    ``names`` (display names).

    Nodes are coloured by cluster label, edges are drawn between symbols
    whose rescaled precision entry exceeds 0.02 in absolute value, and each
    node gets a text label placed away from its nearest neighbours.

    Returns:
        The Matplotlib figure number 1, cleared and redrawn on every call.
    """
    fig = plt.figure(1, facecolor="w", figsize=(10, 8))
    fig.clf()
    # A single axes filling the whole figure, with ticks and frame hidden.
    ax = fig.add_axes([0.0, 0.0, 1.0, 1.0])
    ax.axis("off")

    # Rescale the precision matrix: entry [i, j] is multiplied by
    # 1 / sqrt(P[i, i] * P[j, j]), which makes the diagonal equal to 1.
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    # Keep only the strict upper triangle so each pair is considered once.
    non_zero = np.abs(np.triu(partial_correlations, k=1)) > 0.02

    # Nodes: position from the embedding, size from d**2, colour from cluster.
    ax.scatter(
        embedding[0],
        embedding[1],
        s=100 * d**2,
        c=labels,
        cmap=plt.cm.nipy_spectral,
    )

    # Edges: one segment per retained (row, column) pair.
    start_idx, end_idx = np.where(non_zero)
    values = np.abs(partial_correlations[non_zero])
    # Guard: values.max() raises ValueError when no entry passes the threshold.
    if values.size:
        segments = [
            [embedding[:, start], embedding[:, stop]]
            for start, stop in zip(start_idx, end_idx)
        ]
        lc = LineCollection(
            segments,
            zorder=0,
            cmap=plt.cm.hot_r,
            norm=plt.Normalize(0, 0.7 * values.max()),
        )
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)

    # Denominator for mapping a label to a colormap position in [0, 1];
    # clamped to 1 so a single cluster (n_labels == 0) does not divide by zero.
    color_scale = float(max(n_labels, 1))

    # Text labels: for each node, look at the neighbour closest along the
    # other axis and push the label to the opposite side of it.
    for index, (name, label, (x, y)) in enumerate(zip(names, labels, embedding.T)):
        dx = x - embedding[0]
        dx[index] = 1  # exclude the node's own zero offset from the argmin
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]

        if this_dx > 0:
            horizontalalignment = "left"
            x = x + 0.002
        else:
            horizontalalignment = "right"
            x = x - 0.002

        if this_dy > 0:
            verticalalignment = "bottom"
            y = y + 0.002
        else:
            verticalalignment = "top"
            y = y - 0.002

        ax.text(
            x,
            y,
            name,
            size=10,
            horizontalalignment=horizontalalignment,
            verticalalignment=verticalalignment,
            bbox=dict(
                facecolor="w",
                edgecolor=plt.cm.nipy_spectral(label / color_scale),
                alpha=0.6,
            ),
        )

    # np.ptp instead of ndarray.ptp(): the method was removed in NumPy 2.0.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    ax.set_xlim(
        embedding[0].min() - 0.15 * x_span,
        embedding[0].max() + 0.10 * x_span,
    )
    ax.set_ylim(
        embedding[1].min() - 0.03 * y_span,
        embedding[1].max() + 0.03 * y_span,
    )

    return fig
|
|
|
import gradio as gr |
|
|
|
# NOTE(review): the "π" characters look like a mis-encoded emoji -- confirm
# the intended glyph; the string is left unchanged here.
title = " π Visualizing the stock market structure π"

# Page layout: heading, short description, one line per cluster, then a
# button that renders the graph into the plot component below it.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(" Data is of 56 stocks between the period of 2003 - 2008 <br>")
    gr.Markdown(" Stocks that move together are grouped together in a cluster <br>")

    gr.Markdown(" **[Demo is based on sklearn docs](https://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html)**")

    # Labels run from 0 to n_labels inclusive; clusters are shown 1-based.
    for i in range(n_labels + 1):
        gr.Markdown(f"Cluster {i + 1}: {', '.join(names[labels == i])}")

    btn = gr.Button(value="Visualize")
    plot = gr.Plot(label='Visualizing stock into clusters')
    btn.click(visualize_stocks, outputs=plot)
    gr.Markdown("## In progress")

demo.launch()