# Source listing header (file size: 5,061 bytes; revision 88e180d). Viewer line-number gutter removed.
import streamlit as st
import pandas as pd
from textblob import TextBlob
import joblib
import matplotlib.pyplot as plt
import datetime
# Load the data
def load_data():
    """Build the per-stock feature table that the app predicts from.

    Reads ``data/stock_yfinance_data.csv`` (price rows) and
    ``data/stock_tweets.csv`` (tweets), scores every tweet with TextBlob
    polarity, averages the scores per calendar day and stock, left-joins
    them onto the price rows and derives the model features.

    Returns:
        A DataFrame sorted by ``Date`` containing the price columns plus
        ``Sentiment``, ``Prev_Close``, ``Prev_Sentiment``, ``MA7``, ``MA14``,
        ``Daily_Change`` and ``Volatility``.  Rows whose engineered features
        are still undefined (the rolling-window warm-up at the start of each
        stock's history) are dropped.
    """
    # NOTE(review): Streamlit re-executes the script on every widget
    # interaction, so this whole pipeline (including sentiment scoring) is
    # recomputed each time; consider wrapping it with Streamlit's data cache.
    stock_data = pd.read_csv('data/stock_yfinance_data.csv')
    tweets_data = pd.read_csv('data/stock_tweets.csv')

    def to_calendar_day(values):
        # Parse as UTC, drop the timezone and truncate to midnight so that
        # timestamped tweets collapse onto one key per day and both frames
        # share the same naive datetime64 dtype for the merge.
        # assumes the UTC calendar day is the intended bucket -- TODO confirm
        parsed = pd.to_datetime(values, utc=True)
        return parsed.dt.tz_localize(None).dt.normalize()

    stock_data['Date'] = to_calendar_day(stock_data['Date'])
    tweets_data['Date'] = to_calendar_day(tweets_data['Date'])

    def get_sentiment(tweet):
        # TextBlob only accepts str; an empty CSV cell arrives as NaN (float)
        # and is scored as neutral instead of raising TypeError.
        if not isinstance(tweet, str):
            return 0.0
        return TextBlob(tweet).sentiment.polarity

    tweets_data['Sentiment'] = tweets_data['Tweet'].apply(get_sentiment)

    # Mean of every numeric tweet column per (day, stock).
    daily_sentiment = (
        tweets_data
        .groupby(['Date', 'Stock Name'])
        .mean(numeric_only=True)
        .reset_index()
    )

    merged_data = pd.merge(
        stock_data,
        daily_sentiment,
        how='left',
        on=['Date', 'Stock Name'],
    )

    # Days without tweets count as neutral.  Assign the result back: an
    # in-place fillna on a column selection is a chained assignment, which is
    # deprecated and has no effect under pandas Copy-on-Write.
    merged_data['Sentiment'] = merged_data['Sentiment'].fillna(0)

    # Chronological order is required for the shift/rolling features below.
    merged_data = merged_data.sort_values(by='Date')

    by_stock = merged_data.groupby('Stock Name')
    close_by_stock = by_stock['Close']

    # Lagged features: the previous row's values within the same stock.
    merged_data['Prev_Close'] = close_by_stock.shift(1)
    merged_data['Prev_Sentiment'] = by_stock['Sentiment'].shift(1)

    # Rolling statistics of the close price, computed per stock.
    merged_data['MA7'] = close_by_stock.transform(
        lambda x: x.rolling(window=7).mean())
    merged_data['MA14'] = close_by_stock.transform(
        lambda x: x.rolling(window=14).mean())
    merged_data['Daily_Change'] = merged_data['Close'] - merged_data['Prev_Close']
    merged_data['Volatility'] = close_by_stock.transform(
        lambda x: x.rolling(window=7).std())

    # Drop rows with missing feature values.  Restricted to the engineered
    # columns so an unrelated NaN elsewhere does not discard a usable row.
    feature_columns = [
        'Prev_Close', 'Prev_Sentiment', 'MA7', 'MA14',
        'Daily_Change', 'Volatility',
    ]
    merged_data = merged_data.dropna(subset=feature_columns)

    return merged_data
# Build the feature table and the list of selectable stocks.
data = load_data()
stock_names = data['Stock Name'].unique()

# Load the best model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only ship a model artifact that comes from a trusted source.
model_filename = 'model/best_model.pkl'
model = joblib.load(model_filename)

st.title("Stock Price Prediction Using Sentiment Analysis")

# User input for stock data
st.header("Input Stock Data")
selected_stock = st.selectbox("Select Stock Name", stock_names)
days_to_predict = st.number_input(
    "Number of Days to Predict",
    min_value=1,
    max_value=30,
    value=10,
)

# Latest row for the selected stock; `data` is sorted by date, so the last
# matching row is the most recent one.
latest_data = data[data['Stock Name'] == selected_stock].iloc[-1]

# The latest row seeds the features for the first predicted day: its close
# and sentiment act as the "previous" values for that day.
prev_close = latest_data['Close']
prev_sentiment = latest_data['Sentiment']
ma7 = latest_data['MA7']
ma14 = latest_data['MA14']
daily_change = latest_data['Daily_Change']
volatility = latest_data['Volatility']

# Display the latest stock data in a table
latest_data_df = pd.DataFrame({
    'Metric': [
        'Previous Close Price',
        'Previous Sentiment',
        '7-day Moving Average',
        '14-day Moving Average',
        'Daily Change',
        'Volatility',
    ],
    'Value': [prev_close, prev_sentiment, ma7, ma14, daily_change, volatility],
})
st.write("Latest Stock Data:")
st.table(latest_data_df)

st.write("Use the inputs above to predict the next days close prices of the stock.")
if st.button("Predict"):
    predictions = []
    # Forecast dates start the day after the newest row of the selected stock.
    latest_date = pd.Timestamp(latest_data['Date'])
    horizon = int(days_to_predict)

    # Roll the model forward one day at a time, feeding each prediction back
    # in as the next step's "previous close".
    for _ in range(horizon):
        X_future = pd.DataFrame({
            'Prev_Close': [prev_close],
            'Prev_Sentiment': [prev_sentiment],
            'MA7': [ma7],
            'MA14': [ma14],
            'Daily_Change': [daily_change],
            'Volatility': [volatility],
        })
        next_day_prediction = model.predict(X_future)[0]
        predictions.append(next_day_prediction)

        # Update features for the next prediction.  The change must be taken
        # against the old prev_close *before* it is overwritten; otherwise it
        # is always zero.
        daily_change = next_day_prediction - prev_close
        prev_close = next_day_prediction
        ma7 = (ma7 * 6 + next_day_prediction) / 7  # Simplified rolling calculation
        ma14 = (ma14 * 13 + next_day_prediction) / 14  # Simplified rolling calculation
        # prev_sentiment and volatility are carried forward unchanged: there
        # are no future tweets or realised prices to recompute them from.

    # Prepare prediction data for display.
    # NOTE(review): date_range yields calendar days, weekends included.
    prediction_dates = pd.date_range(
        start=latest_date + pd.Timedelta(days=1),
        periods=horizon,
    )
    prediction_df = pd.DataFrame({
        'Date': prediction_dates,
        'Predicted Close Price': predictions,
    })

    st.subheader("Predicted Prices")
    st.table(prediction_df)

    # Plotting the results.  An explicit Figure is handed to Streamlit rather
    # than relying on pyplot's global current figure, and it is closed
    # afterwards so reruns do not accumulate open figures.
    st.subheader("Prediction Chart")
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(
        prediction_df['Date'],
        prediction_df['Predicted Close Price'],
        marker='o',
        linestyle='--',
        label="Predicted Close Price",
    )
    ax.set_xlabel("Date")
    ax.set_ylabel("Close Price")
    ax.set_title(f"{selected_stock} Predicted Close Prices")
    ax.legend()
    st.pyplot(fig)
    plt.close(fig)