Spaces:
Sleeping
Sleeping
Upload 9 files
Browse files- .gitattributes +2 -0
- app.py +298 -63
- equal_weighted_benchmark_df.csv +249 -0
- factor_data.csv +3 -0
- financial_analysis.py +205 -0
- finbert_sentiment.csv +3 -0
- merged_data.csv +0 -0
- prices.csv +0 -0
- requirements.txt +5 -1
- scraping.xlsx +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
factor_data.csv filter=lfs diff=lfs merge=lfs -text
|
37 |
+
finbert_sentiment.csv filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -1,77 +1,255 @@
|
|
1 |
# import streamlit as st
|
2 |
# import pandas as pd
|
3 |
# import numpy as np
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
# import matplotlib.pyplot as plt
|
|
|
|
|
5 |
|
6 |
-
# # Placeholder for loading models
|
7 |
-
# def load_models():
|
8 |
-
# # In a real scenario, you would load your pre-trained models here.
|
9 |
-
# return {"model_placeholder": "Loaded Model"}
|
10 |
-
|
11 |
-
# # Placeholder function to classify news as ESG-related
|
12 |
-
# def classify_esg(text, models, api_key):
|
13 |
-
# # Simulate ESG classification logic
|
14 |
-
# # This is where you would use your model to classify the text.
|
15 |
-
# return np.random.choice(["Yes", "No"])
|
16 |
-
|
17 |
-
# # Placeholder function to determine sentiment
|
18 |
-
# def determine_sentiment(text, models, api_key):
|
19 |
-
# # Simulate sentiment analysis logic
|
20 |
-
# # This is where you would use your model to determine the sentiment.
|
21 |
-
# return np.random.choice(["Positive", "Neutral", "Negative"])
|
22 |
-
|
23 |
-
# # Placeholder function to run Alphalens analysis
|
24 |
-
# def run_alphalens_analysis(data, models, api_key):
|
25 |
-
# # Simulate some metrics
|
26 |
-
# metrics = {"alpha": np.random.rand(), "beta": np.random.rand()}
|
27 |
-
|
28 |
-
# # Generate a simple plot
|
29 |
-
# fig, ax = plt.subplots()
|
30 |
-
# ax.plot([1, 2, 3], [1, 2, 3], 'r') # Example plot
|
31 |
-
# ax.set_title('Example Plot')
|
32 |
-
|
33 |
-
# return metrics, [fig]
|
34 |
|
35 |
-
#
|
36 |
-
#
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
-
#
|
|
|
39 |
|
40 |
-
#
|
|
|
|
|
|
|
|
|
41 |
|
42 |
-
#
|
43 |
-
# if uploaded_file is not None:
|
44 |
-
# data = pd.read_csv(uploaded_file)
|
45 |
-
# st.write("Uploaded News Data:")
|
46 |
-
# st.dataframe(data)
|
47 |
|
48 |
-
#
|
49 |
-
# data['ESG'] = data['news'].apply(lambda x: classify_esg(x, models, api_key))
|
50 |
-
# st.write("News with ESG Classification:")
|
51 |
-
# st.dataframe(data)
|
52 |
|
53 |
-
#
|
54 |
-
#
|
55 |
-
#
|
56 |
-
# st.dataframe(data)
|
57 |
|
58 |
-
#
|
59 |
-
#
|
60 |
-
|
61 |
-
#
|
62 |
-
|
63 |
-
#
|
64 |
-
#
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
|
68 |
import streamlit as st
|
69 |
import pandas as pd
|
70 |
import numpy as np
|
71 |
import os
|
|
|
72 |
import openai
|
|
|
73 |
import json
|
74 |
from getpass import getpass
|
|
|
75 |
from tqdm import tqdm
|
76 |
import matplotlib.pyplot as plt
|
77 |
|
@@ -146,7 +324,60 @@ def update_dataset_with_gpt_sentiment(df, model, column_name='GPT_based_sentimen
|
|
146 |
|
147 |
return df
|
148 |
|
149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
st.set_page_config(page_title="NLP ESG Project", page_icon="📈")
|
151 |
|
152 |
# Custom styles
|
@@ -180,6 +411,7 @@ def app_layout():
|
|
180 |
|
181 |
# API Key input
|
182 |
openai_api_key = st.sidebar.text_input("Enter your OpenAI API key", type="password")
|
|
|
183 |
os.environ["OPENAI_API_KEY"] = openai_api_key
|
184 |
openai.api_key = openai_api_key
|
185 |
|
@@ -199,16 +431,22 @@ def app_layout():
|
|
199 |
st.sidebar.text(f"Current Strategy: {'Conservative' if investment_strategy <= 0.5 else 'Aggressive'}")
|
200 |
|
201 |
# Main container
|
202 |
-
if uploaded_file
|
203 |
# Displaying the file
|
204 |
data = pd.read_csv(uploaded_file)
|
|
|
205 |
st.write("### Uploaded News Data:")
|
206 |
st.dataframe(data, use_container_width=True)
|
207 |
|
208 |
if st.button("🔍 Classify ESG"):
|
209 |
st.write("Classifying ESG-related news...")
|
210 |
-
|
211 |
-
|
|
|
|
|
|
|
|
|
|
|
212 |
|
213 |
if st.button("😊 Determine Sentiment"):
|
214 |
st.write("Determining sentiment using GPT...")
|
@@ -216,9 +454,9 @@ def app_layout():
|
|
216 |
try:
|
217 |
with st.spinner("Analyzing sentiment..."):
|
218 |
# Assume you have your API key set and a function defined to handle sentiment analysis
|
219 |
-
updated_data = update_dataset_with_gpt_sentiment(
|
220 |
st.write("News with GPT-based Sentiment Analysis:")
|
221 |
-
st.dataframe(updated_data, use_container_width=True)
|
222 |
except Exception as e:
|
223 |
st.error(f"An error occurred: {e}")
|
224 |
|
@@ -229,8 +467,5 @@ def app_layout():
|
|
229 |
with st.expander("Advanced Settings"):
|
230 |
st.write("Any advanced settings and configurations will go here.")
|
231 |
|
232 |
-
def main():
|
233 |
-
app_layout()
|
234 |
-
|
235 |
if __name__ == "__main__":
|
236 |
-
main()
|
|
|
1 |
# import streamlit as st
|
2 |
# import pandas as pd
|
3 |
# import numpy as np
|
4 |
+
# import os
|
5 |
+
# import ast
|
6 |
+
# import openai
|
7 |
+
# from openai import OpenAI
|
8 |
+
# import json
|
9 |
+
# from getpass import getpass
|
10 |
+
# from scipy.spatial.distance import cosine
|
11 |
+
# from tqdm import tqdm
|
12 |
# import matplotlib.pyplot as plt
|
13 |
+
# import financial_analysis as fa
|
14 |
+
# from financial_analysis import alphalens_analysis, alphalens_analysis_by_sector, calculate_information_ratio, process_sentiment_data
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
+
# def get_sentiment_gpt(company, SASB, news, max_retries=5, model = 'gpt-4-turbo-2024-04-09'):
|
18 |
+
# system_prompt = """
|
19 |
+
# As a specialist in ESG analytics,
|
20 |
+
# You possess a deep understanding of evaluating environmental, social, and governance factors in the context of corporate news.
|
21 |
+
# Your expertise lies in discerning the underlying sentiment of news segments that pertain to a company's ESG practices,
|
22 |
+
# determining whether the coverage reflects a positive, negative, or neutral stance.
|
23 |
+
# """
|
24 |
|
25 |
+
# allowed_sentiments = ['Negative', 'Positive', 'Neutral']
|
26 |
+
# attempt = 0
|
27 |
|
28 |
+
# while attempt < max_retries:
|
29 |
+
# main_prompt = f"""
|
30 |
+
# Classify the sentiment (Only options: Positive, Negative, Neutral) of the following news: {news} |
|
31 |
+
# The sentiment classification should be about the sections of the news talking about the company {company}. |
|
32 |
+
# The ESG part of the news should be around topics within the following SASB topics {SASB}
|
33 |
|
34 |
+
# The output should be a structured JSON object with the key: "sentiment".
|
|
|
|
|
|
|
|
|
35 |
|
36 |
+
# Here is the format I expect for the JSON object:
|
|
|
|
|
|
|
37 |
|
38 |
+
# {{
|
39 |
+
# "sentiment": "Enter 'Positive', 'Neutral', or 'Negative'",
|
40 |
+
# }}
|
|
|
41 |
|
42 |
+
# Do not return any additional text or information outside of this JSON structure.
|
43 |
+
# """
|
44 |
+
|
45 |
+
# messages = [
|
46 |
+
# {"role": "system", "content": system_prompt},
|
47 |
+
# {"role": "user", "content": main_prompt}
|
48 |
+
# ]
|
49 |
+
|
50 |
+
# response = openai.chat.completions.create(
|
51 |
+
# model=model,
|
52 |
+
# messages=messages,
|
53 |
+
# response_format={"type": "json_object"} # Enable JSON mode
|
54 |
+
# )
|
55 |
+
|
56 |
+
# response_json = json.loads(response.choices[0].message.content)
|
57 |
+
# json_sentiment = response_json.get('sentiment')
|
58 |
+
|
59 |
+
# if json_sentiment in allowed_sentiments:
|
60 |
+
# return json_sentiment
|
61 |
|
62 |
+
# attempt += 1
|
63 |
+
|
64 |
+
# # After max retries, if no valid sentiment is found, handle as needed (e.g., return a default sentiment)
|
65 |
+
# print("Failed to obtain a valid sentiment after maximum retries. Defaulting to 'Neutral'.")
|
66 |
+
# return 'Neutral' # Default return value if no valid sentiment is obtained
|
67 |
+
|
68 |
+
|
69 |
+
# def update_dataset_with_gpt_sentiment(df, model, column_name='GPT_based_sentiment'):
|
70 |
+
# # Initialize the new column to store GPT-based sentiment
|
71 |
+
# df['GPT_based_sentiment'] = None
|
72 |
+
|
73 |
+
# # Use tqdm to show a progress bar for the operation
|
74 |
+
# for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing rows"):
|
75 |
+
# # Extract necessary information for each row
|
76 |
+
# company = row['Company'] # Make sure this matches your DataFrame's column name
|
77 |
+
# SASB = row['SASB'] # Make sure this matches your DataFrame's column name
|
78 |
+
# news = row['title & content'] # Make sure this matches your DataFrame's column name
|
79 |
+
|
80 |
+
# # Call the function to get the sentiment
|
81 |
+
# sentiment = get_sentiment_gpt(company, SASB, news, model=model)
|
82 |
+
|
83 |
+
# # Update the DataFrame with the obtained sentiment
|
84 |
+
# df.at[index, column_name] = sentiment # Now correctly assigns the sentiment
|
85 |
+
|
86 |
+
# return df
|
87 |
+
|
88 |
+
# # Function to get embeddings, provided by you
|
89 |
+
# def get_embedding(text, model="text-embedding-3-small"):
|
90 |
+
# client = OpenAI()
|
91 |
+
# text = text.replace("\n", " ")
|
92 |
+
# return client.embeddings.create(input=[text], model=model).data[0].embedding
|
93 |
+
|
94 |
+
# # Function to calculate cosine similarity
|
95 |
+
# def cosine_similarity(v1, v2):
|
96 |
+
# return 1 - cosine(v1, v2)
|
97 |
+
|
98 |
+
# def calculate_sasb_embeddings(sasb_str):
|
99 |
+
# # Safely convert the string representation of a dictionary into an actual dictionary
|
100 |
+
# try:
|
101 |
+
# sasb_dict = ast.literal_eval(sasb_str)
|
102 |
+
# if not isinstance(sasb_dict, dict):
|
103 |
+
# raise ValueError("SASB column does not contain a valid dictionary.")
|
104 |
+
# except ValueError as e:
|
105 |
+
# print(f"Error converting SASB column to dictionary: {e}")
|
106 |
+
# return {}
|
107 |
+
|
108 |
+
# sasb_embeddings = {}
|
109 |
+
# for topic, content in sasb_dict.items():
|
110 |
+
# # Join the list of keywords into a single string
|
111 |
+
# combined_content = ' '.join(content)
|
112 |
+
# sasb_embeddings[topic] = get_embedding(combined_content)
|
113 |
+
# return sasb_embeddings
|
114 |
+
|
115 |
+
# # Function to process ESG classification
|
116 |
+
# def classify_esg(data):
|
117 |
+
# # Calculate embeddings for the news
|
118 |
+
# data['news_embeddings'] = data['title & content'].apply(get_embedding)
|
119 |
+
|
120 |
+
# # Calculate embeddings for SASB topics (you need to have your SASB topics defined)
|
121 |
+
# data['sasb_embeddings'] = data['SASB'].apply(calculate_sasb_embeddings)
|
122 |
+
|
123 |
+
# # Compute cosine similarities
|
124 |
+
# data['cosine_similarities'] = data.apply(
|
125 |
+
# lambda row: {topic: cosine_similarity(row['news_embeddings'], emb)
|
126 |
+
# for topic, emb in row['sasb_embeddings'].items()},
|
127 |
+
# axis=1
|
128 |
+
# )
|
129 |
+
|
130 |
+
# # Extract max cosine similarity
|
131 |
+
# data['max_cosine_similarity'] = data['cosine_similarities'].apply(lambda x: max(x.values()))
|
132 |
+
|
133 |
+
# # Mark the top 10% of news by max_cosine_similarity within each 'Sector' as 'Yes'
|
134 |
+
# sector_thresholds = data.groupby('Sector')['max_cosine_similarity'].quantile(0.9).to_dict()
|
135 |
+
# data['ESG_relevance'] = data.apply(
|
136 |
+
# lambda row: 'Yes' if row['max_cosine_similarity'] >= sector_thresholds[row['Sector']] else 'No',
|
137 |
+
# axis=1
|
138 |
+
# )
|
139 |
+
# return data
|
140 |
+
|
141 |
+
# def main():
|
142 |
+
# st.set_page_config(page_title="NLP ESG Project", page_icon="📈")
|
143 |
+
|
144 |
+
# # Custom styles
|
145 |
+
# st.markdown(
|
146 |
+
# """
|
147 |
+
# <style>
|
148 |
+
# .streamlit-container {
|
149 |
+
# background-color: #F5F5F5;
|
150 |
+
# }
|
151 |
+
# .stButton>button {
|
152 |
+
# width: 100%;
|
153 |
+
# border-radius: 10px;
|
154 |
+
# border: none;
|
155 |
+
# margin: 10px 0;
|
156 |
+
# padding: 15px 20px;
|
157 |
+
# background-color: #79AEC8;
|
158 |
+
# color: white;
|
159 |
+
# font-size: 18px;
|
160 |
+
# }
|
161 |
+
# .stButton>button:hover {
|
162 |
+
# background-color: #6699CC;
|
163 |
+
# }
|
164 |
+
# </style>
|
165 |
+
# """,
|
166 |
+
# unsafe_allow_html=True,
|
167 |
+
# )
|
168 |
+
|
169 |
+
# # Header section
|
170 |
+
# st.write("# NLP Project: ESG News Analysis and Financial Impact")
|
171 |
+
# st.sidebar.write("## Configuration")
|
172 |
+
|
173 |
+
# # API Key input
|
174 |
+
# openai_api_key = st.sidebar.text_input("Enter your OpenAI API key", type="password")
|
175 |
+
# openai_api_key = os.getenv('OPENAI_API_KEY')
|
176 |
+
# os.environ["OPENAI_API_KEY"] = openai_api_key
|
177 |
+
# openai.api_key = openai_api_key
|
178 |
+
|
179 |
+
# # File Upload
|
180 |
+
# st.sidebar.write("## Upload Data")
|
181 |
+
# uploaded_file = st.sidebar.file_uploader("", type="csv")
|
182 |
+
|
183 |
+
# # Investment Strategy Slider
|
184 |
+
# st.sidebar.markdown("### Investment Strategy")
|
185 |
+
# investment_strategy = st.sidebar.slider(
|
186 |
+
# "Investment Strategy",
|
187 |
+
# min_value=0.0, max_value=1.0, value=0.5, step=0.01,
|
188 |
+
# format="",
|
189 |
+
# help="0 is Conservative, 1 is Aggressive",
|
190 |
+
# label_visibility="collapsed"
|
191 |
+
# )
|
192 |
+
# st.sidebar.text(f"Current Strategy: {'Conservative' if investment_strategy <= 0.5 else 'Aggressive'}")
|
193 |
+
|
194 |
+
# # Main container
|
195 |
+
|
196 |
+
# st.sidebar.write("## Upload Data")
|
197 |
+
# uploaded_file = st.sidebar.file_uploader("Please upload a CSV file", type="csv", label_visibility="collapsed")
|
198 |
+
|
199 |
+
# if uploaded_file:
|
200 |
+
# # Displaying the file
|
201 |
+
# data = pd.read_csv(uploaded_file)
|
202 |
+
# st.session_state.classified_data = classify_esg(data)
|
203 |
+
# st.write("### Uploaded News Data:")
|
204 |
+
# st.dataframe(data, use_container_width=True)
|
205 |
+
|
206 |
+
# if st.button("🔍 Classify ESG"):
|
207 |
+
# st.write("Classifying ESG-related news...")
|
208 |
+
# try:
|
209 |
+
# with st.spinner("Calculating embeddings and similarities..."):
|
210 |
+
# st.session_state.classified_data = classify_esg(st.session_state.classified_data)
|
211 |
+
# st.write("Classified News Data:")
|
212 |
+
# st.dataframe(st.session_state.classified_data, use_container_width=True)
|
213 |
+
# except Exception as e:
|
214 |
+
# st.error(f"An error occurred: {e}")
|
215 |
+
|
216 |
+
# if st.button("😊 Determine Sentiment"):
|
217 |
+
# st.write("Determining sentiment using GPT...")
|
218 |
+
# # Run sentiment analysis with GPT
|
219 |
+
# try:
|
220 |
+
# with st.spinner("Analyzing sentiment..."):
|
221 |
+
# # Assume you have your API key set and a function defined to handle sentiment analysis
|
222 |
+
# st.session_state.updated_data = update_dataset_with_gpt_sentiment(st.session_state.classified_data, model='gpt-4-turbo-2024-04-09')
|
223 |
+
# st.write("News with GPT-based Sentiment Analysis:")
|
224 |
+
# st.dataframe(st.session_state.updated_data, use_container_width=True)
|
225 |
+
# except Exception as e:
|
226 |
+
# st.error(f"An error occurred: {e}")
|
227 |
+
|
228 |
+
# if st.button("📊 Alphalens Analysis"):
|
229 |
+
# # process_sentiment_data(sentiment_data = 'finbert_sentiment.csv', sector_ticker = 'sector_ticker.csv', prices = 'prices.csv')
|
230 |
+
# prices = pd.read_csv('prices.csv')
|
231 |
+
# factor_data = pd.read_csv('factor_data.csv')
|
232 |
+
# merged_data = pd.read_csv('merged_data.csv')
|
233 |
+
|
234 |
+
# alphalens_analysis(merged_data, prices)
|
235 |
+
|
236 |
+
# # Expander for advanced settings
|
237 |
+
# with st.expander("Advanced Settings"):
|
238 |
+
# st.write("Any advanced settings and configurations will go here.")
|
239 |
+
|
240 |
+
# if __name__ == "__main__":
|
241 |
+
# main()
|
242 |
|
243 |
import streamlit as st
|
244 |
import pandas as pd
|
245 |
import numpy as np
|
246 |
import os
|
247 |
+
import ast
|
248 |
import openai
|
249 |
+
from openai import OpenAI
|
250 |
import json
|
251 |
from getpass import getpass
|
252 |
+
from scipy.spatial.distance import cosine
|
253 |
from tqdm import tqdm
|
254 |
import matplotlib.pyplot as plt
|
255 |
|
|
|
324 |
|
325 |
return df
|
326 |
|
327 |
+
# Function to get the embedding of a text from the OpenAI embeddings API
|
328 |
+
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding vector of ``text`` from the OpenAI embeddings API.

    Args:
        text: String to embed; newlines are replaced by spaces before sending.
        model: Name of the embedding model to request.

    Returns:
        The ``embedding`` attribute of the first item in the API response.
    """
    # A client is created per call with its default configuration.
    api = OpenAI()
    flattened = text.replace("\n", " ")
    result = api.embeddings.create(input=[flattened], model=model)
    return result.data[0].embedding
|
332 |
+
|
333 |
+
# Function to calculate cosine similarity
|
334 |
+
def cosine_similarity(v1, v2):
    """Return the cosine similarity of two vectors.

    Computed as one minus the cosine distance from
    ``scipy.spatial.distance.cosine``.
    """
    distance = cosine(v1, v2)
    return 1 - distance
|
336 |
+
|
337 |
+
def calculate_sasb_embeddings(sasb_str):
    """Embed every SASB topic described by a stringified dictionary.

    Args:
        sasb_str: String representation of a dict mapping a topic name to
            its keywords (an iterable of keywords, or a single string).

    Returns:
        A dict mapping each topic to the value returned by ``get_embedding``
        for that topic's keywords joined with spaces. An empty dict is
        returned (and a message printed) when ``sasb_str`` cannot be parsed
        into a dictionary.
    """
    # literal_eval only evaluates Python literals. It raises SyntaxError for
    # malformed text, ValueError for non-literal or non-string input, and
    # TypeError for e.g. unhashable dictionary keys; all of them mean
    # "this cell is not a usable dictionary".
    try:
        sasb_dict = ast.literal_eval(sasb_str)
        if not isinstance(sasb_dict, dict):
            raise ValueError("SASB column does not contain a valid dictionary.")
    except (ValueError, SyntaxError, TypeError) as e:
        print(f"Error converting SASB column to dictionary: {e}")
        return {}

    sasb_embeddings = {}
    for topic, content in sasb_dict.items():
        if isinstance(content, str):
            # Joining a bare string would insert a space between every
            # character, so a single-string value is used unchanged.
            combined_content = content
        else:
            # Join the keywords into a single string
            combined_content = ' '.join(str(keyword) for keyword in content)
        sasb_embeddings[topic] = get_embedding(combined_content)
    return sasb_embeddings
|
353 |
+
|
354 |
+
# Function to process ESG classification
|
355 |
+
def classify_esg(data):
    """Flag the news rows most similar to their SASB topics within each sector.

    The columns ``news_embeddings``, ``sasb_embeddings``,
    ``cosine_similarities``, ``max_cosine_similarity`` and ``ESG_relevance``
    are added to ``data`` in place.

    Args:
        data: DataFrame with the columns 'title & content', 'SASB' and
            'Sector'.

    Returns:
        The same DataFrame, with the columns listed above added.
    """
    # Calculate embeddings for the news
    data['news_embeddings'] = data['title & content'].apply(get_embedding)

    # Calculate embeddings for the SASB topics listed in each row
    data['sasb_embeddings'] = data['SASB'].apply(calculate_sasb_embeddings)

    # Compute the cosine similarity between the news and every SASB topic
    data['cosine_similarities'] = data.apply(
        lambda row: {topic: cosine_similarity(row['news_embeddings'], emb)
                     for topic, emb in row['sasb_embeddings'].items()},
        axis=1
    )

    # Extract the max cosine similarity. A row whose SASB value could not be
    # parsed has no similarities at all; it gets NaN instead of making max()
    # raise on an empty sequence.
    data['max_cosine_similarity'] = data['cosine_similarities'].apply(
        lambda sims: max(sims.values(), default=float('nan'))
    )

    # Mark the top 10% of news by max_cosine_similarity within each 'Sector'
    # as 'Yes'. NaN similarities compare False and therefore end up as 'No'.
    sector_thresholds = data.groupby('Sector')['max_cosine_similarity'].quantile(0.9).to_dict()
    data['ESG_relevance'] = data.apply(
        lambda row: 'Yes' if row['max_cosine_similarity'] >= sector_thresholds[row['Sector']] else 'No',
        axis=1
    )
    return data
|
379 |
+
|
380 |
+
def main():
|
381 |
st.set_page_config(page_title="NLP ESG Project", page_icon="📈")
|
382 |
|
383 |
# Custom styles
|
|
|
411 |
|
412 |
# API Key input
|
413 |
openai_api_key = st.sidebar.text_input("Enter your OpenAI API key", type="password")
|
414 |
+
openai_api_key = os.getenv('OPENAI_API_KEY')
|
415 |
os.environ["OPENAI_API_KEY"] = openai_api_key
|
416 |
openai.api_key = openai_api_key
|
417 |
|
|
|
431 |
st.sidebar.text(f"Current Strategy: {'Conservative' if investment_strategy <= 0.5 else 'Aggressive'}")
|
432 |
|
433 |
# Main container
|
434 |
+
if uploaded_file:
|
435 |
# Displaying the file
|
436 |
data = pd.read_csv(uploaded_file)
|
437 |
+
st.session_state.classified_data = classify_esg(data)
|
438 |
st.write("### Uploaded News Data:")
|
439 |
st.dataframe(data, use_container_width=True)
|
440 |
|
441 |
if st.button("🔍 Classify ESG"):
|
442 |
st.write("Classifying ESG-related news...")
|
443 |
+
try:
|
444 |
+
with st.spinner("Calculating embeddings and similarities..."):
|
445 |
+
st.session_state.classified_data = classify_esg(st.session_state.classified_data)
|
446 |
+
st.write("Classified News Data:")
|
447 |
+
st.dataframe(st.session_state.classified_data, use_container_width=True)
|
448 |
+
except Exception as e:
|
449 |
+
st.error(f"An error occurred: {e}")
|
450 |
|
451 |
if st.button("😊 Determine Sentiment"):
|
452 |
st.write("Determining sentiment using GPT...")
|
|
|
454 |
try:
|
455 |
with st.spinner("Analyzing sentiment..."):
|
456 |
# Assume you have your API key set and a function defined to handle sentiment analysis
|
457 |
+
st.session_state.updated_data = update_dataset_with_gpt_sentiment(st.session_state.classified_data, model='gpt-4-turbo-2024-04-09')
|
458 |
st.write("News with GPT-based Sentiment Analysis:")
|
459 |
+
st.dataframe(st.session_state.updated_data, use_container_width=True)
|
460 |
except Exception as e:
|
461 |
st.error(f"An error occurred: {e}")
|
462 |
|
|
|
467 |
with st.expander("Advanced Settings"):
|
468 |
st.write("Any advanced settings and configurations will go here.")
|
469 |
|
|
|
|
|
|
|
470 |
if __name__ == "__main__":
|
471 |
+
main()
|
equal_weighted_benchmark_df.csv
ADDED
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
,date,equal_weighted_benchmark,5d_mean_return,10d_mean_return,20d_mean_return
|
2 |
+
21,2022-09-14 00:00:00+00:00,-0.003676159000736702,-0.004098304655096071,-0.008331580823424601,-0.0043233482313036415
|
3 |
+
22,2022-09-15 00:00:00+00:00,-0.014448647849191252,-0.00811570954039362,-0.007425553791892592,-0.005290966130099582
|
4 |
+
23,2022-09-16 00:00:00+00:00,-0.01018150984258006,-0.010291577429425948,-0.007172411076246634,-0.0023074253300825035
|
5 |
+
24,2022-09-19 00:00:00+00:00,0.009131715240025375,-0.010138971898947535,-0.006049274613087459,-0.0023989673424125372
|
6 |
+
25,2022-09-20 00:00:00+00:00,-0.0013076505427114021,-0.011908672204641485,-0.004016785108368158,-0.001667495719771195
|
7 |
+
26,2022-09-21 00:00:00+00:00,-0.02410392747782434,-0.012843596836909185,-0.0033574809954569616,-0.0022232276080379823
|
8 |
+
27,2022-09-22 00:00:00+00:00,-0.025717711927302645,-0.007054626047366221,-0.0005691883822669203,-0.0012626184643332202
|
9 |
+
28,2022-09-23 00:00:00+00:00,-0.00921157187938197,-0.004281643476182188,0.0003177701662729227,-0.0005353617907464471
|
10 |
+
29,2022-09-26 00:00:00+00:00,-0.00048038023989492915,-0.0020482297281117206,0.00022344205284363693,0.0014588809659362722
|
11 |
+
30,2022-09-27 00:00:00+00:00,-0.006307341359481893,0.004207864879698234,-0.001060793557155493,0.0017115541097231192
|
12 |
+
31,2022-09-28 00:00:00+00:00,0.006330843278072321,0.006711352378629434,-0.00029325822628145236,0.0032371122185726256
|
13 |
+
32,2022-09-29 00:00:00+00:00,-0.011724517037708796,0.00620640676792534,-0.003413101647874253,0.0032347424556726633
|
14 |
+
33,2022-09-30 00:00:00+00:00,0.0018495754567621427,0.004946832290400967,0.0027578805195729995,0.0036136359005584486
|
15 |
+
34,2022-10-03 00:00:00+00:00,0.031018002364648946,0.002382947233928605,0.0013140047327894195,0.004264118947733028
|
16 |
+
35,2022-10-04 00:00:00+00:00,0.00580128834931611,-0.006320930191630265,0.0007028635860197298,0.0031912776861333127
|
17 |
+
36,2022-10-05 00:00:00+00:00,0.0038416563336735906,-0.007159056539681236,-0.0011526691448603224,0.002274586532981925
|
18 |
+
37,2022-10-06 00:00:00+00:00,-0.01798577177022608,-0.01268015834390541,-0.0019770040155771347,0.00042830915460623605
|
19 |
+
38,2022-10-07 00:00:00+00:00,-0.0106111914983441,0.0005950230744271944,-0.0013664621449848522,0.0025698646510722016
|
20 |
+
39,2022-10-10 00:00:00+00:00,-0.012968828756671921,0.00028552263376673383,0.002645817758450723,0.003383232250678209
|
21 |
+
40,2022-10-11 00:00:00+00:00,0.0015507390715413976,0.007995292527418242,0.0044506647980164125,0.004404850114552215
|
22 |
+
41,2022-10-12 00:00:00+00:00,-0.025093912254979374,0.0050514656435360515,0.006740026005053216,0.004128657804646016
|
23 |
+
42,2022-10-13 00:00:00+00:00,0.05200138806905839,0.009344607857668694,0.010223197273261998,0.006801831145217036
|
24 |
+
43,2022-10-14 00:00:00+00:00,-0.012209288945237099,-0.0033260930873431956,0.00427888322523018,0.005193443672798097
|
25 |
+
44,2022-10-17 00:00:00+00:00,0.025213559469038502,0.005023441177708918,0.007086754114892575,0.006035676422397556
|
26 |
+
45,2022-10-18 00:00:00+00:00,-0.012778164543042156,0.0008588121267430121,0.0055942624511079436,0.004999830632095196
|
27 |
+
46,2022-10-19 00:00:00+00:00,-0.004593051792337033,0.008194656122117549,0.005763452950807643,0.005127444102465934
|
28 |
+
47,2022-10-20 00:00:00+00:00,-0.011707362492000929,0.010570150789625577,0.002867315957998419,0.004374917388519577
|
29 |
+
48,2022-10-21 00:00:00+00:00,0.02959810000422721,0.01211361422917934,0.006601291022304565,0.006118164276748382
|
30 |
+
49,2022-10-24 00:00:00+00:00,0.004351857113112529,0.008985688957480361,0.004032491879711047,0.004075057070100423
|
31 |
+
50,2022-10-25 00:00:00+00:00,0.02353960069183561,0.01029924636627716,0.004169854287116967,0.004362253021904609
|
32 |
+
51,2022-10-26 00:00:00+00:00,0.006655722595231783,0.0031780048609374107,0.0014094625537276857,0.003422747853239372
|
33 |
+
52,2022-10-27 00:00:00+00:00,-0.0045738265529560165,-0.0045362994782050204,0.0030547040125687365,0.0033921065755376425
|
34 |
+
53,2022-10-28 00:00:00+00:00,0.014491488053761043,0.0009977835965575294,0.00584218970073483,0.0033196750893709414
|
35 |
+
54,2022-10-31 00:00:00+00:00,0.010623521518342454,-0.0008957028827467486,0.0046466853676672415,0.002123153162326777
|
36 |
+
55,2022-11-01 00:00:00+00:00,-0.011149948426442515,-0.0019018448298914236,0.00417378663201427,0.0017436676511514526
|
37 |
+
56,2022-11-02 00:00:00+00:00,-0.0317018465087496,-0.00038511181416520335,0.004241303048243899,0.0037937682687762425
|
38 |
+
57,2022-11-03 00:00:00+00:00,0.023672653813776284,0.010907415734302213,0.005717489711766106,0.004667984801179622
|
39 |
+
58,2022-11-04 00:00:00+00:00,0.004877327738659193,0.010726166608973907,0.0053118296322522945,0.0035912822399911896
|
40 |
+
59,2022-11-07 00:00:00+00:00,0.0056193648553276125,0.010365722373878391,0.003976180662520567,0.0026974750636089826
|
41 |
+
60,2022-11-08 00:00:00+00:00,-0.0034980949721803317,0.010454171748779021,0.004389242344400509,0.0017444841092120649
|
42 |
+
61,2022-11-09 00:00:00+00:00,0.023096640255727902,0.008962463760018928,0.005381134793118519,0.0022076431310965885
|
43 |
+
62,2022-11-10 00:00:00+00:00,0.022445370157588133,0.0004473559719162085,0.0036342320147014326,0.0008981438759837245
|
44 |
+
63,2022-11-11 00:00:00+00:00,0.0029647455307566235,-0.00010862140713379332,0.0007734572539003317,-0.0003605695447114842
|
45 |
+
64,2022-11-14 00:00:00+00:00,0.006009028155624304,-0.002092974244106969,-0.00027674591649861757,0.0013991941478267607
|
46 |
+
65,2022-11-15 00:00:00+00:00,-0.010367358114339606,-0.0013213151032776165,-0.00054064586546945,0.00010610733776163811
|
47 |
+
66,2022-11-16 00:00:00+00:00,-0.01783699590204662,0.001897438852983208,0.003294878175626652,-0.00027251094978617414
|
48 |
+
67,2022-11-17 00:00:00+00:00,0.019555673624933614,0.006828897752946318,0.0034445923547001493,-0.0003571100874945935
|
49 |
+
68,2022-11-18 00:00:00+00:00,-0.0072918795158902795,0.001657995041880389,0.0018145154577246846,-0.0013897707927707614
|
50 |
+
69,2022-11-21 00:00:00+00:00,0.009723643434801197,0.0015510603620931208,0.0013430026592687863,-0.0015683766710776707
|
51 |
+
70,2022-11-22 00:00:00+00:00,0.005759780551965727,0.0002711357374123586,-0.0008967914886358623,-0.001495973072915131
|
52 |
+
71,2022-11-23 00:00:00+00:00,0.006295096012988876,0.0046833577595701125,-0.0009277130177343687,-0.0017999783977110737
|
53 |
+
72,2022-11-25 00:00:00+00:00,-0.005976146401571202,8.099722632682047e-05,-0.0017781511474425098,-0.0022840369626763724
|
54 |
+
73,2022-11-28 00:00:00+00:00,-0.007814302311672312,0.001980780515448426,-0.0015152294865902847,-0.0015899960613449957
|
55 |
+
74,2022-11-29 00:00:00+00:00,0.0032986264786386797,0.0011151282248217215,0.0030806208868460055,-0.001217022491572957
|
56 |
+
75,2022-11-30 00:00:00+00:00,0.0279746941884447,-0.002047443340334215,0.0007466396006418766,-0.0017768779636501985
|
57 |
+
76,2022-12-01 00:00:00+00:00,-0.016352882788051644,-0.006394598044119795,-0.0037531595621781923,-0.002941462762931817
|
58 |
+
77,2022-12-02 00:00:00+00:00,0.00337744659239721,-0.003659860893787665,-0.004035585947964563,-0.001753146786279652
|
59 |
+
78,2022-12-05 00:00:00+00:00,-0.01209042554527834,-0.004920695037299253,-0.004520806783547946,-0.0019170326401667988
|
60 |
+
79,2022-12-06 00:00:00+00:00,-0.01260211701899184,0.005008673283487237,-0.004411106239523519,-0.0012863211194042838
|
61 |
+
80,2022-12-07 00:00:00+00:00,0.0054668424990757655,0.003578888247210838,-0.0021108389165162425,-0.00039497903697917883
|
62 |
+
81,2022-12-08 00:00:00+00:00,-0.0025281110944274376,-0.001158953755318699,-0.002712652494580931,0.0002611377891841786
|
63 |
+
82,2022-12-09 00:00:00+00:00,-0.0030581677096146818,-0.004485304011963333,-0.002838439871711985,0.00012013770303957311
|
64 |
+
83,2022-12-12 00:00:00+00:00,0.038459837053426336,-0.004204296782894843,-0.001682117639733388,0.000955921367436175
|
65 |
+
84,2022-12-13 00:00:00+00:00,-0.019439876506603074,-0.013443002800764471,-0.005299096662726175,-0.00042983011041462674
|
66 |
+
85,2022-12-14 00:00:00+00:00,-0.017943709493747392,-0.007634336469754126,-0.0042559273519537375,0.0001271483306378504
|
67 |
+
86,2022-12-15 00:00:00+00:00,-0.019156928642757205,-0.004258643551772655,-0.002209615432008975,0.0016259677434219133
|
68 |
+
87,2022-12-16 00:00:00+00:00,-0.0016701182490717148,-0.0012211795063897244,0.0005605489276259438,0.0026765297699224036
|
69 |
+
88,2022-12-19 00:00:00+00:00,-0.010985076265142509,0.0008081326010809642,0.0007033595870670358,0.0013818632373116411
|
70 |
+
89,2022-12-20 00:00:00+00:00,0.011286800676776778,0.0029639323447359475,0.001934726087374865,0.001938090675887691
|
71 |
+
90,2022-12-21 00:00:00+00:00,-0.0007796221147050135,-0.0009578917211637821,0.00135614882488803,0.002144071681145062
|
72 |
+
91,2022-12-22 00:00:00+00:00,-0.0038849057168939783,-0.00015662701462072144,0.003366475552433948,0.002499192033871605
|
73 |
+
92,2022-12-23 00:00:00+00:00,0.008522759321651276,0.002373848466957648,0.0032046714130471816,0.002343718221016368
|
74 |
+
93,2022-12-27 00:00:00+00:00,-0.0004069620633721189,0.000606894125618909,0.003732794819247755,0.0027976522861079722
|
75 |
+
94,2022-12-28 00:00:00+00:00,-0.008134940624643141,0.0009048488555181238,0.004887554807300264,0.002836614455128162
|
76 |
+
95,2022-12-29 00:00:00+00:00,0.003138254119539179,0.003677972424751826,0.004841251866237464,0.0031621774740277484
|
77 |
+
96,2022-12-30 00:00:00+00:00,0.008862472869208805,0.006892494230496532,0.005684334529429424,0.0029175460267830363
|
78 |
+
97,2023-01-03 00:00:00+00:00,-0.0004454056107618189,0.00393891621987011,0.004802254891621128,0.002913869213491017
|
79 |
+
98,2023-01-04 00:00:00+00:00,0.001066202344047466,0.0068204956456235834,0.0021124611665669065,0.003965826277727658
|
80 |
+
99,2023-01-05 00:00:00+00:00,0.00539287954472177,0.008744123929100633,0.0019087545284069858,0.0035962825339874486
|
81 |
+
100,2023-01-06 00:00:00+00:00,0.019080784087096292,0.0058348405945136605,0.002919496869240496,0.0029248423599723596
|
82 |
+
101,2023-01-09 00:00:00+00:00,-0.005423323975577465,0.004225346680399479,0.0015359724910240552,0.0017124357670201382
|
83 |
+
102,2023-01-10 00:00:00+00:00,0.013422394452403931,0.005490505891646507,0.0013807969121424908,0.002336908981798623
|
84 |
+
103,2023-01-11 00:00:00+00:00,0.010501782665511043,-0.0025566610321382474,0.0017008707949891402,0.0017691969049688369
|
85 |
+
104,2023-01-12 00:00:00+00:00,-0.008683084200997962,-0.004771227639351861,0.0006392084677149688,0.00024085982014168195
|
86 |
+
105,2023-01-13 00:00:00+00:00,0.011211035819871068,-6.135031418093928e-05,0.001331736307231877,0.0010561955271994425
|
87 |
+
106,2023-01-17 00:00:00+00:00,0.0005965124072326647,-0.001174437672390838,6.623370925527113e-05,0.00078895049489153
|
88 |
+
107,2023-01-18 00:00:00+00:00,-0.025959436629375873,-0.002652241101487527,0.0009280279345257354,0.000520682708660935
|
89 |
+
108,2023-01-19 00:00:00+00:00,-0.0007792090132950034,0.005998389613138973,0.005556821803484369,0.001790337226031672
|
90 |
+
109,2023-01-20 00:00:00+00:00,0.015171332460758516,0.0061634913246250735,0.005084514674461852,0.001694383648083702
|
91 |
+
110,2023-01-23 00:00:00+00:00,0.005542971074545104,0.0026606667064368937,0.002725384864575145,0.0005829093622090459
|
92 |
+
111,2023-01-24 00:00:00+00:00,-0.006622727888738723,0.0013327048350599657,0.0017756832880997334,-0.0002611183863901375
|
93 |
+
112,2023-01-25 00:00:00+00:00,0.016496864760670086,0.00456974537899959,0.0031524633433066183,0.00027312040798949794
|
94 |
+
113,2023-01-26 00:00:00+00:00,8.113506539531873e-05,0.004926332892019173,0.001729924350080279,-0.0012505797471628595
|
95 |
+
114,2023-01-27 00:00:00+00:00,-0.0019495737438668711,0.003857271982804954,-0.00019183771654848798,-0.0006585410819113961
|
96 |
+
115,2023-01-30 00:00:00+00:00,-0.00099148612495581,0.002715162306309398,0.0007423704742227855,-0.0009786871366024893
|
97 |
+
116,2023-01-31 00:00:00+00:00,0.009280402583510174,0.0021854906270825964,0.0014632424254074508,-0.0011627945038930286
|
98 |
+
117,2023-02-01 00:00:00+00:00,0.01807208776262952,0.001624306926245983,5.5996165418699724e-05,-0.001850062269103672
|
99 |
+
118,2023-02-02 00:00:00+00:00,-0.005286271143741116,-0.0014152827676169523,-0.0018714186626452999,-0.0018426042323073531
|
100 |
+
119,2023-02-03 00:00:00+00:00,-0.00735610191660725,-0.004034184361960209,-0.0015952112175047963,-0.0011180493955347387
|
101 |
+
120,2023-02-06 00:00:00+00:00,-0.0035404118498477645,-0.0010880312237595148,-0.0014860785350897109,-0.0010126401388516049
|
102 |
+
121,2023-02-07 00:00:00+00:00,0.006591189675830193,0.0008091400921922269,-0.002227611292490897,-0.0016015572664364727
|
103 |
+
122,2023-02-08 00:00:00+00:00,0.002722672074499014,-0.00145984913090635,-0.002481905827220295,-0.001753163968636046
|
104 |
+
123,2023-02-09 00:00:00+00:00,-0.01863649800814907,-0.002328582952201085,-0.00411753055898942,-0.0030074135500991186
|
105 |
+
124,2023-02-10 00:00:00+00:00,0.007678079567730095,0.0009154827917578608,-0.0011238194828288672,-0.0035726306795227088
|
106 |
+
125,2023-02-13 00:00:00+00:00,0.006112763508391236,-0.0018560342559001501,-0.0026927013376944766,-0.0029216814590479244
|
107 |
+
126,2023-02-14 00:00:00+00:00,-0.00477608908294868,-0.005254346119167877,-0.0037429459423787784,-0.004198386147281157
|
108 |
+
127,2023-02-15 00:00:00+00:00,-0.0016779104300324093,-0.003538787850179554,-0.0037421445979662895,-0.004128963605615242
|
109 |
+
128,2023-02-16 00:00:00+00:00,-0.0025715184411038345,-0.0059761742312147694,-0.0018586028199844477,-0.0033253898156173634
|
110 |
+
129,2023-02-17 00:00:00+00:00,-0.006241038440088623,-0.0031328292440078054,-0.000669474121235514,-0.003639933277377052
|
111 |
+
130,2023-02-21 00:00:00+00:00,-0.011079628060444846,-0.003524099040014799,-0.0005459995477119063,-0.002429861328467912
|
112 |
+
131,2023-02-22 00:00:00+00:00,0.003989484205406339,-0.0022725958242575377,-0.0009942324563652996,-0.0019073630129376826
|
113 |
+
132,2023-02-23 00:00:00+00:00,-0.014204582469811391,-0.004012438705374892,-0.0010596116561342839,-0.002917469152075522
|
114 |
+
133,2023-02-24 00:00:00+00:00,0.012116901005240929,0.0023769684864330125,-0.001985397748864818,-0.0028627172299213795
|
115 |
+
134,2023-02-27 00:00:00+00:00,-0.008285144803713385,0.0018614725168118733,-0.006104966124183615,-0.0022968902683756696
|
116 |
+
135,2023-02-28 00:00:00+00:00,-0.004807402525172574,0.0024761599269984085,-0.0032439344630809767,-0.0020429450955181806
|
117 |
+
136,2023-03-01 00:00:00+00:00,-0.004827232807062231,0.00027420356036656233,-0.00485372596501946,-0.0011934862887619702
|
118 |
+
137,2023-03-02 00:00:00+00:00,0.017930193919978674,0.0019492770509415744,-0.004661128176875345,-0.0003157692613777001
|
119 |
+
138,2023-03-03 00:00:00+00:00,0.009529409984429534,-0.00627793919095608,-0.0048926759315512025,-0.0011570698350831749
|
120 |
+
139,2023-03-06 00:00:00+00:00,-0.005176985690555578,-0.013935513609302596,-0.006659345106030147,-0.0010800877189296768
|
121 |
+
140,2023-03-07 00:00:00+00:00,-0.015659807800211298,-0.008892647319852509,-0.004347146058003588,-0.0007134469206721708
|
122 |
+
141,2023-03-08 00:00:00+00:00,0.003381326477880937,-0.010040088163365053,-0.0028445778739169805,-0.0006077551608841233
|
123 |
+
142,2023-03-09 00:00:00+00:00,-0.023663304953076534,-0.01129818982215423,-0.00485740218875114,-0.0007560598528678973
|
124 |
+
143,2023-03-10 00:00:00+00:00,-0.030893398613373615,-0.0038206077045881684,-0.0039120160508721415,0.0002554549397418739
|
125 |
+
144,2023-03-13 00:00:00+00:00,0.022362175123981063,0.0004726877276335097,0.0015302959531613201,0.0025145261615524264
|
126 |
+
145,2023-03-14 00:00:00+00:00,-0.022348761476505004,3.87366713232587e-05,-0.0009854352607259823,0.001845601878591786
|
127 |
+
146,2023-03-15 00:00:00+00:00,-0.003325296700711762,0.0046958963994744455,0.0025982415691328847,0.002495461635709717
|
128 |
+
147,2023-03-16 00:00:00+00:00,0.015840822248179467,0.0018117355447859551,0.004335236186220326,0.002976216305923911
|
129 |
+
148,2023-03-17 00:00:00+00:00,-0.00946384581669036,-0.004157422162841717,0.002765511695353645,0.0019509949740394807
|
130 |
+
149,2023-03-20 00:00:00+00:00,0.020467800118520695,0.002622247225075212,0.0049234985025505,0.0029354726394914344
|
131 |
+
150,2023-03-21 00:00:00+00:00,0.0003277948707486952,-0.0020523701485808234,0.0030561322119619866,0.001582701834217248
|
132 |
+
151,2023-03-22 00:00:00+00:00,-0.017672005575669312,0.0005096764402386216,0.001572604648531574,0.00149770455947494
|
133 |
+
152,2023-03-23 00:00:00+00:00,-0.014087653674746592,0.006824496724776386,0.003478823155561836,0.0025724538644252156
|
134 |
+
153,2023-03-24 00:00:00+00:00,0.02506545690589547,0.00997544984123682,0.0046701335408740295,0.0032502924928142535
|
135 |
+
154,2023-03-27 00:00:00+00:00,-0.003077478717735781,0.007153683367903214,0.0034858729391427554,0.0017333852376485645
|
136 |
+
155,2023-03-28 00:00:00+00:00,0.013277255588550533,0.008286840846777166,0.004760086452127288,0.0011016864749227807
|
137 |
+
156,2023-03-29 00:00:00+00:00,0.01328306158951853,0.0026215148784006865,0.0023383992050122837,0.0002527685478874206
|
138 |
+
157,2023-03-30 00:00:00+00:00,0.0007077524431417956,0.00012008894023344704,0.0015562642179077924,0.0001643135986638564
|
139 |
+
158,2023-03-31 00:00:00+00:00,0.011213629538062238,-0.0006085838330772233,0.0010980689017193001,0.0007130687983601223
|
140 |
+
159,2023-04-03 00:00:00+00:00,0.002356406556685244,-0.0001489595305806355,0.0009176653826771084,-8.663952481327951e-05
|
141 |
+
160,2023-04-04 00:00:00+00:00,-0.014246622422067666,0.0012058815462295687,0.00012426620112042258,-0.0006396890750387115
|
142 |
+
161,2023-04-05 00:00:00+00:00,0.0006305606526503252,0.0020665270297940937,0.0014426223807864995,-0.0004797745343940435
|
143 |
+
162,2023-04-06 00:00:00+00:00,-0.002805109031269909,0.003066165517162774,0.0016668019245809384,-0.0003731449534251712
|
144 |
+
163,2023-04-10 00:00:00+00:00,0.013735435400105108,0.00288069576832514,0.0017993902778292862,0.0002724251663713502
|
145 |
+
164,2023-04-11 00:00:00+00:00,0.009201902383613035,0.0020498361285647302,-3.634579558802559e-06,-0.0008399994132819134
|
146 |
+
165,2023-04-12 00:00:00+00:00,-0.010099698021878508,-0.0009169135538235588,-0.0024300513391067036,-0.0008489977786886353
|
147 |
+
166,2023-04-13 00:00:00+00:00,0.005428250496712833,0.0008164283218685845,-0.0017970064326388079,-0.00099840474370093
|
148 |
+
167,2023-04-14 00:00:00+00:00,-0.003757959885105673,0.0002746810866338222,-0.0012090745656069487,-0.001104722555975088
|
149 |
+
168,2023-04-17 00:00:00+00:00,0.009615615623868853,0.0007208890503381228,0.00035019010146842145,-0.0010270486977872206
|
150 |
+
169,2023-04-18 00:00:00+00:00,-0.0056762236829699265,-0.002030069987767104,-0.0010858401938365063,-0.00153119996190006
|
151 |
+
170,2023-04-19 00:00:00+00:00,-0.0014144637304471098,-0.003948818537710862,-0.001384367930451579,-0.0015277828598550722
|
152 |
+
171,2023-04-20 00:00:00+00:00,0.002767335538123379,-0.004371467023061288,-0.002329631358592278,-0.0011637325736494279
|
153 |
+
172,2023-04-21 00:00:00+00:00,-0.0015995437753341312,-0.0026919141540402204,-0.0023412169429187584,-0.0006985843747975502
|
154 |
+
173,2023-04-24 00:00:00+00:00,-0.004261316252620565,-1.2132422075916005e-05,-0.0012368294266921992,-0.0009231818344172815
|
155 |
+
174,2023-04-25 00:00:00+00:00,-0.015330205921747584,-0.0001464687475278962,-0.0016991504263254979,-0.0008547982097039953
|
156 |
+
175,2023-04-26 00:00:00+00:00,-0.0035815642851968615,0.001150759745462075,0.0007214184566291697,-0.000578360479956867
|
157 |
+
176,2023-04-27 00:00:00+00:00,0.011332839034553677,-0.0003335937809039507,-0.0002453970938456484,-0.0007595118405073977
|
158 |
+
177,2023-04-28 00:00:00+00:00,0.01209668464630116,-0.002056498730682904,-0.0010425101440012525,-0.0013056366851330193
|
159 |
+
178,2023-05-01 00:00:00+00:00,-0.004951547388086369,-0.002440221572762325,-0.0023956354466531053,-0.001547631939271932
|
160 |
+
179,2023-05-02 00:00:00+00:00,-0.009045843080407708,-0.0032665179294349422,-0.0020051045929590948,-0.0017460450497451274
|
161 |
+
180,2023-05-03 00:00:00+00:00,-0.011006886224849886,0.00036120829157882864,-0.0016641871980273089,-0.0013811482194951486
|
162 |
+
181,2023-05-04 00:00:00+00:00,0.0029863923465769646,-0.00010450529153132324,6.508300172897021e-05,-0.00012800140346046917
|
163 |
+
182,2023-05-05 00:00:00+00:00,0.010128534825498374,-5.458237791088149e-05,0.0009917209636422881,0.00046841856992471827
|
164 |
+
183,2023-05-08 00:00:00+00:00,-0.009136219501509744,-0.00245718288322845,-0.0006404415640381346,-0.00029744731241896657
|
165 |
+
184,2023-05-09 00:00:00+00:00,0.009134719588678399,-0.0007813572823112697,1.451784730363092e-05,0.0006676809511418545
|
166 |
+
185,2023-05-10 00:00:00+00:00,-0.01345951252511127,-0.003724450181638795,-0.0018550824199567396,0.00047141951171005686
|
167 |
+
186,2023-05-11 00:00:00+00:00,0.0033485663392059132,0.00023264869150747055,-0.0012372421335884874,0.0013526803986437489
|
168 |
+
187,2023-05-12 00:00:00+00:00,-0.00208474430806864,0.0020327346613681703,-0.001573398245212833,0.0010668769695232954
|
169 |
+
188,2023-05-15 00:00:00+00:00,-0.0005703596420971196,0.0012520961326405945,-0.0006580343205526637,0.001587392042321372
|
170 |
+
189,2023-05-16 00:00:00+00:00,-0.00585415173007016,0.0008038751409096218,-0.0015322717742134691,0.0020830549603725137
|
171 |
+
190,2023-05-17 00:00:00+00:00,0.0063044266332458136,4.7655153033113454e-05,-0.001149066994452006,0.0020388447043188517
|
172 |
+
191,2023-05-18 00:00:00+00:00,0.01234842785226991,-0.002743948062611998,-0.0003918021064199368,0.0026163283368395847
|
173 |
+
192,2023-05-19 00:00:00+00:00,-0.006053780525975995,-0.005203113338916268,-0.00011999804168715282,0.0013612889995953423
|
174 |
+
193,2023-05-22 00:00:00+00:00,-0.0026944962988614167,-0.002693343925591055,-2.0534929884328186e-05,0.0013236447061455551
|
175 |
+
194,2023-05-23 00:00:00+00:00,-0.009522367917327265,-0.003928326292141894,0.0012238576097094952,0.0015027709784063842
|
176 |
+
195,2023-05-24 00:00:00+00:00,-0.007641177002339895,-0.0023421844162432493,0.002798233817540908,0.0014900550309321198
|
177 |
+
196,2023-05-25 00:00:00+00:00,-0.0001905473120216703,0.00193197166130356,0.003915544644537916,0.0018232747112172107
|
178 |
+
197,2023-05-26 00:00:00+00:00,0.006540152516733684,0.005059674186430057,0.0037046720419311482,0.0021907460419811954
|
179 |
+
198,2023-05-30 00:00:00+00:00,-0.0088244545691427,0.002743089568269341,0.003778929517181903,0.002296651995244971
|
180 |
+
199,2023-05-31 00:00:00+00:00,-0.001691616090709225,0.00652604423805196,0.005715785057655282,0.0027152150898712662
|
181 |
+
200,2023-06-01 00:00:00+00:00,0.013868790843444482,0.008104015765929042,0.005224102488384953,0.0035898454462415376
|
182 |
+
201,2023-06-02 00:00:00+00:00,0.015528136682794893,0.0058277250484479615,0.005606491437869229,0.0028581695227268672
|
183 |
+
202,2023-06-05 00:00:00+00:00,-0.005016002323495917,0.0022296952884552357,0.002820090549864296,0.0021034013751807423
|
184 |
+
203,2023-06-06 00:00:00+00:00,0.009783257567705485,0.004764002421098952,0.00264099202823996,0.0019057355354283512
|
185 |
+
204,2023-06-07 00:00:00+00:00,0.0058390292847627055,0.0047130694133684856,0.0017654066003668317,0.0012883627151148238
|
186 |
+
205,2023-06-08 00:00:00+00:00,0.002989780386127086,0.0023138761946370563,0.0001947415284436743,0.001192226635124865
|
187 |
+
206,2023-06-09 00:00:00+00:00,-0.0021539266950499356,0.005272486714021355,-0.00022128981323335143,0.0016706207695048041
|
188 |
+
207,2023-06-12 00:00:00+00:00,0.007251276348906687,0.003388512404074902,0.0006821254212582117,0.002690275907514167
|
189 |
+
208,2023-06-13 00:00:00+00:00,0.00967122048372566,0.0005348845922808931,0.0007956158842428745,0.0021704367560073526
|
190 |
+
209,2023-06-14 00:00:00+00:00,-0.006106718518466828,-0.0011273806604994558,-0.0002780148090930612,0.0017631684102008983
|
191 |
+
210,2023-06-15 00:00:00+00:00,0.017630611695106983,-0.0019176576411197251,0.0018118097046250304,0.0016205063397661463
|
192 |
+
211,2023-06-16 00:00:00+00:00,-0.011184388465247943,-0.005565190819175996,7.547616579636971e-05,0.0009053389442838483
|
193 |
+
212,2023-06-20 00:00:00+00:00,-0.006922955190996941,-0.001990829209157894,0.001322279637752321,0.0019318286555970845
|
194 |
+
213,2023-06-21 00:00:00+00:00,0.001277123669441989,0.0010465681947230095,0.0011166373453257404,0.0023997913041489778
|
195 |
+
214,2023-06-22 00:00:00+00:00,-0.010096598601931683,0.0006051912730974876,0.0007800115258499162,0.00253315114438728
|
196 |
+
215,2023-06-23 00:00:00+00:00,-0.0011157176303866906,0.005665133537497064,0.0022251863060054715,0.0030577452036504517
|
197 |
+
216,2023-06-26 00:00:00+00:00,0.007069687543506344,0.005937109485270262,0.0035901237902501468,0.0031452220570207467
|
198 |
+
217,2023-06-27 00:00:00+00:00,0.00828215834577142,0.004710291179203678,0.0046720430319264865,0.0027777746079635144
|
199 |
+
218,2023-06-28 00:00:00+00:00,-0.001014645239945392,0.001208066517870794,0.003540499572992292,0.0028063883219875403
|
200 |
+
219,2023-06-29 00:00:00+00:00,0.014687380639515161,0.0009924306610093808,0.0038555571061524596,0.002666395833051619
|
201 |
+
220,2023-06-30 00:00:00+00:00,0.00034279922443726567,-0.0011803272038567402,0.001419575077643697,0.001820824482698578
|
202 |
+
221,2023-07-03 00:00:00+00:00,0.0011290701215650892,0.0011641251892481896,0.0017485809139342573,0.001631352069881216
|
203 |
+
222,2023-07-05 00:00:00+00:00,-0.008901168478966638,0.004478414058198364,0.002501496441463014,0.0011827989658368013
|
204 |
+
223,2023-07-06 00:00:00+00:00,-0.0021395864517559766,0.0058611764627231585,0.0036565448430225887,0.001330146022227763
|
205 |
+
224,2023-07-07 00:00:00+00:00,0.0037630209246945394,0.006732178919332426,0.004269963338990737,0.0015019982549252953
|
206 |
+
225,2023-07-10 00:00:00+00:00,0.012190003802724817,0.004066339744761413,0.0038109255295543996,0.0011345452793088467
|
207 |
+
226,2023-07-11 00:00:00+00:00,0.017535941836744087,0.0023210434432119494,0.002629854343745672,0.00032892300949471514
|
208 |
+
227,2023-07-12 00:00:00+00:00,-0.002418879606997362,0.0005221616073857004,0.0008860028384278253,-0.0003438385018325311
|
209 |
+
228,2023-07-13 00:00:00+00:00,0.0021132739831659537,0.001425529273998293,0.002016591348541809,-0.00018283128074954248
|
210 |
+
229,2023-07-14 00:00:00+00:00,-0.008954427362971357,0.001777822478736922,0.0014283133555490618,-0.0007136023529050085
|
211 |
+
230,2023-07-17 00:00:00+00:00,0.003547184894066345,0.003540854091934462,0.0022035983447892776,-0.0001978666447461608
|
212 |
+
231,2023-07-18 00:00:00+00:00,0.008539705780020893,0.002965589163659028,0.001514425196322111,-0.0006362371507163049
|
213 |
+
232,2023-07-19 00:00:00+00:00,0.0021604402672707928,0.0012868761941358736,-0.00011073104443805229,-0.0014363712203770873
|
214 |
+
233,2023-07-20 00:00:00+00:00,0.0038461907266579116,0.002582421220273745,-0.0009590294566189845,-0.0017041006719458536
|
215 |
+
234,2023-07-21 00:00:00+00:00,-0.00043758330710132496,0.0010854766871239015,-0.001220332336160317,-0.002705805039834434
|
216 |
+
235,2023-07-24 00:00:00+00:00,0.0005723012112549611,0.0008546272304653712,-0.0014821189295385856,-0.0022190094763525486
|
217 |
+
236,2023-07-25 00:00:00+00:00,0.0001743346387238252,6.567102596806448e-05,-0.0019195768362234092,-0.002260299823392191
|
218 |
+
237,2023-07-26 00:00:00+00:00,0.008762814062311036,-0.001481212606907419,-0.0015547883493180303,-0.0024534780384044925
|
219 |
+
238,2023-07-27 00:00:00+00:00,-0.0036649327488476146,-0.004425649673517277,-0.002346206776337394,-0.002634794257806331
|
220 |
+
239,2023-07-28 00:00:00+00:00,-0.0015728494752666152,-0.003494842540654732,-0.002828728592795025,-0.002684004602813266
|
221 |
+
240,2023-07-31 00:00:00+00:00,-0.003187417722142529,-0.003793575954993315,-0.002546766260857617,-0.002354303362638614
|
222 |
+
241,2023-08-01 00:00:00+00:00,-0.007531301137287553,-0.003917048244253206,-0.0027609202356131046,-0.0020496231736035157
|
223 |
+
242,2023-08-02 00:00:00+00:00,-0.006263170282502216,-0.001691050447785319,-0.0028147510762002066,-0.0011673403412786382
|
224 |
+
243,2023-08-03 00:00:00+00:00,0.0010174422392713002,-0.0003193889502208177,-0.00251603211750373,-0.0007131875761767107
|
225 |
+
244,2023-08-04 00:00:00+00:00,-0.0032251198312741296,-0.0022572249259871684,-0.004272105528371576,-0.0007080062535554283
|
226 |
+
245,2023-08-07 00:00:00+00:00,-0.003902053400111325,-0.001377039587405036,-0.0030074041394710406,-0.000821988694787991
|
227 |
+
246,2023-08-08 00:00:00+00:00,0.003899624840630696,-0.0016465021421528422,-0.002643138096912309,-0.0012188914085903531
|
228 |
+
247,2023-08-09 00:00:00+00:00,0.0006596539338907481,-0.003985762949633756,-0.0033696723191994423,-0.001602560549650205
|
229 |
+
248,2023-08-10 00:00:00+00:00,-0.008688326473253582,-0.004752326285689679,-0.002983565005413988,-0.0016085353313492518
|
230 |
+
249,2023-08-11 00:00:00+00:00,0.0011245334529001738,-0.006365486911837098,-0.0026109195927801585,-0.0009313562953467805
|
231 |
+
250,2023-08-14 00:00:00+00:00,-0.0051615847773311225,-0.004671905190308137,-0.002199311429380531,-0.001229843661781022
|
232 |
+
251,2023-08-15 00:00:00+00:00,-0.007981375270576718,-0.003684029521495181,-0.0013751842778620782,-0.000879653751249868
|
233 |
+
252,2023-08-16 00:00:00+00:00,-0.0032064839662822027,-0.002828684678701298,0.0005252993769284138,-0.0003553164666834776
|
234 |
+
253,2023-08-17 00:00:00+00:00,-0.01685131624010656,-0.001254677006237483,0.001142192467334756,-0.00014789496647458832
|
235 |
+
254,2023-08-18 00:00:00+00:00,0.009945849601211155,0.0011738633893805808,0.003012279841578663,0.00045847221538058964
|
236 |
+
255,2023-08-21 00:00:00+00:00,-0.00018904203294339795,0.0002742703947120698,0.0014139213300489875,-0.00018808798573131719
|
237 |
+
256,2023-08-22 00:00:00+00:00,-0.003666976380894478,0.0009558735025072285,0.00019111841931518148,-4.991336872233928e-05
|
238 |
+
257,2023-08-23 00:00:00+00:00,0.004798775042443972,0.003959739863446514,0.00017541459452963726,-0.0005074693272879067
|
239 |
+
258,2023-08-24 00:00:00+00:00,-0.004701932743806158,0.003572163901035064,-0.0002626939208185824,-0.001266906673814846
|
240 |
+
259,2023-08-25 00:00:00+00:00,0.0053660924878159615,0.004796205995451227,0.0007523310745652684,-0.0015671915499834738
|
241 |
+
260,2023-08-28 00:00:00+00:00,0.0032452563566078754,0.002548793625072,-0.00028397791027260006,-0.001837479232745608
|
242 |
+
261,2023-08-29 00:00:00+00:00,0.011122534949396442,-0.0005664398766606294,-0.0004017946934881293,-0.0021977040066625585
|
243 |
+
262,2023-08-30 00:00:00+00:00,0.002885672237027551,-0.0035505984206576926,-0.0012373589875797147,-0.00288995724310355
|
244 |
+
263,2023-08-31 00:00:00+00:00,0.0011478537106194588,-0.004025574940998672,-0.0014172105919824749,-0.00242013555253556
|
245 |
+
264,2023-09-01 00:00:00+00:00,-0.005726262443671057,-0.0032134721458900667,-0.0019974581448265745,-0.0030658330266932136
|
246 |
+
265,2023-09-05 00:00:00+00:00,-0.01218722088523481,-0.0030277974545576637,-0.0017287420965494793,-0.003533075249154455
|
247 |
+
266,2023-09-06 00:00:00+00:00,-0.003906772008041131,-0.00021230142154292117,-0.00025745275593434873,-0.003140222226237135
|
248 |
+
267,2023-09-07 00:00:00+00:00,0.0004020886904597348,0.0010900110800868674,-0.0011616212353748032,-0.002920520479548895
|
249 |
+
268,2023-09-08 00:00:00+00:00,0.005305309867926114,0.0012343005384163183,-0.0022606398369324925,-0.0033164528697848935
|
factor_data.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:904389c06f93b20202bcd0d43f2a9caaa39d5efdf3c9789da904cb71c634ce96
|
3 |
+
size 11926919
|
financial_analysis.py
ADDED
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
import yfinance as yf
|
4 |
+
import ast
|
5 |
+
from scipy import stats
|
6 |
+
from datetime import datetime, timedelta
|
7 |
+
import pytz
|
8 |
+
import pandas_market_calendars as mcal
|
9 |
+
import alphalens as al
|
10 |
+
import matplotlib.pyplot as plt
|
11 |
+
|
12 |
+
def sentiment_to_numerical(sentiment):
    """Translate sentiment labels into numeric codes.

    Args:
        sentiment: A pandas Series (or any object exposing ``.map``) holding
            the labels 'Negative', 'Neutral' or 'Positive'.

    Returns:
        The result of ``sentiment.map`` with 'Negative' -> -1, 'Neutral' -> 0
        and 'Positive' -> 1. Labels outside this set are not in the lookup
        table, so pandas maps them to NaN.
    """
    label_to_score = dict(Negative=-1, Positive=1, Neutral=0)
    return sentiment.map(label_to_score)
|
15 |
+
|
16 |
+
def process_sentiment_data(sentiment_data='finbert_sentiment.csv', sector_ticker='sector_ticker.csv', prices='prices.csv'):
    """Build the daily per-ticker sentiment factor and an equal-weighted benchmark.

    Steps: load the FinBERT output, collapse each probability string into one
    score, average the scores per (ticker, day), spread them over every price
    date in the covered window, decay the score on days without news, and
    attach each ticker's sector.

    Args:
        sentiment_data: Path of a CSV with 'Ticker', 'pubDate' and
            'finbert_output' columns.
        sector_ticker: Currently unused. The sector mapping is read from the
            hard-coded 'scraping.xlsx' instead.
            NOTE(review): decide whether this argument should drive the
            sector lookup.
        prices: Path of the price CSV. Its first column is parsed as the date
            index; the remaining columns are averaged row-wise for the
            benchmark (presumably one column per ticker - confirm).

    Returns:
        A tuple ``(merged_data, prices, equal_weighted_benchmark_df)``:
        * merged_data - indexed by ('Date', 'Ticker') with the columns
          'index' (left over from ``reset_index``), 'Sentiment_Score_2' and
          'Sector'.
        * prices - the price DataFrame loaded from ``prices``.
        * equal_weighted_benchmark_df - columns 'date',
          'equal_weighted_benchmark', '5d_mean_return', '10d_mean_return'
          and '20d_mean_return', limited to the sentiment date range.
    """
    columns_to_load = ['Ticker', 'pubDate', 'finbert_output']
    df = pd.read_csv(sentiment_data, usecols=columns_to_load)
    df.rename(columns={'finbert_output': 'Sentiment'}, inplace=True)

    # Adjusting the dates of news articles
    nyse = mcal.get_calendar('NYSE')

    trading_start_hour = 9
    trading_start_minute = 30

    def next_market_open(pub_date):
        # First NYSE session that opens after the publication day.
        day = pub_date.date()
        schedule = nyse.schedule(start_date=day + pd.DateOffset(days=1), end_date=day + pd.DateOffset(days=10))
        return schedule.iloc[0]['market_open']

    def adjust_date(pub_date):
        if pd.isnull(pub_date) or not isinstance(pub_date, pd.Timestamp):
            return pub_date

        trading_start_time = pd.Timestamp(f'{pub_date.date()} {trading_start_hour}:{trading_start_minute}')
        if pub_date >= trading_start_time:
            return next_market_open(pub_date)

        valid_days = nyse.valid_days(start_date=pub_date.date(), end_date=pub_date.date())
        if not valid_days.empty and pub_date.date() == valid_days[0].date():
            return pub_date
        return next_market_open(pub_date)

    # NOTE(review): 'pubDate' has not been parsed yet at this point (read_csv
    # was called without parse_dates), so no value is a pd.Timestamp and
    # adjust_date returns every entry unchanged. Enabling the shift changes
    # the resulting factor and needs a timezone decision (the timestamps are
    # later parsed as UTC while the 9:30 cut-off above is timezone-naive), so
    # the original behaviour is kept here on purpose.
    df['pubDate'] = df['pubDate'].apply(adjust_date)

    # Converting probabilities to one value
    def convert_sentiment_to_score(sentiment):
        # Missing model output stays missing instead of raising AttributeError.
        if not isinstance(sentiment, str):
            return np.nan

        predicted_sentiment_probabilities = {}
        for component in sentiment.split(', '):
            key_value = component.split(':')
            if len(key_value) != 2:
                continue
            key, value = key_value
            key = key.strip(" '{}").capitalize()
            try:
                # Strip the same decoration as for the key: with a dict-style
                # string the last value ends in '}', which float() rejects,
                # so that probability used to be dropped silently.
                value = float(value.strip(" '{}"))
            except ValueError:
                continue
            predicted_sentiment_probabilities[key] = value

        positive = predicted_sentiment_probabilities.get('Positive', 0)
        negative = predicted_sentiment_probabilities.get('Negative', 0)
        neutral = predicted_sentiment_probabilities.get('Neutral', 0)
        return (positive - negative) / (1 + neutral)

    df['Sentiment_Score_2'] = df['Sentiment'].apply(convert_sentiment_to_score)

    # Reduce the publication timestamps to calendar dates (UTC).
    df['pubDate'] = pd.to_datetime(df['pubDate'], utc=True, format='ISO8601')
    df['pubDate'] = df['pubDate'].dt.date

    # Debug output kept from the original implementation.
    print(df['pubDate'].dtypes)

    # Replacing invalid tickers
    replacements = {
        'ATVI': 'ATVIX',
        'ABC': 'ABG',
        'FBHS': 'FBIN',
        'FISV': 'FI',
        'FRC': 'FRCB',
        'NLOK': 'SYM.MU',
        'PKI': 'PKN.SG',
        'RE': 'EG',
        'SIVB': 'SIVBQ',
    }
    df['Ticker'] = df['Ticker'].replace(replacements)
    df = df[df['Ticker'] != 'SBNY']

    # One mean score per ticker and day, indexed by (pubDate, Ticker).
    aggregated_data = df.groupby(['Ticker', 'pubDate'])['Sentiment_Score_2'].mean().reset_index()
    aggregated_data['pubDate'] = pd.to_datetime(aggregated_data['pubDate']).dt.tz_localize('UTC')
    aggregated_data.set_index(['pubDate', 'Ticker'], inplace=True)

    prices = pd.read_csv(prices, index_col=0, parse_dates=True)

    # Equal-weighted benchmark: cross-sectional mean of the forward returns.
    equal_weighted_benchmark = prices.pct_change(periods=1).shift(periods=-1).mean(axis=1)
    equal_weighted_benchmark_df = equal_weighted_benchmark.reset_index()
    equal_weighted_benchmark_df.columns = ['date', 'equal_weighted_benchmark']
    for horizon in (5, 10, 20):
        # Forward return over `horizon` rows, divided by the horizon length.
        forward_returns = prices.pct_change(periods=horizon).shift(periods=-horizon) / horizon
        mean_return = forward_returns.mean(axis=1).reset_index()
        mean_return.columns = ['date', f'{horizon}d_mean_return']
        equal_weighted_benchmark_df = equal_weighted_benchmark_df.merge(mean_return, on='date', how='left')

    sentiment_dates = aggregated_data.index.get_level_values('pubDate')
    cut_date_min = sentiment_dates.min()
    cut_date_max = sentiment_dates.max()
    equal_weighted_benchmark_df = equal_weighted_benchmark_df[equal_weighted_benchmark_df.date >= cut_date_min]
    equal_weighted_benchmark_df = equal_weighted_benchmark_df[equal_weighted_benchmark_df.date <= cut_date_max]

    # Full (ticker x price date) grid, so days without news get a row too.
    tickers = aggregated_data.index.get_level_values('Ticker').unique()
    all_dates = prices.loc[cut_date_min:cut_date_max].index
    all_tickers_dates = pd.MultiIndex.from_product([tickers, all_dates], names=['Ticker', 'Date'])
    all_tickers_dates_df = pd.DataFrame(index=all_tickers_dates).reset_index()
    aggregated_data_reset = aggregated_data.reset_index()
    merged_data = pd.merge(all_tickers_dates_df, aggregated_data_reset, how='left', left_on=['Ticker', 'Date'], right_on=['Ticker', 'pubDate'])
    sector_data = pd.read_excel('scraping.xlsx', usecols=['Ticker', 'Sector'])
    merged_data = merged_data.reset_index()
    merged_data = pd.merge(merged_data, sector_data, how='left', left_on='Ticker', right_on='Ticker')

    # On a day without news the score is the previous row's score times
    # decay_factor; rows before a ticker's first observation stay NaN.
    decay_factor = 0.7

    def decay_missing_scores(scores):
        values = scores.to_numpy(dtype=float, copy=True)
        for i in range(1, len(values)):
            # values[i] is still untouched here, so NaN means "no news".
            if np.isnan(values[i]):
                values[i] = values[i - 1] * decay_factor
        return pd.Series(values, index=scores.index)

    merged_data['Sentiment_Score_2'] = (
        merged_data.groupby('Ticker', sort=False)['Sentiment_Score_2'].transform(decay_missing_scores)
    )

    # Assign the result back: an in-place fillna on the selected column does
    # not reach the DataFrame when pandas Copy-on-Write is active.
    merged_data['Sentiment_Score_2'] = merged_data['Sentiment_Score_2'].fillna(0)
    merged_data.drop(columns=['pubDate'], inplace=True)
    merged_data.set_index(['Date', 'Ticker'], inplace=True)

    return merged_data, prices, equal_weighted_benchmark_df
|
158 |
+
|
159 |
+
# Alphalens
|
160 |
+
def alphalens_analysis(merged_data, prices):
    """Prepare alphalens factor data and render the returns tear sheet.

    Args:
        merged_data: DataFrame providing the 'Sentiment_Score_2' factor column
            and the 'Sector' grouping column.
        prices: Price table handed to alphalens for the forward returns.

    Returns:
        The factor data produced by
        ``al.utils.get_clean_factor_and_forward_returns`` (5 quantiles,
        forward periods 1, 5, 10 and 20, grouped by sector).
    """
    clean_factor_options = dict(
        factor=merged_data['Sentiment_Score_2'],
        prices=prices,
        binning_by_group=False,
        bins=None,
        quantiles=5,
        periods=(1, 5, 10, 20),
        groupby=merged_data['Sector'],
    )
    factor_data = al.utils.get_clean_factor_and_forward_returns(**clean_factor_options)

    # Long/short tear sheet without group neutralisation.
    al.tears.create_returns_tear_sheet(factor_data, long_short=True, group_neutral=False)

    return factor_data
|
175 |
+
|
176 |
+
def alphalens_analysis_by_sector(factor_data):
    """Plot the mean return of each factor quantile, broken down by group.

    Args:
        factor_data: Factor data accepted by
            ``al.performance.mean_return_by_quantile``.

    Returns:
        None. The bar chart is produced as a side effect.
    """
    quantile_stats = al.performance.mean_return_by_quantile(factor_data, by_group=True)
    # Only the mean returns are plotted; the standard errors are discarded.
    mean_returns, _std_errors = quantile_stats
    al.plotting.plot_quantile_returns_bar(mean_returns, by_group=True)
|
179 |
+
|
180 |
+
def calculate_information_ratio(factor_data, equal_weighted_benchmark_df):
    """Display the information ratio (IR) per factor quantile and period.

    Args:
        factor_data: DataFrame with a ``date`` merge key, a
            ``factor_quantile`` column and the forward-return columns
            ``1D``, ``5D``, ``10D`` and ``20D``. It is not modified.
        equal_weighted_benchmark_df: DataFrame with a ``date`` merge key and
            the benchmark columns ``equal_weighted_benchmark``,
            ``5d_mean_return``, ``10d_mean_return`` and ``20d_mean_return``.

    Returns:
        None. The table (quantiles 1-5 x ``IR 1D``/``IR 5D``/``IR 10D``/
        ``IR 20D``) is shown through IPython with three decimals; when
        IPython or pandas' Styler dependency is not installed it is printed
        as plain text instead of raising ``ImportError``.
    """
    results = _information_ratio_table(factor_data, equal_weighted_benchmark_df)

    try:
        from IPython.display import display
        styled = results.style.format("{:.3f}")
    except ImportError:
        # Plain-text fallback for environments without IPython / Jinja2.
        print(results.astype(float).round(3).to_string())
    else:
        display(styled)


def _information_ratio_table(factor_data, equal_weighted_benchmark_df):
    """Return the quantile x holding-period information-ratio table."""
    # Holding period (in days) -> benchmark column it is compared against.
    benchmark_columns = {
        1: 'equal_weighted_benchmark',
        5: '5d_mean_return',
        10: '10d_mean_return',
        20: '20d_mean_return',
    }

    # Merge the factor data with the benchmark data (returns a new frame).
    merged = factor_data.merge(equal_weighted_benchmark_df, on='date', how='left')

    # Excess return of every row over the benchmark, per holding period.
    for period, benchmark_column in benchmark_columns.items():
        merged[f'excess_return_{period}D'] = merged[f'{period}D'] - merged[benchmark_column]

    results = pd.DataFrame(
        index=range(1, 6),
        columns=[f'IR {period}D' for period in benchmark_columns],
    )

    for quantile in range(1, 6):
        # Filter once per quantile instead of once per (quantile, period).
        in_quantile = merged[merged['factor_quantile'] == quantile]
        for period in benchmark_columns:
            # Average excess return of the quantile on each date.
            daily = in_quantile.groupby('date')[f'excess_return_{period}D'].mean()
            # Reduce an explicit Series: calling np.mean/np.std on a
            # DataFrame depends on axis=None semantics that differ between
            # pandas versions. ddof=0 matches np.std's default.
            # NOTE(review): sqrt(252) is applied to every holding period,
            # exactly as in the original -- confirm this is intended.
            with np.errstate(divide='ignore', invalid='ignore'):
                ratio = np.divide(daily.mean(), daily.std(ddof=0)) * np.sqrt(252)
            results.at[quantile, f'IR {period}D'] = ratio

    return results
|
203 |
+
|
204 |
+
|
205 |
+
|
finbert_sentiment.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:149783b38a6b94ad2409cbe50dcac62e915dbb7cc0143b3154dc8199d4463423
|
3 |
+
size 98368182
|
merged_data.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
prices.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
CHANGED
@@ -3,4 +3,8 @@ pandas
|
|
3 |
numpy
|
4 |
openai
|
5 |
tqdm
|
6 |
-
matplotlib
|
|
|
|
|
|
|
|
|
|
3 |
numpy
|
4 |
openai
|
5 |
tqdm
|
6 |
+
matplotlib
|
7 |
+
scipy
|
8 |
+
ast
|
9 |
+
yfinance
|
10 |
+
alphalens
|
scraping.xlsx
ADDED
Binary file (48 kB). View file
|
|