"""Weekly ETL and analysis pipeline for the FPMMS project."""
import gc
import logging
import pickle
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timezone
from functools import partial
from pathlib import Path
from typing import Callable

import pandas as pd
from tqdm import tqdm
from web3 import Web3

from markets import (
    etl as mkt_etl,
    DEFAULT_FILENAME as MARKETS_FILENAME,
)
from tools import DEFAULT_FILENAME as TOOLS_FILENAME, generate_tools_file
from profitability import run_profitability_analysis, DEFAULT_60_DAYS_AGO_TIMESTAMP
from utils import get_question, current_answer, RPC
from get_mech_info import (
    get_mech_events_last_60_days,
    get_mech_events_since_last_run,
    update_json_files,
)
from update_tools_accuracy import compute_tools_accuracy
from cleaning_old_info import clean_old_data_from_parquet_files

logging.basicConfig(level=logging.INFO)

SCRIPTS_DIR = Path(__file__).parent
ROOT_DIR = SCRIPTS_DIR.parent
DATA_DIR = ROOT_DIR / "data"


def block_number_to_timestamp(block_number: int, web3: Web3) -> str:
    """Convert a block number to a timestamp."""
    block = web3.eth.get_block(block_number)
    # fromtimestamp with an explicit UTC tz avoids the deprecated utcfromtimestamp
    timestamp = datetime.fromtimestamp(block["timestamp"], tz=timezone.utc)
    return timestamp.strftime("%Y-%m-%d %H:%M:%S")


def parallelize_timestamp_conversion(df: pd.DataFrame, function: Callable) -> list:
    """Parallelize the timestamp conversion."""
    block_numbers = df["request_block"].tolist()
    with ThreadPoolExecutor(max_workers=10) as executor:
        results = list(
            tqdm(executor.map(function, block_numbers), total=len(block_numbers))
        )
    return results


def add_current_answer(tools_filename: str):
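    """Attach each market's question and current answer to the tools data."""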
    # Get currentAnswer from FPMMS
    fpmms = pd.read_parquet(DATA_DIR / MARKETS_FILENAME)
    tools = pd.read_parquet(DATA_DIR / tools_filename)

    # Get the question from the tools
    logging.info("Getting the question and current answer for the tools")
    tools["title"] = tools["prompt_request"].apply(get_question)
    tools["currentAnswer"] = tools["title"].apply(lambda x: current_answer(x, fpmms))

    tools["currentAnswer"] = tools["currentAnswer"].str.replace("yes", "Yes")
    tools["currentAnswer"] = tools["currentAnswer"].str.replace("no", "No")

    # Save the tools data after the updates on the content
    tools.to_parquet(DATA_DIR / tools_filename, index=False)
    del fpmms


def updating_timestamps(rpc: str, tools_filename: str):
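    """Backfill request timestamps using the cached block-to-timestamp map."""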
    web3 = Web3(Web3.HTTPProvider(rpc))
    tools = pd.read_parquet(DATA_DIR / tools_filename)

    # Convert block number to timestamp
    logging.info("Converting block number to timestamp")
    with open(DATA_DIR / "t_map.pkl", "rb") as f:
        t_map = pickle.load(f)
    tools["request_time"] = tools["request_block"].map(t_map)

    no_data = tools["request_time"].isna().sum()
    logging.info(f"Total rows with no request time info = {no_data}")

    # Identify tools with missing request_time and fill them
    missing_time_indices = tools[tools["request_time"].isna()].index
    if not missing_time_indices.empty:
        partial_block_number_to_timestamp = partial(
            block_number_to_timestamp, web3=web3
        )
        missing_timestamps = parallelize_timestamp_conversion(
            tools.loc[missing_time_indices], partial_block_number_to_timestamp
        )

        # Update the original DataFrame with the missing timestamps
        for i, timestamp in zip(missing_time_indices, missing_timestamps):
            tools.at[i, "request_time"] = timestamp

    tools["request_month_year"] = pd.to_datetime(tools["request_time"]).dt.strftime(
        "%Y-%m"
    )
    tools["request_month_year_week"] = (
        pd.to_datetime(tools["request_time"]).dt.to_period("W").astype(str)
    )

    # Save the tools data after the updates on the content
    tools.to_parquet(DATA_DIR / tools_filename, index=False)

    # Update t_map with new timestamps
    new_timestamps = (
        tools[["request_block", "request_time"]]
        .dropna()
        .set_index("request_block")
        .to_dict()["request_time"]
    )
    t_map.update(new_timestamps)
    with open(DATA_DIR / "t_map.pkl", "wb") as f:
        pickle.dump(t_map, f)

    # clean and release all memory
    del tools
    del t_map
    gc.collect()


def only_new_weekly_analysis():
    """Run the weekly analysis for the FPMMS project on new data only."""
    rpc = RPC

    # Markets ETL is currently disabled for the incremental run
    # mkt_etl(MARKETS_FILENAME)
    logging.info("Markets ETL skipped")

    # New tools ETL
    logging.info("Generating the mech json files")
    # get only new data
    latest_timestamp = get_mech_events_since_last_run()
    if latest_timestamp is None:
        logging.error("Error while getting the mech events")
        return
    logging.info(f"Finished generating the mech json files from {latest_timestamp}")

    # Run tools ETL
    logging.info("Generate and parse the tools content")
    # generate only new file
    generate_tools_file("new_tools_info.json", "new_tools.parquet")
    logging.info("Tools ETL completed")
    add_current_answer("new_tools.parquet")

    # Run profitability analysis
    logging.info("Running profitability analysis")
    run_profitability_analysis(
        rpc=rpc,
        tools_filename="new_tools.parquet",
        trades_filename="new_fpmmTrades.parquet",
        from_timestamp=int(latest_timestamp.timestamp()),
        merge=True,
    )
    logging.info("Profitability analysis completed")

    # merge new json files with old json files
    update_json_files()

    try:
        updating_timestamps(rpc, TOOLS_FILENAME)
    except Exception as e:
        logging.error(f"Error while updating timestamps of tools: {e}")
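    # Drop rows older than the hardcoded retention cutoff date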
    clean_old_data_from_parquet_files("2024-09-22")
    compute_tools_accuracy()
    logging.info("Weekly analysis files generated and saved")


def weekly_analysis():
    """Run the full weekly analysis for the FPMMS project."""
    rpc = RPC

    # Run markets ETL
    logging.info("Running markets ETL")
    mkt_etl(MARKETS_FILENAME)
    logging.info("Markets ETL completed")

    # New tools ETL
    logging.info("Generating the mech json files")
    get_mech_events_last_60_days()
    logging.info("Finished generating the mech json files")

    # Run tools ETL
    logging.info("Generate and parse the tools content")
    generate_tools_file("tools_info.json", TOOLS_FILENAME)
    logging.info("Tools ETL completed")
    add_current_answer(TOOLS_FILENAME)

    # Run profitability analysis
    logging.info("Running profitability analysis")
    run_profitability_analysis(
        rpc=rpc,
        tools_filename=TOOLS_FILENAME,
        trades_filename="fpmmTrades.parquet",
        from_timestamp=DEFAULT_60_DAYS_AGO_TIMESTAMP,
    )
    logging.info("Profitability analysis completed")

    try:
        updating_timestamps(rpc, TOOLS_FILENAME)
    except Exception as e:
        logging.error(f"Error while updating timestamps of tools: {e}")

    compute_tools_accuracy()
    logging.info("Weekly analysis files generated and saved")


if __name__ == "__main__":
    only_new_weekly_analysis()
    # weekly_analysis()
    # rpc = RPC
    # updating_timestamps(rpc, TOOLS_FILENAME)
    # compute_tools_accuracy()