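"""Weekly data pipeline for the FPMMS project: runs the markets and tools ETLs,
the profitability analysis, and backfills missing request timestamps."""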
import gc
import logging
import os
import pickle
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timezone
from functools import partial
from pathlib import Path
from typing import Callable

import pandas as pd
from tqdm import tqdm
from web3 import Web3

from markets import (
    etl as mkt_etl,
    DEFAULT_FILENAME as MARKETS_FILENAME,
)
from tools import (
    etl as tools_etl,
    DEFAULT_FILENAME as TOOLS_FILENAME,
    update_tools_accuracy,
)
from profitability import run_profitability_analysis
from utils import get_question, current_answer
from get_mech_info import get_mech_info_last_60_days

logging.basicConfig(level=logging.INFO)

SCRIPTS_DIR = Path(__file__).parent
ROOT_DIR = SCRIPTS_DIR.parent
DATA_DIR = ROOT_DIR / "data"


def block_number_to_timestamp(block_number: int, web3: Web3) -> str:
    """Convert a block number to a UTC timestamp string."""
    block = web3.eth.get_block(block_number)
    timestamp = datetime.fromtimestamp(block["timestamp"], tz=timezone.utc)
    return timestamp.strftime("%Y-%m-%d %H:%M:%S")


def parallelize_timestamp_conversion(df: pd.DataFrame, function: Callable) -> list:
    """Parallelize the timestamp conversion across a thread pool."""
    block_numbers = df["request_block"].tolist()
    with ThreadPoolExecutor(max_workers=10) as executor:
        results = list(
            tqdm(executor.map(function, block_numbers), total=len(block_numbers))
        )
    return results
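

# NOTE: t_map.pkl (used below) is assumed to be a pickled dict mapping request
# block numbers to "%Y-%m-%d %H:%M:%S" timestamp strings; it acts as a cache so
# that only blocks missing from the map are resolved through the RPC endpoint.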
def updating_timestamps(rpc: str):
    """Fill in missing request timestamps and refresh the block-to-timestamp cache."""
    web3 = Web3(Web3.HTTPProvider(rpc))

    # Get currentAnswer from FPMMS
    fpmms = pd.read_parquet(DATA_DIR / MARKETS_FILENAME)
    tools = pd.read_parquet(DATA_DIR / TOOLS_FILENAME)

    # Get the question from the tools
    logging.info("Getting the question and current answer for the tools")
    tools["title"] = tools["prompt_request"].apply(lambda x: get_question(x))
    tools["currentAnswer"] = tools["title"].apply(lambda x: current_answer(x, fpmms))
    tools["currentAnswer"] = tools["currentAnswer"].str.replace("yes", "Yes")
    tools["currentAnswer"] = tools["currentAnswer"].str.replace("no", "No")

    # Convert block numbers to timestamps, consulting the cached map first
    logging.info("Converting block number to timestamp")
    with open(DATA_DIR / "t_map.pkl", "rb") as f:
        t_map = pickle.load(f)
    tools["request_time"] = tools["request_block"].map(t_map)

    # Identify tools with missing request_time and fill them via the RPC node
    missing_time_indices = tools[tools["request_time"].isna()].index
    if not missing_time_indices.empty:
        partial_block_number_to_timestamp = partial(
            block_number_to_timestamp, web3=web3
        )
        missing_timestamps = parallelize_timestamp_conversion(
            tools.loc[missing_time_indices], partial_block_number_to_timestamp
        )

        # Update the original DataFrame with the missing timestamps
        for i, timestamp in zip(missing_time_indices, missing_timestamps):
            tools.at[i, "request_time"] = timestamp

    tools["request_month_year"] = pd.to_datetime(tools["request_time"]).dt.strftime(
        "%Y-%m"
    )
    tools["request_month_year_week"] = (
        pd.to_datetime(tools["request_time"]).dt.to_period("W").astype(str)
    )

    # Save the tools data after the updates on the content
    tools.to_parquet(DATA_DIR / TOOLS_FILENAME, index=False)

    # Update t_map with the newly resolved timestamps and persist it
    new_timestamps = (
        tools[["request_block", "request_time"]]
        .dropna()
        .set_index("request_block")
        .to_dict()["request_time"]
    )
    t_map.update(new_timestamps)
    with open(DATA_DIR / "t_map.pkl", "wb") as f:
        pickle.dump(t_map, f)

    # Clean up and release memory
    del tools
    del fpmms
    del t_map
    gc.collect()
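

# Pipeline order matters: the markets ETL writes MARKETS_FILENAME, the tools ETL
# writes TOOLS_FILENAME, and updating_timestamps() then reads both from DATA_DIR.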
def weekly_analysis():
    """Run weekly analysis for the FPMMS project."""
    rpc = "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a"

    # Run markets ETL
    logging.info("Running markets ETL")
    mkt_etl(MARKETS_FILENAME)
    logging.info("Markets ETL completed")

    # Run tools ETL
    logging.info("Running tools ETL")
    # This ETL already saves the tools parquet file
    tools_etl(
        rpcs=[rpc],
        mech_info=get_mech_info_last_60_days(),
        filename=TOOLS_FILENAME,
    )
    logging.info("Tools ETL completed")

    # Run profitability analysis, removing any stale trades file first
    logging.info("Running profitability analysis")
    if os.path.exists(DATA_DIR / "fpmmTrades.parquet"):
        os.remove(DATA_DIR / "fpmmTrades.parquet")
    run_profitability_analysis(
        rpc=rpc,
    )
    logging.info("Profitability analysis completed")

    updating_timestamps(rpc)
    logging.info("Weekly analysis files generated and saved")


if __name__ == "__main__":
    # weekly_analysis()
    rpc = "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a"
    updating_timestamps(rpc)