import logging
import os
import pickle
from datetime import datetime, timezone
from typing import Callable
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
from web3 import Web3
import pandas as pd
from pathlib import Path
from functools import partial
from markets import (
    etl as mkt_etl,
    DEFAULT_FILENAME as MARKETS_FILENAME,
)
from tools import DEFAULT_FILENAME as TOOLS_FILENAME, generate_tools_file
from profitability import run_profitability_analysis, DEFAULT_60_DAYS_AGO_TIMESTAMP
from utils import get_question, current_answer, RPC
from get_mech_info import (
    get_mech_events_last_60_days,
    get_mech_events_since_last_run,
    update_json_files,
)
from update_tools_accuracy import compute_tools_accuracy
from cleaning_old_info import clean_old_data_from_parquet_files
import gc

logging.basicConfig(level=logging.INFO)

SCRIPTS_DIR = Path(__file__).parent
ROOT_DIR = SCRIPTS_DIR.parent
DATA_DIR = ROOT_DIR / "data"
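# All intermediate and final artifacts are read from and written to this folder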


def block_number_to_timestamp(block_number: int, web3: Web3) -> str:
    """Convert a block number to a timestamp."""
    block = web3.eth.get_block(block_number)
    timestamp = datetime.fromtimestamp(block["timestamp"], tz=timezone.utc)
    return timestamp.strftime("%Y-%m-%d %H:%M:%S")
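
# Usage sketch (the endpoint URL below is a placeholder, not the project's real
# endpoint, which comes from utils.RPC):
#   w3 = Web3(Web3.HTTPProvider("https://example-rpc.invalid"))
#   block_number_to_timestamp(30_000_000, w3)  # e.g. "2023-08-01 12:34:56"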


def parallelize_timestamp_conversion(df: pd.DataFrame, function: Callable) -> list:
    """Resolve block timestamps concurrently; the lookups are I/O-bound RPC calls."""
    block_numbers = df["request_block"].tolist()
    with ThreadPoolExecutor(max_workers=10) as executor:
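        # executor.map preserves input order, so results stay aligned with
        # block_numbers (and with the caller's DataFrame index)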
        results = list(
            tqdm(executor.map(function, block_numbers), total=len(block_numbers))
        )
    return results


def add_current_answer(tools_filename: str):
    """Attach the market question and its current answer to each tool request."""
    # Get currentAnswer from FPMMS
    fpmms = pd.read_parquet(DATA_DIR / MARKETS_FILENAME)
    tools = pd.read_parquet(DATA_DIR / tools_filename)

    # Get the question from the tools
    logging.info("Getting the question and current answer for the tools")
    tools["title"] = tools["prompt_request"].apply(lambda x: get_question(x))
    tools["currentAnswer"] = tools["title"].apply(lambda x: current_answer(x, fpmms))

    tools["currentAnswer"] = tools["currentAnswer"].str.replace("yes", "Yes")
    tools["currentAnswer"] = tools["currentAnswer"].str.replace("no", "No")
    # Save the tools data after the updates on the content
    tools.to_parquet(DATA_DIR / tools_filename, index=False)
    del fpmms


def updating_timestamps(rpc: str, tools_filename: str):
    """Fill in request timestamps for tool calls via the cached block-to-time map."""
    web3 = Web3(Web3.HTTPProvider(rpc))

    tools = pd.read_parquet(DATA_DIR / tools_filename)

    # Convert block number to timestamp
    logging.info("Converting block number to timestamp")
    # t_map.pkl is a persisted block-number -> timestamp cache, so only blocks
    # unseen in previous runs need an RPC lookup below
    with open(DATA_DIR / "t_map.pkl", "rb") as f:
        t_map = pickle.load(f)
    tools["request_time"] = tools["request_block"].map(t_map)

    no_data = tools["request_time"].isna().sum()
    logging.info(f"Total rows with no request time info = {no_data}")

    # Identify tools with missing request_time and fill them
    missing_time_indices = tools[tools["request_time"].isna()].index
    if not missing_time_indices.empty:
        partial_block_number_to_timestamp = partial(
            block_number_to_timestamp, web3=web3
        )
        missing_timestamps = parallelize_timestamp_conversion(
            tools.loc[missing_time_indices], partial_block_number_to_timestamp
        )

        # Update the original DataFrame with the missing timestamps
        for i, timestamp in zip(missing_time_indices, missing_timestamps):
            tools.at[i, "request_time"] = timestamp

    tools["request_month_year"] = pd.to_datetime(tools["request_time"]).dt.strftime(
        "%Y-%m"
    )
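    # Weekly periods render as interval strings, e.g. "2024-09-16/2024-09-22"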
    tools["request_month_year_week"] = (
        pd.to_datetime(tools["request_time"]).dt.to_period("W").astype(str)
    )

    # Save the tools data after the updates on the content
    tools.to_parquet(DATA_DIR / tools_filename, index=False)

    # Update t_map with new timestamps
    new_timestamps = (
        tools[["request_block", "request_time"]]
        .dropna()
        .set_index("request_block")
        .to_dict()["request_time"]
    )
    t_map.update(new_timestamps)

    with open(DATA_DIR / "t_map.pkl", "wb") as f:
        pickle.dump(t_map, f)

    # clean and release all memory
    del tools
    del t_map
    gc.collect()


def only_new_weekly_analysis():
    """Run weekly analysis for the FPMMS project."""
    rpc = RPC
    # Markets ETL is skipped on incremental runs; the parquet file from the
    # last full run is reused
    # mkt_etl(MARKETS_FILENAME)
    logging.info("Markets ETL skipped (incremental run)")

    # New tools ETL
    logging.info("Generating the mech json files")
    # get only new data
    latest_timestamp = get_mech_events_since_last_run()
    if latest_timestamp is None:
        logging.error("Error while getting the mech events")
        return
    logging.info(f"Finished generating the mech json files from {latest_timestamp}")

    # Run tools ETL
    logging.info("Generate and parse the tools content")
    # generate only new file
    generate_tools_file("new_tools_info.json", "new_tools.parquet")
    logging.info("Tools ETL completed")

    add_current_answer("new_tools.parquet")

    # Run profitability analysis
    logging.info("Running profitability analysis")
    run_profitability_analysis(
        rpc=rpc,
        tools_filename="new_tools.parquet",
        trades_filename="new_fpmmTrades.parquet",
        from_timestamp=int(latest_timestamp.timestamp()),
        merge=True,
    )

    logging.info("Profitability analysis completed")

    # merge new json files with old json files
    update_json_files()

    try:
        updating_timestamps(rpc, TOOLS_FILENAME)
    except Exception as e:
        logging.error("Error while updating timestamps of tools: %s", e)

    # Hard-coded retention cutoff: rows older than this date are dropped
    clean_old_data_from_parquet_files("2024-09-22")

    compute_tools_accuracy()

    logging.info("Weekly analysis files generated and saved")


def weekly_analysis():
    """Run weekly analysis for the FPMMS project."""
    rpc = RPC
    # Run markets ETL
    logging.info("Running markets ETL")
    mkt_etl(MARKETS_FILENAME)
    logging.info("Markets ETL completed")

    # New tools ETL
    logging.info("Generating the mech json files")

    get_mech_events_last_60_days()
    logging.info("Finished generating the mech json files")

    # Run tools ETL
    logging.info("Generate and parse the tools content")

    generate_tools_file("tools_info.json", TOOLS_FILENAME)
    logging.info("Tools ETL completed")
    add_current_answer(TOOLS_FILENAME)

    # Run profitability analysis
    logging.info("Running profitability analysis")
    run_profitability_analysis(
        rpc=rpc,
        tools_filename=TOOLS_FILENAME,
        trades_filename="fpmmTrades.parquet",
        from_timestamp=DEFAULT_60_DAYS_AGO_TIMESTAMP,
    )
    logging.info("Profitability analysis completed")

    try:
        updating_timestamps(rpc, TOOLS_FILENAME)
    except Exception as e:
        logging.error("Error while updating timestamps of tools: %s", e)

    compute_tools_accuracy()

    logging.info("Weekly analysis files generated and saved")


if __name__ == "__main__":
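    # Incremental run by default; switch to weekly_analysis() for a full
    # 60-day rebuild of all files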
    only_new_weekly_analysis()
    # weekly_analysis()
    # rpc = RPC
    # updating_timestamps(rpc, TOOLS_FILENAME)
    # compute_tools_accuracy()