File size: 5,071 Bytes
e2483e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99c38a1
e2483e1
 
 
 
 
 
 
 
99c38a1
e2483e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99c38a1
e2483e1
 
99c38a1
e2483e1
 
 
 
 
99c38a1
 
e2483e1
 
 
 
99c38a1
e2483e1
99c38a1
 
 
e2483e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99c38a1
 
e2483e1
 
 
 
 
 
 
 
8ba86e5
 
e2483e1
 
 
 
 
 
8ba86e5
 
e2483e1
 
 
99c38a1
 
e2483e1
99c38a1
 
e2483e1
 
 
 
99c38a1
e2483e1
 
99c38a1
e2483e1
99c38a1
 
 
 
 
 
 
e2483e1
 
99c38a1
e2483e1
99c38a1
 
 
 
 
 
e2483e1
99c38a1
8ba86e5
e2483e1
 
99c38a1
 
 
 
 
 
e2483e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import logging
import re
import os
import pickle
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
from web3 import Web3
from typing import Optional
import pandas as pd
from pathlib import Path
from functools import partial
from markets import (
    etl as mkt_etl,
    DEFAULT_FILENAME as MARKETS_FILENAME,
)
from tools import (
    etl as tools_etl,
    DEFAULT_FILENAME as TOOLS_FILENAME,
)
from profitability import run_profitability_analysis

import gc

# Configure the root logger so INFO-level (and above) messages are emitted.
logging.basicConfig(level=logging.INFO)

# Directory layout, derived from the path of this file.
SCRIPTS_DIR = Path(__file__).parent  # directory containing this script
ROOT_DIR = SCRIPTS_DIR.parent  # parent directory of SCRIPTS_DIR
DATA_DIR = ROOT_DIR / "data"  # "data" directory under ROOT_DIR (parquet/pickle files)


def get_question(text: str) -> Optional[str]:
    """Extract the first double-quoted section from a text.

    Args:
        text: Text expected to contain the question wrapped in double quotes.

    Returns:
        The content between the first pair of double quotes (which may be an
        empty string), or ``None`` when ``text`` is not a string or contains
        no quoted section.
    """
    # Missing values (e.g. None or NaN coming from a DataFrame column) carry
    # no question; return None instead of letting ``re`` raise a TypeError.
    if not isinstance(text, str):
        return None

    # Only the first quoted section is used, so stop at the first match
    # rather than collecting every occurrence in the text.
    match = re.search(r'"([^"]*)"', text)
    return match.group(1) if match else None


def current_answer(text: str, fpmms: pd.DataFrame) -> Optional[str]:
    """Look up the current answer of the market whose title equals ``text``.

    Returns the ``currentAnswer`` value of the first row in ``fpmms`` whose
    ``title`` matches exactly, or ``None`` when no row matches.
    """
    matching_markets = fpmms[fpmms["title"] == text]

    # No market carries this title: there is no answer to report.
    if len(matching_markets) == 0:
        return None

    answers = matching_markets["currentAnswer"]
    return answers.values[0]


def block_number_to_timestamp(block_number: int, web3: Web3) -> str:
    """Convert a block number to a UTC timestamp string.

    Args:
        block_number: Number of the block to look up.
        web3: Web3 client used to fetch the block via ``web3.eth.get_block``.

    Returns:
        The block's ``timestamp`` field rendered in UTC as
        ``"%Y-%m-%d %H:%M:%S"``.
    """
    # Imported locally because the module only imports ``datetime`` itself
    # from the ``datetime`` package.
    from datetime import timezone

    block = web3.eth.get_block(block_number)

    # ``datetime.utcfromtimestamp`` is deprecated since Python 3.12. An
    # explicitly UTC-aware datetime formats to the exact same string, since
    # the format contains no timezone directive.
    timestamp = datetime.fromtimestamp(block["timestamp"], tz=timezone.utc)
    return timestamp.strftime("%Y-%m-%d %H:%M:%S")


def parallelize_timestamp_conversion(df: pd.DataFrame, function: callable) -> list:
    """Apply ``function`` to every ``request_block`` value using a thread pool.

    The values of the ``request_block`` column are mapped through ``function``
    by up to 10 worker threads, with a tqdm progress bar tracking completion.
    Results are returned as a list in the same order as the column values.
    """
    blocks = df["request_block"].tolist()
    with ThreadPoolExecutor(max_workers=10) as pool:
        # ``pool.map`` yields results in input order; draining the progress
        # bar into a list waits for every conversion to finish.
        progress = tqdm(pool.map(function, blocks), total=len(blocks))
        converted = list(progress)
    return converted


# NOTE(review): this endpoint URL appears to embed an API key. It is kept only
# as the backward-compatible default; prefer passing ``rpc`` explicitly from
# configuration rather than relying on a value committed to the source.
DEFAULT_RPC = "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a"


def weekly_analysis(rpc: str = DEFAULT_RPC) -> None:
    """Run weekly analysis for the FPMMS project.

    Steps, in order:
      1. Run the markets ETL and the tools ETL.
      2. Delete any existing ``fpmmTrades.parquet`` and run the profitability
         analysis.
      3. Enrich the tools data with the question title, the market's current
         answer, and request timestamps (from the cached block->time map,
         falling back to RPC lookups for blocks not in the cache).
      4. Save the enriched tools parquet file and the updated ``t_map.pkl``.

    Args:
        rpc: RPC endpoint URL used for the Web3 client, the tools ETL and the
            profitability analysis. Defaults to the previously hard-coded
            endpoint.
    """
    web3 = Web3(Web3.HTTPProvider(rpc))

    # Run markets ETL
    logging.info("Running markets ETL")
    mkt_etl(MARKETS_FILENAME)
    logging.info("Markets ETL completed")

    # Run tools ETL (this ETL already saves the tools parquet file)
    logging.info("Running tools ETL")
    tools_etl(
        rpcs=[rpc],
        filename=TOOLS_FILENAME,
    )
    logging.info("Tools ETL completed")

    # Run profitability analysis, removing any existing trades file first
    logging.info("Running profitability analysis")
    trades_path = DATA_DIR / "fpmmTrades.parquet"
    if trades_path.exists():
        trades_path.unlink()
    run_profitability_analysis(
        rpc=rpc,
    )
    logging.info("Profitability analysis completed")

    # Get currentAnswer from FPMMS
    fpmms = pd.read_parquet(DATA_DIR / MARKETS_FILENAME)
    tools = pd.read_parquet(DATA_DIR / TOOLS_FILENAME)

    # Get the question from the tools
    logging.info("Getting the question and current answer for the tools")
    tools["title"] = tools["prompt_request"].apply(get_question)
    tools["currentAnswer"] = tools["title"].apply(
        partial(current_answer, fpmms=fpmms)
    )

    # Capitalize only answers that are exactly "yes" / "no". A substring
    # replacement (``.str.replace``) would also rewrite unrelated values that
    # merely contain those letters (e.g. "unknown" -> "unkNown").
    tools["currentAnswer"] = tools["currentAnswer"].replace(
        {"yes": "Yes", "no": "No"}
    )

    # Convert block number to timestamp
    logging.info("Converting block number to timestamp")
    t_map_path = DATA_DIR / "t_map.pkl"
    # NOTE: unpickling executes code embedded in the file; t_map.pkl must only
    # ever be a trusted, locally generated file.
    with open(t_map_path, "rb") as f:
        t_map = pickle.load(f)
    tools["request_time"] = tools["request_block"].map(t_map)

    # Identify tools with missing request_time and fill them
    missing_mask = tools["request_time"].isna()
    if missing_mask.any():
        to_timestamp = partial(block_number_to_timestamp, web3=web3)

        # Several rows can share a block: query each missing block only once.
        missing_blocks = tools.loc[missing_mask, ["request_block"]].drop_duplicates()
        fetched_timestamps = parallelize_timestamp_conversion(
            missing_blocks, to_timestamp
        )
        block_to_time = dict(zip(missing_blocks["request_block"], fetched_timestamps))

        # Update the original DataFrame with the missing timestamps
        tools.loc[missing_mask, "request_time"] = (
            tools.loc[missing_mask, "request_block"].map(block_to_time).tolist()
        )

    # Parse the timestamps once and derive both period columns from the result
    request_times = pd.to_datetime(tools["request_time"])
    tools["request_month_year"] = request_times.dt.strftime("%Y-%m")
    tools["request_month_year_week"] = request_times.dt.to_period("W").astype(str)

    # Save the tools data after the updates on the content
    tools.to_parquet(DATA_DIR / TOOLS_FILENAME, index=False)

    # Update t_map with new timestamps
    new_timestamps = (
        tools[["request_block", "request_time"]]
        .dropna()
        .set_index("request_block")
        .to_dict()["request_time"]
    )
    t_map.update(new_timestamps)

    with open(t_map_path, "wb") as f:
        pickle.dump(t_map, f)

    # clean and release all memory
    del tools
    del fpmms
    del t_map
    gc.collect()

    logging.info("Weekly analysis files generated and saved")


# Script entry point: run the full weekly analysis when executed directly.
if __name__ == "__main__":
    weekly_analysis()