import logging
import re
import os
from datetime import datetime, timezone
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
from web3 import Web3
from typing import Callable, Optional
import pandas as pd
from functools import partial
from markets import (
    etl as mkt_etl,
    DEFAULT_FILENAME as MARKETS_FILENAME,
)
from tools import (
    etl as tools_etl,
    DEFAULT_FILENAME as TOOLS_FILENAME,
)
from profitability import run_profitability_analysis

logging.basicConfig(level=logging.INFO)

# NOTE(review): this endpoint URL appears to embed an access key; consider
# supplying it from configuration rather than source control.
DEFAULT_RPC = "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a"

# Matches any run of characters enclosed in double quotes.
_QUOTED_TEXT = re.compile(r'"([^"]*)"')

# Exact-value relabelling applied to the ``currentAnswer`` column.
_ANSWER_LABELS = {"yes": "Yes", "no": "No"}


def get_question(text: str) -> Optional[str]:
    """Return the first double-quoted substring of *text*.

    Args:
        text: The text to search.

    Returns:
        The contents of the first pair of double quotes (without the
        quotes), or ``None`` when there is no quoted substring or *text*
        is not a string (e.g. a NaN read from an empty CSV cell).
    """
    if not isinstance(text, str):
        return None
    match = _QUOTED_TEXT.search(text)
    return match.group(1) if match else None


def current_answer(text: str, fpmms: pd.DataFrame) -> Optional[str]:
    """Return the ``currentAnswer`` of the first row whose title is *text*.

    Args:
        text: The title to look up.
        fpmms: Frame with ``title`` and ``currentAnswer`` columns.

    Returns:
        The ``currentAnswer`` value of the first matching row, or ``None``
        when no row has that title.
    """
    matches = fpmms.loc[fpmms["title"] == text, "currentAnswer"]
    if matches.empty:
        return None
    return matches.values[0]


def block_number_to_timestamp(block_number: int, web3: Web3) -> str:
    """Return the UTC timestamp of a block as ``YYYY-MM-DD HH:MM:SS``.

    Args:
        block_number: Number of the block to fetch.
        web3: Connected client used to fetch the block.
    """
    block = web3.eth.get_block(block_number)
    # ``datetime.utcfromtimestamp`` is deprecated since Python 3.12; an
    # aware UTC datetime formats to the same string.
    moment = datetime.fromtimestamp(block["timestamp"], tz=timezone.utc)
    return moment.strftime("%Y-%m-%d %H:%M:%S")


def parallelize_timestamp_conversion(
    df: pd.DataFrame, function: Callable[[int], str]
) -> list:
    """Apply *function* to every ``request_block`` value using a thread pool.

    Args:
        df: Frame with a ``request_block`` column.
        function: Callable taking a single block number.

    Returns:
        The results of *function*, in the same order as the rows of *df*.
    """
    block_numbers = df["request_block"].tolist()
    with ThreadPoolExecutor(max_workers=10) as executor:
        # ``executor.map`` yields results in input order; tqdm only adds
        # a progress bar around that iteration.
        progress = tqdm(
            executor.map(function, block_numbers), total=len(block_numbers)
        )
        return list(progress)


def _first_answer_by_title(fpmms: pd.DataFrame) -> dict:
    """Map each title in *fpmms* to the ``currentAnswer`` of its first row."""
    firsts = fpmms.drop_duplicates(subset="title", keep="first")
    return dict(zip(firsts["title"], firsts["currentAnswer"]))


def weekly_analysis(rpc: str = DEFAULT_RPC) -> None:
    """Run weekly analysis for the FPMMS project.

    Runs the markets ETL, the tools ETL and the profitability analysis,
    then enriches the tools file with the question title, the current
    answer of the matching market and request timestamps, and writes it
    back to ``TOOLS_FILENAME``.

    Args:
        rpc: RPC endpoint URL passed to the tools ETL and the profitability
            analysis and used for block lookups. Defaults to the endpoint
            that was previously hard-coded.
    """
    web3 = Web3(Web3.HTTPProvider(rpc))

    # Run markets ETL
    logging.info("Running markets ETL")
    mkt_etl(MARKETS_FILENAME)
    logging.info("Markets ETL completed")

    # Run tools ETL
    logging.info("Running tools ETL")
    tools_etl(
        rpcs=[rpc],
        filename=TOOLS_FILENAME,
        full_contents=True,
    )
    logging.info("Tools ETL completed")

    # Run profitability analysis
    logging.info("Running profitability analysis")
    # Remove any existing trades file first; a missing file is fine.
    try:
        os.remove("fpmmTrades.csv")
    except FileNotFoundError:
        pass
    run_profitability_analysis(
        rpc=rpc,
    )
    logging.info("Profitability analysis completed")

    # Get currentAnswer from FPMMS
    fpmms = pd.read_csv(MARKETS_FILENAME)
    tools = pd.read_csv(TOOLS_FILENAME)

    # Get the question from the tools
    logging.info("Getting the question and current answer for the tools")
    tools["title"] = tools["prompt_request"].apply(get_question)
    # Build the title lookup once instead of rescanning the markets frame
    # for every tools row.
    answers = _first_answer_by_title(fpmms)
    tools["currentAnswer"] = tools["title"].map(lambda title: answers.get(title))
    # Relabel whole values only: a substring replace would also rewrite
    # the "no" inside a value such as "unknown".
    tools["currentAnswer"] = tools["currentAnswer"].map(
        lambda answer: _ANSWER_LABELS.get(answer, answer)
    )

    # Convert block number to timestamp
    logging.info("Converting block number to timestamp")
    to_timestamp = partial(block_number_to_timestamp, web3=web3)
    tools["request_time"] = parallelize_timestamp_conversion(tools, to_timestamp)
    request_times = pd.to_datetime(tools["request_time"])
    tools["request_month_year"] = request_times.dt.strftime("%Y-%m")
    tools["request_month_year_week"] = request_times.dt.to_period("W").astype(str)

    # Save the tools
    tools.to_csv(TOOLS_FILENAME, index=False)
    logging.info("Weekly analysis files generated and saved")


if __name__ == "__main__":
    weekly_analysis()