# ---------------------- Library Imports ----------------------
import functools
import json
import logging
import os
import time

import pandas as pd
import requests
from dotenv import load_dotenv

# ---------------------- Environment Variables ----------------------
load_dotenv()

url_etherscan = os.getenv("URL_ETHERSCAN")
api_key_etherscan = os.getenv("API_KEY_ETHERSCAN")
log_folder = os.getenv("LOG_FOLDER")

# NOTE: LOG_FOLDER has to be defined in the environment / .env file;
# os.makedirs() raises TypeError when it receives None.
os.makedirs(log_folder, exist_ok=True)
log_file = os.path.join(log_folder, "etherscan_scrap.log")
log_format = "%(asctime)s [%(levelname)s] - %(message)s"
logging.basicConfig(filename=log_file, level=logging.INFO, format=log_format)

# Load the JSON file into a dictionary (token symbol -> contract address,
# as consumed by fetch_and_update_etherscan below).
with open("ressources/dict_tokens_addr.json", "r", encoding="utf-8") as file:
    dict_addresses = json.load(file)

L_created = []
L_updated = []

# Width, in blocks, of the window requested by one "tokentx" call.
BLOCKS_PER_CALL = 10_000
# Seconds to wait for Etherscan before giving up on a request.
REQUEST_TIMEOUT = 30
# Columns removed from the API payload before it is persisted.
DROPPED_COLUMNS = ["confirmations", "input"]

n_blocks = 20000
n_loop = n_blocks // BLOCKS_PER_CALL


# ---------------------- Helper Functions ----------------------
def log_execution_time(func):
    """Decorate *func* so that its wall-clock duration is logged at INFO level.

    The wrapped function's return value is passed through unchanged.
    """

    @functools.wraps(func)  # keep __name__/__doc__ of the decorated function
    def wrapper(*args, **kwargs):
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        logging.info(f"Function {func.__name__} executed in {elapsed:.2f} seconds")
        return result

    return wrapper


def latest_block(start_block=None):
    """Return the number of the latest block reported by the Etherscan proxy API.

    Args:
        start_block: Optional reference block. When given, the distance
            between the latest block and this block is returned as well.

    Returns:
        * ``start_block`` omitted: the latest block number as ``int``, or
          ``None`` on failure.
        * ``start_block`` given: ``(latest_block_number, difference)``, or
          ``(None, None)`` on failure.

        Failures (network error, non-200 status, malformed payload) are
        logged rather than raised.
    """
    # The failure value mirrors the shape of the success value so callers can
    # either test ``is None`` (single value) or unpack (tuple) safely.
    failure = None if start_block is None else (None, None)

    params = {
        "module": "proxy",
        "action": "eth_blockNumber",
        "apikey": api_key_etherscan,
    }
    try:
        response = requests.get(url_etherscan, params=params, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        logging.error(f"Request for the latest block number failed: {exc}")
        return failure

    if response.status_code != 200:
        # Log the raw body: an error response is not guaranteed to be JSON.
        logging.error(f"API call failed with status code {response.status_code}: {response.text}")
        return failure

    try:
        # The block number is parsed as a hexadecimal string.
        latest_block_number = int(response.json()["result"], 16)
    except (ValueError, KeyError, TypeError):
        logging.error(f"Invalid response format or missing data in response: {response.text}")
        return failure

    if start_block is not None:
        return latest_block_number, latest_block_number - start_block
    return latest_block_number


def _fetch_token_transfers(params):
    """Run one "tokentx" query and return its list of transfer dicts.

    Returns ``None`` (after logging) when the request fails or the payload is
    not a list of dictionaries.
    """
    try:
        response = requests.get(url_etherscan, params=params, timeout=REQUEST_TIMEOUT)
        transactions = response.json().get("result", [])
    except (requests.RequestException, ValueError, AttributeError) as exc:
        # ValueError: body is not JSON; AttributeError: JSON root is not an object.
        logging.error(f"Token transfer request failed: {exc}")
        return None

    if not isinstance(transactions, list) or not all(isinstance(item, dict) for item in transactions):
        logging.error(f"Invalid data format for transactions: {transactions}")
        return None
    return transactions


def get_coin_data(contractAddr, n):
    """Download the token transfers of *contractAddr* for the most recent blocks.

    The last ``n * BLOCKS_PER_CALL`` blocks are queried in ``n`` consecutive
    windows, oldest first. Consecutive windows share their boundary block, so
    the result may contain repeated rows; callers de-duplicate on ``hash``.

    Args:
        contractAddr: Contract address passed as ``contractaddress``.
        n: Number of block windows to request.

    Returns:
        A DataFrame of transfers with ``timeStamp`` converted to datetime and
        ``value`` divided by 1e18, or an empty DataFrame when nothing usable
        could be retrieved.
    """
    latest_block_number = latest_block()
    if latest_block_number is None:
        logging.error(f"Could not retrieve latest block number for contract address {contractAddr}")
        return pd.DataFrame()  # Return an empty DataFrame

    frames = []
    for i in range(n):
        start_block = latest_block_number - (n - i) * BLOCKS_PER_CALL
        end_block = latest_block_number - (n - 1 - i) * BLOCKS_PER_CALL
        params = {
            "module": "account",
            "action": "tokentx",
            "contractaddress": contractAddr,
            "startblock": start_block,
            "endblock": end_block,
            "sort": "asc",
            "apikey": api_key_etherscan,
        }
        transactions = _fetch_token_transfers(params)
        # Throttle after every request, including failed ones, so an error
        # reply is never followed by an immediate retry.
        time.sleep(1)
        if transactions:
            frames.append(pd.DataFrame(transactions))

    df_transactions = pd.concat(frames) if frames else pd.DataFrame()

    if "timeStamp" not in df_transactions:
        logging.error("'timeStamp' key not found in the response data.")
        return pd.DataFrame()  # Return an empty DataFrame if key is missing

    df_transactions["timeStamp"] = pd.to_datetime(df_transactions["timeStamp"].astype(int), unit="s")
    # NOTE(review): assumes every token uses 18 decimals - confirm against the
    # token's actual decimals before relying on these amounts.
    df_transactions["value"] = df_transactions["value"].astype(float) / 1e18
    return df_transactions


# ---------------------- Main Function ----------------------
@log_execution_time
def fetch_and_update_etherscan():
    """Create or refresh one CSV of token transfers per entry of ``dict_addresses``.

    * No CSV yet: the last ``n_blocks`` blocks are fetched and written.
    * CSV exists: the blocks after the highest stored ``blockNumber`` are
      fetched, merged with the stored rows, de-duplicated on ``hash`` and
      written back sorted by ``blockNumber``.

    A token whose download fails is logged and skipped; its file is left
    untouched. Paths actually written are appended to ``L_created`` /
    ``L_updated`` and reported in the log at the end.
    """
    os.makedirs("output", exist_ok=True)  # to_csv does not create directories

    for tokenSymbol, contractAddr in dict_addresses.items():
        file = f"output/transactions_{tokenSymbol}.csv"

        if not os.path.exists(file):
            df_transactions = get_coin_data(contractAddr, n_loop)
            if df_transactions.empty:
                logging.error(f"No transactions retrieved for token: {tokenSymbol}; file not created")
                continue
            df_transactions_no_dup = df_transactions.drop(
                columns=DROPPED_COLUMNS, errors="ignore"
            ).drop_duplicates(subset="hash")
            df_transactions_no_dup.to_csv(file, sep=",", index=False)
            L_created.append(file)
            continue

        df_temp = pd.read_csv(file, sep=",")
        if df_temp.empty:
            logging.error(f"Existing file has no rows, cannot determine start block: {file}")
            continue
        start_block = int(df_temp["blockNumber"].max())

        latest_block_number, diff = latest_block(start_block)
        if latest_block_number is None:
            logging.error(f"Failed to retrieve latest block number for token: {tokenSymbol}")
            continue

        n_loop_to_concat = (diff // BLOCKS_PER_CALL) + 1
        df_transactions = get_coin_data(contractAddr, n_loop_to_concat)
        if df_transactions.empty:
            logging.error(f"No new transactions retrieved for token: {tokenSymbol}; file left unchanged")
            continue

        # Fresh rows come first so that, for a repeated hash, the newly
        # downloaded row is the one kept by drop_duplicates.
        # errors="ignore": the stored CSV no longer carries the dropped columns.
        df_latest_no_dup = (
            pd.concat([df_transactions, df_temp])
            .drop(columns=DROPPED_COLUMNS, errors="ignore")
            .drop_duplicates(subset="hash")
            # API rows carry blockNumber as text, CSV rows as integers.
            .assign(blockNumber=lambda df: df["blockNumber"].astype(int))
            .sort_values(by="blockNumber")
        )
        df_latest_no_dup.to_csv(file, sep=",", index=False)
        L_updated.append(file)

    logging.info("Created files: " + ", ".join(L_created))
    logging.info("Updated files: " + ", ".join(L_updated))
    logging.info("Etherscan scraping script execution completed.")


# ---------------------- Script Execution ----------------------
if __name__ == "__main__":
    fetch_and_update_etherscan()