rosacastillo's picture
updating scripts for data collection with new tools pipeline
54916c4
raw
history blame
8.44 kB
from string import Template
from typing import Any
from datetime import datetime, timedelta, UTC
from utils import SUBGRAPH_API_KEY, measure_execution_time, DATA_DIR
import requests
from mech_request_utils import (
collect_all_mech_delivers,
collect_all_mech_requests,
clean_mech_delivers,
fix_duplicate_requestIds,
merge_requests_delivers,
get_ipfs_data,
only_delivers_loop,
)
OLD_MECH_SUBGRAPH_URL = (
"https://api.thegraph.com/subgraphs/name/stakewise/ethereum-gnosis"
)
NETWORK_SUBGRAPH_URL = Template(
"""https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/FxV6YUix58SpYmLBwc9gEHkwjfkqwe1X5FJQjn8nKPyA"""
)
SUBGRAPH_HEADERS = {
"Accept": "application/json, multipart/mixed",
"Content-Type": "application/json",
}
QUERY_BATCH_SIZE = 1000
DATETIME_60_DAYS_AGO = datetime.now(UTC) - timedelta(days=60)
DATETIME_10_DAYS_AGO = datetime.now(UTC) - timedelta(days=10)
DATETIME_10_HOURS_AGO = datetime.now(UTC) - timedelta(hours=10)
BLOCK_NUMBER = Template(
"""
{
blocks(
first: 1,
orderBy: timestamp,
orderDirection: asc,
where: {
timestamp_gte: "${timestamp_from}",
timestamp_lte: "${timestamp_to}"
}
){
id,
number,
}
}
"""
)
LATEST_BLOCK_QUERY = """
{
blocks(
first: 1,
orderBy: timestamp,
orderDirection: desc,
){
id,
number,
}
}
"""
def fetch_last_block_number() -> dict:
# print(f"Sending query for the subgraph = {query}")
network_subgraph_url = NETWORK_SUBGRAPH_URL.substitute(
subgraph_api_key=SUBGRAPH_API_KEY
)
query = LATEST_BLOCK_QUERY
response = requests.post(
network_subgraph_url,
headers=SUBGRAPH_HEADERS,
json={"query": query},
timeout=300,
)
result_json = response.json()
print(f"Response of the query={result_json}")
blocks = result_json.get("data", {}).get("blocks", "")
if len(blocks) == 0:
raise ValueError(f"The query {query} did not return any results")
return blocks[0]
def fetch_block_number(timestamp_from: int, timestamp_to: int) -> dict:
"""Get a block number by its timestamp margins."""
query = BLOCK_NUMBER.substitute(
timestamp_from=timestamp_from, timestamp_to=timestamp_to
)
# print(f"Sending query for the subgraph = {query}")
network_subgraph_url = NETWORK_SUBGRAPH_URL.substitute(
subgraph_api_key=SUBGRAPH_API_KEY
)
response = requests.post(
network_subgraph_url,
headers=SUBGRAPH_HEADERS,
json={"query": query},
timeout=300,
)
# print(f"block query: {query}")
result_json = response.json()
# print(f"Response of the query={result_json}")
blocks = result_json.get("data", {}).get("blocks", "")
if len(blocks) == 0:
raise ValueError(f"The query {query} did not return any results")
return blocks[0]
def get_mech_info_2024() -> dict[str, Any]:
"""Query the subgraph to get the 2024 information from mech."""
date = "2024-01-01"
datetime_jan_2024 = datetime.strptime(date, "%Y-%m-%d")
timestamp_jan_2024 = int(datetime_jan_2024.timestamp())
margin = timedelta(seconds=5)
timestamp_jan_2024_plus_margin = int((datetime_jan_2024 + margin).timestamp())
jan_block_number = fetch_block_number(
timestamp_jan_2024, timestamp_jan_2024_plus_margin
)
# expecting only one block
jan_block_number = jan_block_number.get("number", "")
if jan_block_number.isdigit():
jan_block_number = int(jan_block_number)
if jan_block_number == "":
raise ValueError(
"Could not find a valid block number for the first of January 2024"
)
MECH_TO_INFO = {
# this block number is when the creator had its first tx ever, and after this mech's creation
"0xff82123dfb52ab75c417195c5fdb87630145ae81": (
"old_mech_abi.json",
jan_block_number,
),
# this block number is when this mech was created
"0x77af31de935740567cf4ff1986d04b2c964a786a": (
"new_mech_abi.json",
jan_block_number,
),
}
return MECH_TO_INFO
def get_last_block_number() -> int:
last_block_number = fetch_last_block_number()
# expecting only one block
last_block_number = last_block_number.get("number", "")
if last_block_number.isdigit():
last_block_number = int(last_block_number)
if last_block_number == "":
raise ValueError("Could not find a valid block number for last month data")
return last_block_number
def get_last_60_days_block_number() -> int:
timestamp_60_days_ago = int((DATETIME_60_DAYS_AGO).timestamp())
margin = timedelta(seconds=5)
timestamp_60_days_ago_plus_margin = int((DATETIME_60_DAYS_AGO + margin).timestamp())
last_month_block_number = fetch_block_number(
timestamp_60_days_ago, timestamp_60_days_ago_plus_margin
)
# expecting only one block
last_month_block_number = last_month_block_number.get("number", "")
if last_month_block_number.isdigit():
last_month_block_number = int(last_month_block_number)
if last_month_block_number == "":
raise ValueError("Could not find a valid block number for last month data")
return last_month_block_number
def get_mech_info_last_60_days() -> dict[str, Any]:
"""Query the subgraph to get the last 60 days of information from mech."""
last_month_block_number = get_last_60_days_block_number()
MECH_TO_INFO = {
# this block number is when the creator had its first tx ever, and after this mech's creation
"0xff82123dfb52ab75c417195c5fdb87630145ae81": (
"old_mech_abi.json",
last_month_block_number,
),
# this block number is when this mech was created
"0x77af31de935740567cf4ff1986d04b2c964a786a": (
"new_mech_abi.json",
last_month_block_number,
),
}
print(f"last 60 days block number {last_month_block_number}")
return MECH_TO_INFO
def get_mech_info_last_10_days() -> dict[str, Any]:
"""Query the subgraph to get the last 10 days of information from mech."""
timestamp_10_days_ago = int((DATETIME_10_DAYS_AGO).timestamp())
margin = timedelta(seconds=5)
timestamp_10_days_ago_plus_margin = int((DATETIME_10_DAYS_AGO + margin).timestamp())
last_month_block_number = fetch_block_number(
timestamp_10_days_ago, timestamp_10_days_ago_plus_margin
)
# expecting only one block
last_month_block_number = last_month_block_number.get("number", "")
if last_month_block_number.isdigit():
last_month_block_number = int(last_month_block_number)
if last_month_block_number == "":
raise ValueError("Could not find a valid block number for last month data")
MECH_TO_INFO = {
# this block number is when the creator had its first tx ever, and after this mech's creation
"0xff82123dfb52ab75c417195c5fdb87630145ae81": (
"old_mech_abi.json",
last_month_block_number,
),
# this block number is when this mech was created
"0x77af31de935740567cf4ff1986d04b2c964a786a": (
"new_mech_abi.json",
last_month_block_number,
),
}
print(f"last 10 days block number {last_month_block_number}")
return MECH_TO_INFO
@measure_execution_time
def get_mech_events_last_60_days():
earliest_block_number = get_last_60_days_block_number()
last_block_number = get_last_block_number()
# mech requests
requests_dict, duplicatedReqId = collect_all_mech_requests(
from_block=earliest_block_number, to_block=last_block_number
)
# mech delivers
delivers_dict, duplicatedIds = collect_all_mech_delivers(
from_block=earliest_block_number, to_block=last_block_number
)
# clean delivers
clean_mech_delivers()
# solve duplicated requestIds
block_map = fix_duplicate_requestIds()
# merge the two files into one source
not_found = merge_requests_delivers()
# Add ipfs contents
get_ipfs_data()
if __name__ == "__main__":
get_mech_events_last_60_days()
# result = get_mech_info_last_60_days()
# print(result)