Spaces:

arshy
/

weekly-analysis

Sleeping

App Files Files Community

weekly-analysis / scripts /markets.py

arshy

initial commit

0869b01 about 1 year ago

raw

history blame

6.83 kB

	# -- coding: utf-8 --
	# ------------------------------------------------------------------------------
	#
	# Copyright 2023 Valory AG
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#
	# ------------------------------------------------------------------------------

	import functools
	import warnings
	from string import Template
	from typing import Optional, Generator, Callable

	import pandas as pd
	import requests
	from tqdm import tqdm

	from typing import List, Dict


	ResponseItemType = List[Dict[str, str]]
	SubgraphResponseType = Dict[str, ResponseItemType]


	CREATOR = "0x89c5cc945dd550BcFfb72Fe42BfF002429F46Fec"
	BATCH_SIZE = 1000
	OMEN_SUBGRAPH = "https://api.thegraph.com/subgraphs/name/protofire/omen-xdai"
	FPMMS_FIELD = "fixedProductMarketMakers"
	QUERY_FIELD = "query"
	ERROR_FIELD = "errors"
	DATA_FIELD = "data"
	ID_FIELD = "id"
	ANSWER_FIELD = "currentAnswer"
	QUESTION_FIELD = "question"
	OUTCOMES_FIELD = "outcomes"
	TITLE_FIELD = "title"
	MAX_UINT_HEX = "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
	DEFAULT_FILENAME = "fpmms.csv"

	FPMMS_QUERY = Template(
	"""
	{
	${fpmms_field}(
	where: {
	creator: "${creator}",
	id_gt: "${fpmm_id}",
	isPendingArbitration: false
	},
	orderBy: ${id_field}
	first: ${first}
	){
	${id_field}
	${answer_field}
	${question_field} {
	${outcomes_field}
	}
	${title_field}
	}
	}
	"""
	)


	class RetriesExceeded(Exception):
	"""Exception to raise when retries are exceeded during data-fetching."""

	def __init__(
	self, msg="Maximum retries were exceeded while trying to fetch the data!"
	):
	super().__init__(msg)


	def hacky_retry(func: Callable, n_retries: int = 3) -> Callable:
	"""Create a hacky retry strategy.
	Unfortunately, we cannot use `requests.packages.urllib3.util.retry.Retry`,
	because the subgraph does not return the appropriate status codes in case of failure.
	Instead, it always returns code 200. Thus, we raise exceptions manually inside `make_request`,
	catch those exceptions in the hacky retry decorator and try again.
	Finally, if the allowed number of retries is exceeded, we raise a custom `RetriesExceeded` exception.

	:param func: the input request function.
	:param n_retries: the maximum allowed number of retries.
	:return: The request method with the hacky retry strategy applied.
	"""

	@functools.wraps(func)
	def wrapper_hacky_retry(args, *kwargs) -> SubgraphResponseType:
	"""The wrapper for the hacky retry.

	:return: a response dictionary.
	"""
	retried = 0

	while retried <= n_retries:
	try:
	if retried > 0:
	warnings.warn(f"Retrying {retried}/{n_retries}...")

	return func(args, *kwargs)
	except (ValueError, ConnectionError) as e:
	warnings.warn(e.args[0])
	finally:
	retried += 1

	raise RetriesExceeded()

	return wrapper_hacky_retry


	@hacky_retry
	def query_subgraph(url: str, query: str, key: str) -> SubgraphResponseType:
	"""Query a subgraph.

	Args:
	url: the subgraph's URL.
	query: the query to be used.
	key: the key to use in order to access the required data.

	Returns:
	a response dictionary.
	"""
	content = {QUERY_FIELD: query}
	headers = {
	"Accept": "application/json",
	"Content-Type": "application/json",
	}
	res = requests.post(url, json=content, headers=headers)

	if res.status_code != 200:
	raise ConnectionError(
	"Something went wrong while trying to communicate with the subgraph "
	f"(Error: {res.status_code})!\n{res.text}"
	)

	body = res.json()
	if ERROR_FIELD in body.keys():
	raise ValueError(f"The given query is not correct: {body[ERROR_FIELD]}")

	data = body.get(DATA_FIELD, {}).get(key, None)
	if data is None:
	raise ValueError(f"Unknown error encountered!\nRaw response: \n{body}")

	return data


	def fpmms_fetcher() -> Generator[ResponseItemType, int, None]:
	"""An indefinite fetcher for the FPMMs."""
	while True:
	fpmm_id = yield
	fpmms_query = FPMMS_QUERY.substitute(
	creator=CREATOR,
	fpmm_id=fpmm_id,
	fpmms_field=FPMMS_FIELD,
	first=BATCH_SIZE,
	id_field=ID_FIELD,
	answer_field=ANSWER_FIELD,
	question_field=QUESTION_FIELD,
	outcomes_field=OUTCOMES_FIELD,
	title_field=TITLE_FIELD,
	)
	yield query_subgraph(OMEN_SUBGRAPH, fpmms_query, FPMMS_FIELD)


	def fetch_fpmms() -> pd.DataFrame:
	"""Fetch all the fpmms of the creator."""
	latest_id = ""
	fpmms = []
	fetcher = fpmms_fetcher()
	for _ in tqdm(fetcher, unit="fpmms", unit_scale=BATCH_SIZE):
	batch = fetcher.send(latest_id)
	if len(batch) == 0:
	break

	latest_id = batch[-1].get(ID_FIELD, "")
	if latest_id == "":
	raise ValueError(f"Unexpected data format retrieved: {batch}")

	fpmms.extend(batch)

	return pd.DataFrame(fpmms)


	def get_answer(fpmm: pd.Series) -> str:
	"""Get an answer from its index, using Series of an FPMM."""
	return fpmm[QUESTION_FIELD][OUTCOMES_FIELD][fpmm[ANSWER_FIELD]]


	def transform_fpmms(fpmms: pd.DataFrame) -> pd.DataFrame:
	"""Transform an FPMMS dataframe."""
	transformed = fpmms.dropna()
	transformed = transformed.drop_duplicates([ID_FIELD])
	transformed = transformed.loc[transformed[ANSWER_FIELD] != MAX_UINT_HEX]
	transformed.loc[:, ANSWER_FIELD] = (
	transformed[ANSWER_FIELD].str.slice(-1).astype(int)
	)
	transformed.loc[:, ANSWER_FIELD] = transformed.apply(get_answer, axis=1)
	transformed = transformed.drop(columns=[QUESTION_FIELD])

	return transformed


	def etl(filename: Optional[str] = None) -> pd.DataFrame:
	"""Fetch, process, store and return the markets as a Dataframe."""
	fpmms = fetch_fpmms()
	fpmms = transform_fpmms(fpmms)

	if filename:
	fpmms.to_csv(filename, index=False)

	return fpmms


	if __name__ == "__main__":
	etl(DEFAULT_FILENAME)