rosacastillo committed
Commit 7208a5f · Parent: 42514f7

new data and removing old info script

data/all_trades_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3068c54295d43c0d40f331cf3ad988fb8bf150bed0c948d3103161d7d7065f38
-size 3292156
+oid sha256:223f85e66279e8e12547e53f16efb0af7c9c902578b1cc529c878f7ee7379ce6
+size 3551233
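
Every file under data/ in this commit is a Git LFS pointer, so these diffs show only the new object hash and byte size, not row-level changes. A minimal sketch of inspecting one of the refreshed files after git lfs pull has materialized the real parquet behind the pointer (no column names are assumed beyond what the path implies):

import pandas as pd

# After `git lfs pull`, the path holds the ~3.5 MB parquet object
# rather than the three-line pointer shown in the diff above.
df = pd.read_parquet("data/all_trades_profitability.parquet")
print(df.shape)   # number of trade rows and columns
print(df.dtypes)  # schema overview; column names are not part of this diff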
data/fpmmTrades.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:59bd9c58e15de9dcb4ae76cd8adca7750b460abfa2bdf79ee5042d3e3b5c396e
-size 13934569
+oid sha256:4a55fcd7ac6fe8e8008fd8395edd39cc3ac0a05ab202f4296c22943f42f33470
+size 18283162
data/fpmms.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:16b0570a9c07e0ef5d137ede96584fcfe1645a784a7380a83b9bdfa5829ad3e2
-size 515347
+oid sha256:61ce94d0b622f72f695a244890bf91ae6286e793bd65cb5393a3d8bc810ff1a5
+size 525473
data/invalid_trades.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:18f4b47e3c764b8c7f157b4b408d0c97e3436f58d86eb39edecf2a7cf2748a21
-size 84033
+oid sha256:4400412e24b55e93d070b2a153e5a8c1a13f2f6f4c22c15eeb1aa7c2cce2e151
+size 101019
data/new_fpmmTrades.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84442b8ab800d01ec66e0c78efebfc31a5d954d3fdddfaf5fab41a75030a3967
-size 3267040
+oid sha256:2179890c22497ae3572341fed5eb8b73bddc90f27c7b5c60037edd8170e74e51
+size 4542500
data/new_tools.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67da51ef342b56056a5bfb49f78ecab7354731e2cd88d16ccbffeaa141e175ec
-size 64443733
+oid sha256:6afba2f5c6e14a81abbf1f936ddb0f83ada76c4515a6daf86fbc01ceba1a6e32
+size 79645959
data/outliers.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72326e188a845663048e6ebf368045dfc387eac9a54a38303e9020f5ca112ad6
-size 18966
+oid sha256:494d9e9d0a20fcac4699dd4bf7bc658a50a124e8e33060997dd5eed0ce4863bc
+size 18711
data/summary_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:522003ca4b76df815bf662ebc92478bc103652ac9f82dc82718578c26c650509
-size 87497
+oid sha256:8ef6ed929a47113c2afac034356a094b8af089bb03f9e5a673c6cc2f27978120
+size 94924
data/t_map.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69d6fcd0360c5bbd646fa748b3f5a1e4bcccae358f32c85aa96509cdb6319c76
-size 24153722
+oid sha256:ed3bcdd7a1c5ed48707a5fdb0dd16775d2ba347c7416fe6d4b5dc3a71dfea212
+size 25526067
data/tools.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa6d6407da787ae9d2ed80233d939f57feae82cd66a8193937b861c601f24828
-size 406224765
+oid sha256:5e4acfa4aa1c8c80150269caf40fe6ff16e7aff3c90e2c4ca809d29357a6c36b
+size 448917044
data/tools_accuracy.csv CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0113d5cbc3c5eb981b5f1c5a7776f616fc52f44b15b5f96880a16989fa07d16
-size 1240
+oid sha256:174f3deae7eb082b6623e409771f17f538e94f74bb4fcfa62ea211c60ff3404a
+size 1339
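
The oid sha256 line in each pointer is the SHA-256 digest of the full file contents, which makes a local checkout verifiable. A small sketch, assuming the smudged file is already present on disk:

import hashlib

def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
    # Compute the sha256 digest that Git LFS records in the pointer file.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Should print 174f3deae7eb... to match the new pointer for tools_accuracy.csv.
print(lfs_oid("data/tools_accuracy.csv"))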
scripts/cleaning_old_info.py CHANGED
@@ -63,4 +63,4 @@ def clean_old_data_from_parquet_files(cutoff_date: str):
 
 
 if __name__ == "__main__":
-    clean_old_data_from_parquet_files("2024-09-15")
+    clean_old_data_from_parquet_files("2024-09-22")
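
The cutoff passed to clean_old_data_from_parquet_files moves forward exactly one week, from 2024-09-15 to 2024-09-22. The function body is outside this diff; a hypothetical sketch of the pruning it implies, assuming each parquet carries a timestamp column (the name creation_timestamp is an assumption, not taken from the source):

import pandas as pd

def clean_old_rows(path: str, cutoff_date: str, ts_col: str = "creation_timestamp") -> None:
    # Keep only rows at or after the cutoff and rewrite the file in place.
    # ts_col is a hypothetical column name; the real script may differ.
    df = pd.read_parquet(path)
    cutoff = pd.Timestamp(cutoff_date, tz="UTC")
    df = df[pd.to_datetime(df[ts_col], utc=True) >= cutoff]
    df.to_parquet(path, index=False)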
scripts/get_mech_info.py CHANGED
@@ -330,6 +330,8 @@ def get_mech_events_since_last_run():
         to_block=last_block_number,
         filename="new_mech_delivers.json",
     )
+    if delivers_dict is None:
+        return None
     # clean delivers
     clean_mech_delivers("new_mech_requests.json", "new_mech_delivers.json")
 
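
The new guard turns a failed subgraph fetch into an early None return instead of letting the pipeline continue against a half-written new_mech_delivers.json. A minimal sketch of the pattern, with a hypothetical stand-in for the real collector:

from typing import Optional, Tuple

def collect_delivers() -> Tuple[Optional[dict], Optional[list]]:
    # Mirrors the new error path in collect_all_mech_delivers: on a failed
    # query it now returns (None, None) instead of a bare return.
    try:
        raise RuntimeError("subgraph timeout")  # simulated failure
    except Exception as e:
        print(f"Error while getting the response: {e}")
        return None, None

delivers_dict, duplicated_ids = collect_delivers()
if delivers_dict is None:  # the guard this commit adds
    print("aborting: no delivers fetched")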
scripts/mech_request_utils.py CHANGED
@@ -21,6 +21,7 @@
 import json
 import time
 import pickle
+from random import uniform
 from typing import Any, Dict, List, Tuple
 from pathlib import Path
 import requests
@@ -124,9 +125,10 @@ def collect_all_mech_requests(from_block: int, to_block: int, filename: str) ->
             "blockNumber_lte": str(to_block),  # str
         }
         try:
-            response = client.execute(
-                gql(REQUESTS_QUERY_FILTER), variable_values=variables
-            )
+            # response = client.execute(
+            #     gql(REQUESTS_QUERY_FILTER), variable_values=variables
+            # )
+            response = fetch_with_retry(client, REQUESTS_QUERY_FILTER, variables)
 
             items = response.get("requests", [])
 
@@ -154,10 +156,21 @@ def collect_all_mech_requests(from_block: int, to_block: int, filename: str) ->
     return mech_requests, duplicated_reqIds
 
 
+def fetch_with_retry(client, query, variables, max_retries=5):
+    for attempt in range(max_retries):
+        try:
+            return client.execute(gql(query), variable_values=variables)
+        except Exception as e:
+            if attempt == max_retries - 1:
+                raise e
+            wait_time = (2**attempt) + uniform(0, 1)  # exponential backoff with jitter
+            time.sleep(wait_time)
+
+
 def collect_all_mech_delivers(from_block: int, to_block: int, filename: str) -> Tuple:
 
     print(f"Fetching all mech delivers from {from_block} to {to_block}")
-    # TODO save as new json file, check how to merge later json files
+
     mech_delivers = {}
     duplicated_requestIds = []
     transport = RequestsHTTPTransport(url=THEGRAPH_ENDPOINT)
@@ -173,9 +186,10 @@ def collect_all_mech_delivers(from_block: int, to_block: int, filename: str) ->
             "blockNumber_lte": str(to_block),  # str
         }
         try:
-            response = client.execute(
-                gql(DELIVERS_QUERY_NO_FILTER), variable_values=variables
-            )
+            # response = client.execute(
+            #     gql(DELIVERS_QUERY_NO_FILTER), variable_values=variables
+            # )
+            response = fetch_with_retry(client, DELIVERS_QUERY_NO_FILTER, variables)
             items = response.get("delivers", [])
 
             if not items:
@@ -187,10 +201,9 @@ def collect_all_mech_delivers(from_block: int, to_block: int, filename: str) ->
                 else:
                     duplicated_requestIds.append(mech_deliver["requestId"])
                     # we will handle the duplicated later
-                    mech_delivers[mech_deliver["requestId"]].append(mech_deliver)
         except Exception as e:
             print(f"Error while getting the response: {e}")
-            return
+            return None, None
 
         id_gt = items[-1]["id"]
         time.sleep(IPFS_POLL_INTERVAL)
@@ -219,9 +232,7 @@ def collect_missing_delivers(request_id: int, block_number: int) -> Dict[str, Any]:
             "blockNumber_lte": str(to_block),  # str
         }
        try:
-            response = client.execute(
-                gql(MISSING_DELIVERS_QUERY), variable_values=variables
-            )
+            response = fetch_with_retry(client, MISSING_DELIVERS_QUERY, variables)
             items = response.get("delivers", [])
             # If the user sends requests with the same values (tool, prompt, nonce) it
             # will generate the same requestId. Therefore, multiple items can be retrieved
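
fetch_with_retry now wraps every client.execute call with exponential backoff plus jitter, so transient subgraph errors cost roughly 1, 2, 4, then 8 seconds of waiting (each plus up to one second of jitter) before the exception finally propagates. A standalone sketch of the same schedule against a flaky callable, so it can be exercised without a GraphQL endpoint:

import time
from random import uniform

def retry_with_backoff(fn, max_retries=5):
    # Same schedule as the helper added in this commit: 2**attempt + jitter.
    for attempt in range(max_retries):
        try:
            return fn()
        except Exception:
            if attempt == max_retries - 1:
                raise
            wait_time = (2**attempt) + uniform(0, 1)
            print(f"attempt {attempt} failed, sleeping {wait_time:.2f}s")
            time.sleep(wait_time)

calls = {"n": 0}

def flaky():
    calls["n"] += 1
    if calls["n"] < 3:
        raise RuntimeError("transient error")
    return "ok"

print(retry_with_backoff(flaky))  # fails twice, sleeps ~1s then ~2s, returns "ok"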
scripts/pull_data.py CHANGED
@@ -21,6 +21,7 @@ from get_mech_info import (
     update_json_files,
 )
 from update_tools_accuracy import compute_tools_accuracy
+from cleaning_old_info import clean_old_data_from_parquet_files
 import gc
 
 logging.basicConfig(level=logging.INFO)
@@ -131,6 +132,9 @@ def only_new_weekly_analysis():
     logging.info("Generating the mech json files")
     # get only new data
     latest_timestamp = get_mech_events_since_last_run()
+    if latest_timestamp is None:
+        print("Error while getting the mech events")
+        return
     logging.info(f"Finished generating the mech json files from {latest_timestamp}")
 
     # Run tools ETL
@@ -162,6 +166,8 @@ def only_new_weekly_analysis():
         logging.error("Error while updating timestamps of tools")
         print(e)
 
+    clean_old_data_from_parquet_files("2024-09-22")
+
     compute_tools_accuracy()
 
     logging.info("Weekly analysis files generated and saved")
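
The cutoff date here is hard-coded and bumped by hand each week, in step with the same literal in cleaning_old_info.py. A hypothetical alternative that derives the cutoff at run time (a sketch only, not what the script currently does):

from datetime import datetime, timedelta, timezone

def weekly_cutoff(weeks_back: int = 1) -> str:
    # E.g. running on 2024-09-29 with weeks_back=1 yields "2024-09-22".
    cutoff = datetime.now(timezone.utc) - timedelta(weeks=weeks_back)
    return cutoff.strftime("%Y-%m-%d")

# clean_old_data_from_parquet_files(weekly_cutoff())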
tabs/trades.py CHANGED
@@ -105,7 +105,7 @@ def get_overall_winning_by_market_and_trader_type(
 
 
 def plot_trades_by_week(trades_df: pd.DataFrame) -> gr.BarPlot:
-    """Plots the trades data for the given tools and calculates the winning percentage."""
+    """Plots the weekly trades data."""
     return gr.BarPlot(
         value=trades_df,
         x="month_year_week",
  x="month_year_week",