bkb2135 committed on
Commit
c6ce978
·
1 Parent(s): c22f824

Clean up files and Syntax

Browse files
.gitattributes DELETED
@@ -1 +0,0 @@
1
- data/wandb/tzebw6rb.parquet filter=lfs diff=lfs merge=lfs -text
 
 
assets/macrocosmos-black.png DELETED
Binary file (161 kB)
 
assets/macrocosmos-white.png DELETED
Binary file (151 kB)
 
test.py DELETED
@@ -1,17 +0,0 @@
1
- import pytest
2
-
3
-
4
- def test_query_network():
5
- pass
6
-
7
-
8
- def test_filter_completions():
9
- pass
10
-
11
-
12
- def test_guess_task_name():
13
- pass
14
-
15
-
16
- def test_ensemble_completions():
17
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils.py CHANGED
@@ -52,12 +52,12 @@ EXTRACTORS = {
52
  'created_at': lambda x: pd.Timestamp(x.created_at),
53
  'last_event_at': lambda x: pd.Timestamp(x.summary.get('_timestamp'), unit='s'),
54
 
55
- 'netuid': lambda x: x.config.get('netuid'),
56
- 'mock': lambda x: x.config.get('neuron').get('mock'),
57
- 'sample_size': lambda x: x.config.get('neuron').get('sample_size'),
58
- 'timeout': lambda x: x.config.get('neuron').get('timeout'),
59
- 'epoch_length': lambda x: x.config.get('neuron').get('epoch_length'),
60
- 'disable_set_weights': lambda x: x.config.get('neuron').get('disable_set_weights'),
61
 
62
  # This stuff is from the last logged event
63
  'num_steps': lambda x: x.summary.get('_step'),
@@ -176,8 +176,9 @@ def build_data(timestamp=None, path=BASE_PATH, min_steps=MIN_STEPS, use_cache=Tr
176
  n_events += num_steps
177
  prog_msg = f'Loading data {i/len(runs)*100:.0f}%, (total {n_events:,.0f} events)'
178
  progress.progress(i/len(runs),text=f'{prog_msg}... **downloading** `{os.path.join(*run.path)}`')
 
 
179
 
180
- run_data.append(run)
181
 
182
  progress.empty()
183
 
@@ -249,7 +250,7 @@ def get_productivity(df_runs):
249
 
250
  total_duration = df_runs.last_event_at.max() - df_runs.created_at.min()
251
  total_steps = df_runs.num_steps.sum()
252
- total_completions = (df_runs.num_steps*df_runs.sample_size).sum()
253
  total_completion_words = (df_runs.num_steps*df_runs.completion_words).sum()
254
  total_completion_tokens = round(total_completion_words/0.75)
255
  total_validator_words = (df_runs.num_steps*df_runs.apply(lambda x: len(str(x.query).split()) + len(str(x.challenge).split()) + len(str(x.reference).split()), axis=1 )).sum()
@@ -266,7 +267,7 @@ def get_productivity(df_runs):
266
  }
267
 
268
  @st.cache_data(show_spinner=False)
269
- def get_reward_stats(df, exclude_multiturn=True, freq='1D', remove_zero_rewards=True, agg='mean', date_min='2024-01-22', date_max='2024-06-25'):
270
 
271
  df = df.loc[df._timestamp.between(pd.Timestamp(date_min), pd.Timestamp(date_max))]
272
  if exclude_multiturn:
@@ -378,7 +379,7 @@ def load_state_vars(username=USERNAME, percentile=0.95):
378
 
379
  df_runs = build_data(time.time()//UPDATE_INTERVAL, use_cache=False)
380
 
381
- df_runs = df_runs.loc[df_runs.netuid.isin([1,61,102])]
382
  st.toast(f'Loaded {len(df_runs)} runs')
383
 
384
  df_vali = df_runs.loc[df_runs.username == username]
 
52
  'created_at': lambda x: pd.Timestamp(x.created_at),
53
  'last_event_at': lambda x: pd.Timestamp(x.summary.get('_timestamp'), unit='s'),
54
 
55
+ # 'netuid': lambda x: x.config.get('netuid'),
56
+ # 'mock': lambda x: x.config.get('neuron').get('mock'),
57
+ # 'sample_size': lambda x: x.config.get('neuron').get('sample_size'),
58
+ # 'timeout': lambda x: x.config.get('neuron').get('timeout'),
59
+ # 'epoch_length': lambda x: x.config.get('neuron').get('epoch_length'),
60
+ # 'disable_set_weights': lambda x: x.config.get('neuron').get('disable_set_weights'),
61
 
62
  # This stuff is from the last logged event
63
  'num_steps': lambda x: x.summary.get('_step'),
 
176
  n_events += num_steps
177
  prog_msg = f'Loading data {i/len(runs)*100:.0f}%, (total {n_events:,.0f} events)'
178
  progress.progress(i/len(runs),text=f'{prog_msg}... **downloading** `{os.path.join(*run.path)}`')
179
+ if 'netuid_1' in run.tags or 'netuid_61' in run.tags or 'netuid_102' in run.tags:
180
+ run_data.append(run)
181
 
 
182
 
183
  progress.empty()
184
 
 
250
 
251
  total_duration = df_runs.last_event_at.max() - df_runs.created_at.min()
252
  total_steps = df_runs.num_steps.sum()
253
+ total_completions = (df_runs.num_steps*100).sum() #TODO: Parse from df
254
  total_completion_words = (df_runs.num_steps*df_runs.completion_words).sum()
255
  total_completion_tokens = round(total_completion_words/0.75)
256
  total_validator_words = (df_runs.num_steps*df_runs.apply(lambda x: len(str(x.query).split()) + len(str(x.challenge).split()) + len(str(x.reference).split()), axis=1 )).sum()
 
267
  }
268
 
269
  @st.cache_data(show_spinner=False)
270
+ def get_reward_stats(df, exclude_multiturn=True, freq='D', remove_zero_rewards=True, agg='mean', date_min='2024-01-22', date_max='2024-08-12'): #TODO: Set the date_max to the current date
271
 
272
  df = df.loc[df._timestamp.between(pd.Timestamp(date_min), pd.Timestamp(date_max))]
273
  if exclude_multiturn:
 
379
 
380
  df_runs = build_data(time.time()//UPDATE_INTERVAL, use_cache=False)
381
 
382
+ # df_runs = df_runs.loc[df_runs.netuid.isin([1,61,102])] # Now we filter for the netuid tag in build_data
383
  st.toast(f'Loaded {len(df_runs)} runs')
384
 
385
  df_vali = df_runs.loc[df_runs.username == username]