steffenc committed on
Commit
163582f
·
unverified ·
2 Parent(s): ac041e1 27f17e9

Merge pull request #3 from opentensor/mvp-dashboard

Browse files
opendashboards/dashboard.py → dashboard.py RENAMED
File without changes
opendashboards/assets/inspect.py CHANGED
@@ -3,7 +3,6 @@ import streamlit as st
3
  import pandas as pd
4
  import opendashboards.utils.utils as utils
5
 
6
-
7
  @st.cache_data
8
  def explode_data(df):
9
  list_cols = utils.get_list_col_lengths(df)
 
3
  import pandas as pd
4
  import opendashboards.utils.utils as utils
5
 
 
6
  @st.cache_data
7
  def explode_data(df):
8
  list_cols = utils.get_list_col_lengths(df)
opendashboards/assets/io.py CHANGED
@@ -5,13 +5,12 @@ import streamlit as st
5
 
6
  import opendashboards.utils.utils as utils
7
 
8
- BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
9
 
10
  @st.cache_data
11
  def load_runs(project, filters, min_steps=10):
12
  runs = []
13
  msg = st.empty()
14
- for run in utils.get_runs(project, filters):
15
  step = run.summary.get('_step',0)
16
  if step < min_steps:
17
  msg.warning(f'Skipped run `{run.name}` because it contains {step} events (<{min_steps})')
@@ -48,17 +47,19 @@ def load_data(selected_runs, load=True, save=False):
48
 
49
  frames = []
50
  n_events = 0
 
51
  progress = st.progress(0, 'Loading data')
52
  info = st.empty()
 
 
53
  for i, idx in enumerate(selected_runs.index):
54
  run = selected_runs.loc[idx]
55
- prog_msg = f'Loading data {i/len(selected_runs)*100:.0f}% ({i}/{len(selected_runs)} runs, {n_events} events)'
56
 
57
- rel_path = os.path.join('data',f'history-{run.id}.csv')
58
- file_path = os.path.join(BASE_DIR,rel_path)
59
 
60
  if load and os.path.exists(file_path):
61
- progress.progress(i/len(selected_runs),f'{prog_msg}... **reading** `{rel_path}`')
62
  try:
63
  df = utils.load_data(file_path)
64
  except Exception as e:
@@ -70,9 +71,8 @@ def load_data(selected_runs, load=True, save=False):
70
  try:
71
  # Download the history from wandb
72
  df = utils.download_data(run.path)
 
73
  df.assign(**run.to_dict())
74
- if not os.path.exists('data/'):
75
- os.makedirs(file_path)
76
 
77
  if save and run.state != 'running':
78
  df.to_csv(file_path, index=False)
@@ -84,6 +84,7 @@ def load_data(selected_runs, load=True, save=False):
84
 
85
  frames.append(df)
86
  n_events += df.shape[0]
 
87
 
88
  progress.empty()
89
  if not frames:
 
5
 
6
  import opendashboards.utils.utils as utils
7
 
 
8
 
9
  @st.cache_data
10
  def load_runs(project, filters, min_steps=10):
11
  runs = []
12
  msg = st.empty()
13
+ for run in utils.get_runs(project, filters, api_key=st.secrets['WANDB_API_KEY']):
14
  step = run.summary.get('_step',0)
15
  if step < min_steps:
16
  msg.warning(f'Skipped run `{run.name}` because it contains {step} events (<{min_steps})')
 
47
 
48
  frames = []
49
  n_events = 0
50
+ successful = 0
51
  progress = st.progress(0, 'Loading data')
52
  info = st.empty()
53
+ if not os.path.exists('data/'):
54
+ os.makedirs('data/')
55
  for i, idx in enumerate(selected_runs.index):
56
  run = selected_runs.loc[idx]
57
+ prog_msg = f'Loading data {i/len(selected_runs)*100:.0f}% ({successful}/{len(selected_runs)} runs, {n_events} events)'
58
 
59
+ file_path = os.path.join('data',f'history-{run.id}.csv')
 
60
 
61
  if load and os.path.exists(file_path):
62
+ progress.progress(i/len(selected_runs),f'{prog_msg}... **reading** `{file_path}`')
63
  try:
64
  df = utils.load_data(file_path)
65
  except Exception as e:
 
71
  try:
72
  # Download the history from wandb
73
  df = utils.download_data(run.path)
74
+ # Add metadata to the dataframe
75
  df.assign(**run.to_dict())
 
 
76
 
77
  if save and run.state != 'running':
78
  df.to_csv(file_path, index=False)
 
84
 
85
  frames.append(df)
86
  n_events += df.shape[0]
87
+ successful += 1
88
 
89
  progress.empty()
90
  if not frames:
opendashboards/assets/metric.py CHANGED
@@ -1,5 +1,3 @@
1
- import os
2
- import re
3
  import time
4
  import pandas as pd
5
  import streamlit as st
 
 
 
1
  import time
2
  import pandas as pd
3
  import streamlit as st
opendashboards/assets/plot.py CHANGED
@@ -1,6 +1,6 @@
1
 
2
  import streamlit as st
3
- import utils.plotting as plotting
4
 
5
  # @st.cache_data
6
  def uid_diversty(df, rm_failed=True):
 
1
 
2
  import streamlit as st
3
+ import opendashboards.utils.plotting as plotting
4
 
5
  # @st.cache_data
6
  def uid_diversty(df, rm_failed=True):
opendashboards/utils/plotting.py CHANGED
@@ -251,7 +251,7 @@ def plot_leaderboard(
251
 
252
  print(f"Using top {ntop} {group_on} by {agg_col}: \n{rankings}")
253
  return px.bar(
254
- x=rankings,
255
  y=index,
256
  color=rankings,
257
  orientation="h",
 
251
 
252
  print(f"Using top {ntop} {group_on} by {agg_col}: \n{rankings}")
253
  return px.bar(
254
+ x=rankings.astype(float),
255
  y=index,
256
  color=rankings,
257
  orientation="h",
opendashboards/utils/utils.py CHANGED
@@ -24,7 +24,7 @@ from pandas.api.types import is_list_like
24
  from typing import List, Dict, Any, Union
25
 
26
 
27
- def get_runs(project: str = "openvalidators", filters: Dict[str, Any] = None, return_paths: bool = False) -> List:
28
  """Download runs from wandb.
29
 
30
  Args:
@@ -35,8 +35,8 @@ def get_runs(project: str = "openvalidators", filters: Dict[str, Any] = None, re
35
  Returns:
36
  List[wandb.apis.public.Run]: List of runs or run paths (List[str]).
37
  """
38
- api = wandb.Api()
39
- wandb.login()
40
 
41
  runs = api.runs(project, filters=filters)
42
  if return_paths:
@@ -45,7 +45,7 @@ def get_runs(project: str = "openvalidators", filters: Dict[str, Any] = None, re
45
  return runs
46
 
47
 
48
- def download_data(run_path: Union[str, List] = None, timeout: float = 600) -> pd.DataFrame:
49
  """Download data from wandb.
50
 
51
  Args:
@@ -55,8 +55,8 @@ def download_data(run_path: Union[str, List] = None, timeout: float = 600) -> pd
55
  Returns:
56
  pd.DataFrame: Dataframe of event log.
57
  """
58
- api = wandb.Api(timeout=timeout)
59
- wandb.login()
60
 
61
  if isinstance(run_path, str):
62
  run_path = [run_path]
 
24
  from typing import List, Dict, Any, Union
25
 
26
 
27
+ def get_runs(project: str = "openvalidators", filters: Dict[str, Any] = None, return_paths: bool = False, api_key: str = None) -> List:
28
  """Download runs from wandb.
29
 
30
  Args:
 
35
  Returns:
36
  List[wandb.apis.public.Run]: List of runs or run paths (List[str]).
37
  """
38
+ api = wandb.Api(api_key=api_key)
39
+ wandb.login(anonymous="allow")
40
 
41
  runs = api.runs(project, filters=filters)
42
  if return_paths:
 
45
  return runs
46
 
47
 
48
+ def download_data(run_path: Union[str, List] = None, timeout: float = 600, api_key: str = None) -> pd.DataFrame:
49
  """Download data from wandb.
50
 
51
  Args:
 
55
  Returns:
56
  pd.DataFrame: Dataframe of event log.
57
  """
58
+ api = wandb.Api(api_key=api_key, timeout=timeout)
59
+ wandb.login(anonymous="allow")
60
 
61
  if isinstance(run_path, str):
62
  run_path = [run_path]