Spaces:

CALM
/

Dashboard

Runtime error

App Files Files Community

justheuristic committed on Dec 7, 2021

Commit

f3aa1a2

unverified ·

2 Parent(s): 08e475f b9cce0e

Merge pull request #2 from training-transformers-together/LS/add-leaderboard

Browse files

Files changed (4) hide show

app.py +99 -12
dashboard_utils/bubbles.py +50 -4
dashboard_utils/main_metrics.py +1 -1
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -2,23 +2,97 @@ import pandas as pd
 import streamlit as st
 import wandb
-from dashboard_utils.bubbles import get_new_bubble_data
 from dashboard_utils.main_metrics import get_main_metrics
 from streamlit_observable import observable
 # Only need to set these here as we are add controls outside of Hydralit, to customise a run Hydralit!
-st.set_page_config(page_title="Dashboard", layout="centered")
 wandb.login(anonymous="must")
-st.markdown("<h1 style='text-align: center;'>Dashboard</h1>", unsafe_allow_html=True)
-st.caption("Training Loss")
 steps, dates, losses, alive_peers = get_main_metrics()
 source = pd.DataFrame({"steps": steps, "loss": losses, "alive participants": alive_peers, "date": dates})
-st.vega_lite_chart(
     source,
     {
         "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
@@ -30,8 +104,7 @@ st.vega_lite_chart(
     use_container_width=True,
 )
-st.caption("Number of alive runs over time")
-st.vega_lite_chart(
     source,
     {
         "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
@@ -45,8 +118,7 @@ st.vega_lite_chart(
     },
     use_container_width=True,
 )
-st.caption("Number of steps")
-st.vega_lite_chart(
     source,
     {
         "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
@@ -58,11 +130,26 @@ st.vega_lite_chart(
     use_container_width=True,
 )
-st.header("Collaborative training participants")
 serialized_data, profiles = get_new_bubble_data()
 observable(
-    "Participants",
     notebook="d/9ae236a507f54046",  # "@huggingface/participants-bubbles-chart",
     targets=["c_noaws"],
-    redefine={"serializedData": serialized_data, "profileSimple": profiles},
 )

 import streamlit as st
 import wandb
+from dashboard_utils.bubbles import get_global_metrics, get_new_bubble_data, get_leaderboard
 from dashboard_utils.main_metrics import get_main_metrics
 from streamlit_observable import observable
+import time
+import requests
+import streamlit as st
+from streamlit_lottie import st_lottie
+def load_lottieurl(url: str, timeout: float = 10.0):
+    """Download a Lottie animation from ``url`` and return its parsed JSON.
+
+    Args:
+        url: Address of the Lottie JSON document.
+        timeout: Seconds to wait for the server before giving up. Without a
+            timeout ``requests.get`` can block indefinitely on a stalled host.
+
+    Returns:
+        The decoded JSON payload, or ``None`` when the request fails, the
+        response status is not 200, or the body is not valid JSON.
+    """
+    try:
+        r = requests.get(url, timeout=timeout)
+    except requests.RequestException:
+        # Connection errors and timeouts are reported the same way as a
+        # non-200 answer: the caller gets None instead of an exception.
+        return None
+    if r.status_code != 200:
+        return None
+    try:
+        return r.json()
+    except ValueError:
+        # JSON decoding errors raised by requests are ValueError subclasses.
+        return None
 # Only need to set these here as we are add controls outside of Hydralit, to customise a run Hydralit!
+st.set_page_config(page_title="Dashboard", layout="wide")
+st.markdown("<h1 style='text-align: center;'>Dashboard</h1>", unsafe_allow_html=True)
+key_figures_margin_left, key_figures_c1, key_figures_c2, key_figures_c3, key_figures_margin_right = st.columns(
+    (2, 1, 1, 1, 2)
+)
+chart_c1, chart_c2 = st.columns((3, 2))
+lottie_url_loading = "https://assets5.lottiefiles.com/packages/lf20_OdNgAj.json"
+lottie_loading = load_lottieurl(lottie_url_loading)
+with key_figures_c1:
+    st.caption("\# of contributing users")
+    placeholder_key_figures_c1 = st.empty()
+    with placeholder_key_figures_c1:
+        st_lottie(lottie_loading, height=100, key="loading_key_figure_c1")
+with key_figures_c2:
+    st.caption("\# active users")
+    placeholder_key_figures_c2 = st.empty()
+    with placeholder_key_figures_c2:
+        st_lottie(lottie_loading, height=100, key="loading_key_figure_c2")
+with key_figures_c3:
+    st.caption("Total runtime")
+    placeholder_key_figures_c3 = st.empty()
+    with placeholder_key_figures_c3:
+        st_lottie(lottie_loading, height=100, key="loading_key_figure_c3")
+with chart_c1:
+    st.subheader("Metrics over time")
+    st.caption("Training Loss")
+    placeholder_chart_c1_1 = st.empty()
+    with placeholder_chart_c1_1:
+        st_lottie(lottie_loading, height=100, key="loading_c1_1")
+    st.caption("Number of alive runs over time")
+    placeholder_chart_c1_2 = st.empty()
+    with placeholder_chart_c1_2:
+        st_lottie(lottie_loading, height=100, key="loading_c1_2")
+    st.caption("Number of steps")
+    placeholder_chart_c1_3 = st.empty()
+    with placeholder_chart_c1_3:
+        st_lottie(lottie_loading, height=100, key="loading_c1_3")
+with chart_c2:
+    st.subheader("Global metrics")
+    st.caption("Collaborative training participants")
+    placeholder_chart_c2_1 = st.empty()
+    with placeholder_chart_c2_1:
+        st_lottie(lottie_loading, height=100, key="loading_c2_1")
+    st.write("Chart showing participants of the collaborative-training. Circle radius is relative to the total number of "
+    "processed batches, the circle is greyed if the participant is not active. Every purple square represents an "
+    "active device, darker color corresponds to higher performance.")
+    st.caption("Leaderboard")
+    placeholder_chart_c2_3 = st.empty()
+    with placeholder_chart_c2_3:
+        st_lottie(lottie_loading, height=100, key="loading_c2_2")
 wandb.login(anonymous="must")
 steps, dates, losses, alive_peers = get_main_metrics()
 source = pd.DataFrame({"steps": steps, "loss": losses, "alive participants": alive_peers, "date": dates})
+placeholder_chart_c1_1.vega_lite_chart(
     source,
     {
         "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
     use_container_width=True,
 )
+placeholder_chart_c1_2.vega_lite_chart(
     source,
     {
         "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
     },
     use_container_width=True,
 )
+placeholder_chart_c1_3.vega_lite_chart(
     source,
     {
         "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
     use_container_width=True,
 )
 serialized_data, profiles = get_new_bubble_data()
+df_leaderboard = get_leaderboard(serialized_data)
 observable(
+    "_",
     notebook="d/9ae236a507f54046",  # "@huggingface/participants-bubbles-chart",
     targets=["c_noaws"],
+    redefine={"serializedData": serialized_data, "profileSimple": profiles, "width": 0},
 )
+placeholder_chart_c2_3.dataframe(df_leaderboard[["User", "Total time contributed"]])
+global_metrics = get_global_metrics(serialized_data)
+placeholder_key_figures_c1.write(f"<b>{global_metrics['num_contributing_users']}</b>", unsafe_allow_html=True)
+placeholder_key_figures_c2.write(f"<b>{global_metrics['num_active_users']}</b>", unsafe_allow_html=True)
+placeholder_key_figures_c3.write(f"<b>{global_metrics['total_runtime']}</b>", unsafe_allow_html=True)
+with placeholder_chart_c2_1:
+    observable(
+        "Participants",
+        notebook="d/9ae236a507f54046",  # "@huggingface/participants-bubbles-chart",
+        targets=["c_noaws"],
+        redefine={"serializedData": serialized_data, "profileSimple": profiles},
+    )

dashboard_utils/bubbles.py CHANGED Viewed

@@ -2,6 +2,8 @@ import datetime
 from concurrent.futures import as_completed
 from urllib import parse
 import streamlit as st
 import wandb
 from requests_futures.sessions import FuturesSession
@@ -11,9 +13,10 @@ from dashboard_utils.time_tracker import _log, simple_time_tracker
 URL_QUICKSEARCH = "https://huggingface.co/api/quicksearch?"
 WANDB_REPO = "learning-at-home/Worker_logs"
 CACHE_TTL = 100
-@st.cache(ttl=CACHE_TTL)
 @simple_time_tracker(_log)
 def get_new_bubble_data():
     serialized_data_points, latest_timestamp = get_serialized_data_points()
@@ -28,7 +31,7 @@ def get_new_bubble_data():
     return serialized_data, profiles
-@st.cache(ttl=CACHE_TTL)
 @simple_time_tracker(_log)
 def get_profiles(usernames):
     profiles = []
@@ -60,7 +63,7 @@ def get_profiles(usernames):
     return profiles
-@st.cache(ttl=CACHE_TTL)
 @simple_time_tracker(_log)
 def get_serialized_data_points():
@@ -108,7 +111,7 @@ def get_serialized_data_points():
     return serialized_data_points, latest_timestamp
-@st.cache(ttl=CACHE_TTL)
 @simple_time_tracker(_log)
 def get_serialized_data(serialized_data_points, latest_timestamp):
     serialized_data_points_v2 = []
@@ -138,3 +141,46 @@ def get_serialized_data(serialized_data_points, latest_timestamp):
         serialized_data_points_v2.append(new_item)
     serialized_data = {"points": [serialized_data_points_v2], "maxVelocity": max_velocity}
     return serialized_data

 from concurrent.futures import as_completed
 from urllib import parse
+import pandas as pd
 import streamlit as st
 import wandb
 from requests_futures.sessions import FuturesSession
 URL_QUICKSEARCH = "https://huggingface.co/api/quicksearch?"
 WANDB_REPO = "learning-at-home/Worker_logs"
 CACHE_TTL = 100
+MAX_DELTA_ACTIVE_RUN_SEC = 60 * 5
+@st.cache(ttl=CACHE_TTL, show_spinner=False)
 @simple_time_tracker(_log)
 def get_new_bubble_data():
     serialized_data_points, latest_timestamp = get_serialized_data_points()
     return serialized_data, profiles
+@st.cache(ttl=CACHE_TTL, show_spinner=False)
 @simple_time_tracker(_log)
 def get_profiles(usernames):
     profiles = []
     return profiles
+@st.cache(ttl=CACHE_TTL, show_spinner=False)
 @simple_time_tracker(_log)
 def get_serialized_data_points():
     return serialized_data_points, latest_timestamp
+@st.cache(ttl=CACHE_TTL, show_spinner=False)
 @simple_time_tracker(_log)
 def get_serialized_data(serialized_data_points, latest_timestamp):
     serialized_data_points_v2 = []
         serialized_data_points_v2.append(new_item)
     serialized_data = {"points": [serialized_data_points_v2], "maxVelocity": max_velocity}
     return serialized_data
+def get_leaderboard(serialized_data):
+    """Build the leaderboard table from the serialized bubble-chart data.
+
+    Reads ``profileId`` and ``runtime`` from every entry of
+    ``serialized_data["points"][0]``, orders the entries by runtime
+    (largest first) and returns a DataFrame indexed by a 1-based ``Rank``
+    with the columns ``User`` and ``Total time contributed``. The runtime
+    is passed to ``datetime.timedelta(seconds=...)`` and then rendered
+    as a string.
+    """
+    entries = serialized_data["points"][0]
+    table = pd.DataFrame(
+        {
+            "user": [entry["profileId"] for entry in entries],
+            "runtime": [entry["runtime"] for entry in entries],
+        }
+    )
+    table = table.sort_values("runtime", ascending=False).reset_index(drop=True)
+    # Kept as two separate passes: the intermediate column is rebuilt by
+    # pandas before it is stringified, exactly as in the original code.
+    table["runtime"] = table["runtime"].apply(lambda secs: datetime.timedelta(seconds=secs))
+    table["runtime"] = table["runtime"].apply(str)
+    table = table.rename(columns={"user": "User", "runtime": "Total time contributed"})
+    table["Rank"] = table.index + 1
+    return table.set_index("Rank")
+def get_global_metrics(serialized_data):
+    """Compute the headline figures shown at the top of the dashboard.
+
+    Args:
+        serialized_data: Mapping whose ``["points"][0]`` is a list of user
+            items; each item provides ``runtime`` (seconds) and
+            ``activeRuns``, a list of dicts with an ISO-format ``date``.
+
+    Returns:
+        A dict with:
+        ``num_contributing_users`` - number of user items;
+        ``num_active_users`` - users having at least one run whose date is
+        less than ``MAX_DELTA_ACTIVE_RUN_SEC`` seconds old;
+        ``total_runtime`` - sum of all runtimes as a ``datetime.timedelta``.
+    """
+    # Use an aware UTC timestamp: subtracting an offset-aware run date from
+    # a naive utcnow() value would raise TypeError.
+    current_time = datetime.datetime.now(datetime.timezone.utc)
+    users = serialized_data["points"][0]
+    num_contributing_users = len(users)
+    num_active_users = 0
+    total_runtime = 0
+    for user_item in users:
+        for run in user_item["activeRuns"]:
+            date_run = datetime.datetime.fromisoformat(run["date"])
+            if date_run.tzinfo is None:
+                # Naive timestamps are taken to be UTC, matching the earlier
+                # comparison against utcnow().
+                date_run = date_run.replace(tzinfo=datetime.timezone.utc)
+            delta_time_sec = (current_time - date_run).total_seconds()
+            if delta_time_sec < MAX_DELTA_ACTIVE_RUN_SEC:
+                # One recent run is enough to count the user as active.
+                num_active_users += 1
+                break
+        total_runtime += user_item["runtime"]
+    total_runtime = datetime.timedelta(seconds=total_runtime)
+    return {
+        "num_contributing_users": num_contributing_users,
+        "num_active_users": num_active_users,
+        "total_runtime": total_runtime,
+    }

dashboard_utils/main_metrics.py CHANGED Viewed

@@ -9,7 +9,7 @@ WANDB_REPO = "learning-at-home/Main_metrics"
 CACHE_TTL = 100
-@st.cache(ttl=CACHE_TTL)
 @simple_time_tracker(_log)
 def get_main_metrics():
     api = wandb.Api()

 CACHE_TTL = 100
+@st.cache(ttl=CACHE_TTL, show_spinner=False)
 @simple_time_tracker(_log)
 def get_main_metrics():
     api = wandb.Api()

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 streamlit
 wandb
-requests_futures

 streamlit
 wandb
+requests_futures
+streamlit-lottie