# sparrow-ui/toolbar/views/dashboard.py
import json
from pathlib import Path

import altair as alt
import pandas as pd
import requests
import streamlit as st

class Dashboard:
    class Model:
        pageTitle = "Dashboard"
        wordsTitle = "Words"
        inferenceTimeTitle = "Inference Time"
        documentsTitle = "Documents"
        dailyInferenceTitle = "Top Daily Inference"
        accuracyTitle = "Mean Accuracy"

        titleModelEval = "## Evaluation Accuracy"
        titleInferencePerformance = "## Inference Performance"
        titleDatasetInfo = "## Dataset Info"
        titleDataAnnotation = "## Data Annotation"
        titleTrainingPerformance = "## Training Performance"
        titleEvaluationPerformance = "## Evaluation Performance"

        status_file = "docs/status.json"
        annotation_files_dir = "docs/json"
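
    # view() renders the dashboard in four bands: a five-column metrics row,
    # two line charts (inference time per model, evaluation accuracy per
    # model), three bar charts (annotation status, dataset splits, training
    # runtimes), and a final bar chart of evaluation runtimes. Everything is
    # computed from the statistics endpoints fetched at the top of the method.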

    def view(self, model):
        # st.title(model.pageTitle)

        # Fetch inference, training and evaluation statistics. Each block
        # falls back to an empty list on a non-200 response; the timeout is an
        # addition so a dead endpoint fails fast instead of hanging the page.
        api_url = "https://katanaml-org-sparrow-ml.hf.space/api-inference/v1/sparrow-ml/statistics"
        json_data_inference = []
        response = requests.get(api_url, timeout=30)
        if response.status_code == 200:
            json_data_inference = response.json()
        else:
            print(f"Error: Unable to fetch data from the API (status code {response.status_code})")

        api_url_t = "https://katanaml-org-sparrow-ml.hf.space/api-training/v1/sparrow-ml/statistics/training"
        json_data_training = []
        response_t = requests.get(api_url_t, timeout=30)
        if response_t.status_code == 200:
            json_data_training = response_t.json()
        else:
            print(f"Error: Unable to fetch data from the API (status code {response_t.status_code})")

        api_url_e = "https://katanaml-org-sparrow-ml.hf.space/api-training/v1/sparrow-ml/statistics/evaluate"
        json_data_evaluate = []
        response_e = requests.get(api_url_e, timeout=30)
        if response_e.status_code == 200:
            json_data_evaluate = response_e.json()
        else:
            print(f"Error: Unable to fetch data from the API (status code {response_e.status_code})")

        with st.container():
            col1, col2, col3, col4, col5 = st.columns(5)

            with col1:
                # Total words processed, with a delta comparing the average of
                # the last 3 documents against the overall average.
                words_count = 0
                delta_words = 0
                if len(json_data_inference) > 3:
                    for row in json_data_inference:
                        words_count += row[1]
                    avg_word_count = words_count / len(json_data_inference)
                    avg_word_last = (json_data_inference[-1][1] +
                                     json_data_inference[-2][1] +
                                     json_data_inference[-3][1]) / 3
                    if avg_word_last >= avg_word_count:
                        delta_words = round(100 - ((avg_word_count * 100) / avg_word_last), 2)
                    else:
                        delta_words = round(100 - ((avg_word_last * 100) / avg_word_count), 2) * -1
                    words_count = words_count / 1000
                st.metric(label=model.wordsTitle, value=str(words_count) + 'K', delta=str(delta_words) + "%")
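
            # The same trailing-3 delta pattern repeats in the columns below.
            # Worked example: overall average 800 words, last-3 average 1000
            # words -> delta = 100 - (800 * 100 / 1000) = 20.0 (%). When the
            # recent average is lower, the roles swap and the sign flips.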
            with col2:
                # Total documents processed, with a delta comparing the last 3
                # active days against the average documents per day.
                docs_count = len(json_data_inference)
                delta_docs = 0
                if docs_count > 3:
                    inference_dates = [row[4].split(" ")[0] for row in json_data_inference]
                    inference_dates_unique = []
                    for item in inference_dates:
                        if item not in inference_dates_unique:
                            inference_dates_unique.append(item)
                    if len(inference_dates_unique) > 3:
                        # Count documents per day.
                        inference_dates_dict = {key: 0 for key in inference_dates_unique}
                        for date in inference_dates:
                            inference_dates_dict[date] += 1
                        # Average documents per day across all days.
                        avg_value = round(sum(inference_dates_dict.values()) / len(inference_dates_dict), 2)
                        # Average documents per day over the last 3 days.
                        avg_value_last = 0
                        for i in range(1, 4):
                            avg_value_last += inference_dates_dict[inference_dates_unique[-i]]
                        avg_value_last = round(avg_value_last / 3, 2)
                        if avg_value_last > avg_value:
                            delta_docs = round(100 - ((avg_value * 100) / avg_value_last), 2)
                        else:
                            delta_docs = round(100 - ((avg_value_last * 100) / avg_value), 2) * -1
                st.metric(label=model.documentsTitle, value=docs_count, delta=str(delta_docs) + "%")
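
            # col3 reuses the per-day bucketing above to surface the busiest
            # day. Worked example: busiest day 10 documents, average 6 per day
            # -> delta = 100 - (6 * 100 / 10) = 40.0 (%).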
            with col3:
                inference_dates = [row[4].split(" ")[0] for row in json_data_inference]
                inference_dates_unique = []
                for item in inference_dates:
                    if item not in inference_dates_unique:
                        inference_dates_unique.append(item)
                inference_dates_dict = {key: 0 for key in inference_dates_unique}
                for date in inference_dates:
                    inference_dates_dict[date] += 1
                # Busiest day: max documents on a single day.
                max_value = 0
                for value in inference_dates_dict.values():
                    if value > max_value:
                        max_value = value
                # Average documents per day; guarded so an empty payload from
                # an unreachable API does not raise ZeroDivisionError.
                avg_delta = 0
                if inference_dates_dict and max_value > 0:
                    avg_value = round(sum(inference_dates_dict.values()) / len(inference_dates_dict), 2)
                    avg_delta = round(100 - ((avg_value * 100) / max_value), 2)
                st.metric(label=model.dailyInferenceTitle, value=max_value, delta=str(avg_delta) + "%")
            with col4:
                # Average inference time, with the trailing-3 delta. Lower is
                # better, hence delta_color="inverse". Guarded against an
                # empty payload to avoid ZeroDivisionError.
                inference_time_avg = 0
                if json_data_inference:
                    for row in json_data_inference:
                        inference_time_avg += row[0]
                    inference_time_avg = round(inference_time_avg / len(json_data_inference), 2)
                delta_time = 0
                if len(json_data_inference) > 3:
                    avg_time_last = (json_data_inference[-1][0] +
                                     json_data_inference[-2][0] +
                                     json_data_inference[-3][0]) / 3
                    if avg_time_last > inference_time_avg:
                        delta_time = round(100 - ((inference_time_avg * 100) / avg_time_last), 2)
                    else:
                        delta_time = round(100 - ((avg_time_last * 100) / inference_time_avg), 2) * -1
                st.metric(label=model.inferenceTimeTitle, value=str(inference_time_avg) + " s", delta=str(delta_time) + "%",
                          delta_color="inverse")
            with col5:
                # Mean accuracy across evaluated models, with the trailing-3
                # delta over the most recently evaluated models. Guarded
                # against an empty payload to avoid ZeroDivisionError.
                models_unique = []
                models_dict = {}
                for row in json_data_evaluate:
                    if row[3] not in models_unique:
                        models_unique.append(row[3])
                        models_dict[row[3]] = row[1]['mean_accuracy']
                avg_accuracy = 0
                delta_accuracy = 0
                if models_dict:
                    avg_accuracy = round(sum(models_dict.values()) / len(models_dict), 2)
                    if len(models_unique) > 3:
                        # Average accuracy over the last 3 evaluated models.
                        avg_accuracy_last = 0
                        for i in range(1, 4):
                            avg_accuracy_last += models_dict[models_unique[-i]]
                        avg_accuracy_last = round(avg_accuracy_last / 3, 2)
                    else:
                        avg_accuracy_last = avg_accuracy
                    if avg_accuracy_last > avg_accuracy:
                        delta_accuracy = round(100 - ((avg_accuracy * 100) / avg_accuracy_last), 2)
                    else:
                        delta_accuracy = round(100 - ((avg_accuracy_last * 100) / avg_accuracy), 2) * -1
                st.metric(label=model.accuracyTitle, value=avg_accuracy, delta=str(delta_accuracy) + "%",
                          delta_color="inverse")

        st.markdown("---")

        with st.container():
            col1, col2 = st.columns(2)

            with col1:
                st.write(model.titleInferencePerformance)

                # One column per model, holding that model's inference times.
                models_dict = {}
                models = [row[3] for row in json_data_inference]
                models_unique = []
                for item in models:
                    if item not in models_unique:
                        models_unique.append(item)
                for key in models_unique:
                    models_dict[key] = []
                for row in json_data_inference:
                    models_dict[row[3]].append(round(row[0]))
                # pd.Series pads columns of unequal length with NaN; a plain
                # dict of lists would raise ValueError here.
                data = pd.DataFrame({key: pd.Series(values) for key, values in models_dict.items()})
                st.line_chart(data)

            with col2:
                st.write(model.titleModelEval)

                # One column per model, holding that model's per-run accuracies.
                models_unique = []
                models_dict = {}
                for row in json_data_evaluate:
                    if row[3] not in models_unique:
                        models_unique.append(row[3])
                        models_dict[row[3]] = row[1]['accuracies']
                data = pd.DataFrame({key: pd.Series(values) for key, values in models_dict.items()})
                st.line_chart(data)

        st.markdown("---")
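
        # The three panels below share one Altair pattern: a horizontal bar
        # chart with the quantitative value on x and a nominal category on y,
        # sorted by value ('-x') and colored by category with the legend off.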
        with st.container():
            col1, col2, col3 = st.columns(3)

            with col1:
                with st.container():
                    st.write(model.titleDataAnnotation)

                    total, completed, in_progress = self.calculate_annotation_stats(model)

                    data = pd.DataFrame({"Status": ["Completed", "In Progress"], "Value": [completed, in_progress]})
                    # Create a horizontal bar chart
                    chart = alt.Chart(data).mark_bar().encode(
                        x='Value:Q',
                        y=alt.Y('Status:N', sort='-x'),
                        color=alt.Color('Status:N', legend=None)
                    )
                    st.altair_chart(chart)

            with col2:
                with st.container():
                    st.write(model.titleDatasetInfo)

                    api_url = "https://katanaml-org-sparrow-data.hf.space/api-dataset/v1/sparrow-data/dataset_info"
                    names = []
                    rows = []
                    # Make the GET request (timeout added, as above)
                    response = requests.get(api_url, timeout=30)
                    # Check if the request was successful (status code 200)
                    if response.status_code == 200:
                        # Convert the response content to a JSON object
                        json_data = response.json()
                        for split in json_data['splits']:
                            names.append(split['name'])
                            rows.append(split['number_of_rows'])
                    else:
                        print(f"Error: Unable to fetch data from the API (status code {response.status_code})")

                    data = pd.DataFrame({"Dataset": names, "Value": rows})
                    # Create a horizontal bar chart
                    chart = alt.Chart(data).mark_bar().encode(
                        x='Value:Q',
                        y=alt.Y('Dataset:N', sort='-x'),
                        color=alt.Color('Dataset:N', legend=None)
                    )
                    st.altair_chart(chart)

            with col3:
                with st.container():
                    st.write(model.titleTrainingPerformance)

                    # One bar per training run (row index), plotting round(row[0]).
                    runs_dict = {}
                    for i in range(len(json_data_training)):
                        runs_dict[i] = round(json_data_training[i][0])
                    data = pd.DataFrame({"Runs": list(runs_dict.keys()), "Value": list(runs_dict.values())})
                    # Create a horizontal bar chart
                    chart = alt.Chart(data).mark_bar().encode(
                        x='Value:Q',
                        y=alt.Y('Runs:N', sort='-x'),
                        color=alt.Color('Runs:N', legend=None)
                    )
                    st.altair_chart(chart)

        st.markdown("---")

        with st.container():
            st.write(model.titleEvaluationPerformance)

            # One bar per evaluation run (row index), plotting round(row[0]).
            runs_dict = {}
            for i in range(len(json_data_evaluate)):
                runs_dict[i] = round(json_data_evaluate[i][0])
            data = pd.DataFrame({"Runs": list(runs_dict.keys()), "Value": list(runs_dict.values())})
            # Create a horizontal bar chart
            chart = alt.Chart(data).mark_bar().encode(
                x='Value:Q',
                y=alt.Y('Runs:N', sort='-x'),
                color=alt.Color('Runs:N', legend=None)
            )
            st.altair_chart(chart)
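
    # Minimal usage sketch (an assumption about the host app, not code from
    # this file — sparrow-ui's real entry point wires the views up elsewhere):
    #
    #   dashboard = Dashboard()
    #   dashboard.view(Dashboard.Model())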

    def calculate_annotation_stats(self, model):
        completed = 0
        in_progress = 0

        data_dir_path = Path(model.annotation_files_dir)
        for file_name in data_dir_path.glob("*.json"):
            with open(file_name, "r") as f:
                data = json.load(f)
                v = data['meta']['version']
                # Annotations still at the initial version are in progress;
                # anything bumped past v0.1 counts as completed.
                if v == 'v0.1':
                    in_progress += 1
                else:
                    completed += 1

        total = completed + in_progress

        status_json = {
            "annotations": [
                {
                    "completed": completed,
                    "in_progress": in_progress,
                    "total": total
                }
            ]
        }

        with open(model.status_file, "w") as f:
            json.dump(status_json, f, indent=2)

        return total, completed, in_progress
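
    # Shape assumed for each annotation file in docs/json (inferred from the
    # 'meta.version' lookup above; other keys are illustrative only):
    #
    #   {
    #     "meta": {"version": "v0.1"},
    #     ...
    #   }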