import streamlit as st
import numpy as np
import pandas as pd
import json
import altair as alt
from pathlib import Path
import requests

class Dashboard:
    class Model:
        pageTitle = "Dashboard"

        wordsTitle = "Words"
        inferenceTimeTitle = "Inference Time"
        documentsTitle = "Documents"
        dailyInferenceTitle = "Top Daily Inference"
        accuracyTitle = "Mean Accuracy"

        titleModelEval = "## Evaluation Accuracy"
        titleInferencePerformance = "## Inference Performance"
        titleDatasetInfo = "## Dataset Info"
        titleDataAnnotation = "## Data Annotation"
        titleTrainingPerformance = "## Training Performance"
        titleEvaluationPerformance = "## Evaluation Performance"

        status_file = "docs/status.json"
        annotation_files_dir = "docs/json"
    def view(self, model):
        # st.title(model.pageTitle)

        # Inference statistics: per-document inference time, word count, model name and date
        api_url = "https://katanaml-org-sparrow-ml.hf.space/api-inference/v1/sparrow-ml/statistics"
        json_data_inference = []

        response = requests.get(api_url)
        if response.status_code == 200:
            json_data_inference = response.json()
        else:
            print(f"Error: Unable to fetch data from the API (status code {response.status_code})")

        # Training statistics
        api_url_t = "https://katanaml-org-sparrow-ml.hf.space/api-training/v1/sparrow-ml/statistics/training"
        json_data_training = []

        response_t = requests.get(api_url_t)
        if response_t.status_code == 200:
            json_data_training = response_t.json()
        else:
            print(f"Error: Unable to fetch data from the API (status code {response_t.status_code})")

        # Evaluation statistics
        api_url_e = "https://katanaml-org-sparrow-ml.hf.space/api-training/v1/sparrow-ml/statistics/evaluate"
        json_data_evaluate = []

        response_e = requests.get(api_url_e)
        if response_e.status_code == 200:
            json_data_evaluate = response_e.json()
        else:
            print(f"Error: Unable to fetch data from the API (status code {response_e.status_code})")
        with st.container():
            col1, col2, col3, col4, col5 = st.columns(5)

            with col1:
                words_count = 0
                delta_words = 0
                if len(json_data_inference) > 3:
                    for i in range(0, len(json_data_inference)):
                        words_count = words_count + json_data_inference[i][1]
                    avg_word_count = words_count / len(json_data_inference)

                    avg_word_last = (json_data_inference[len(json_data_inference) - 1][1]
                                     + json_data_inference[len(json_data_inference) - 2][1]
                                     + json_data_inference[len(json_data_inference) - 3][1]) / 3

                    if avg_word_last >= avg_word_count:
                        delta_words = round(100 - ((avg_word_count * 100) / avg_word_last), 2)
                    else:
                        delta_words = round(100 - ((avg_word_last * 100) / avg_word_count), 2) * -1

                    words_count = words_count / 1000

                st.metric(label=model.wordsTitle, value=str(words_count) + 'K', delta=str(delta_words) + "%")
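            # Documents processed, with the delta comparing documents/day over the last 3 days
            # against the overall daily average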
            with col2:
                docs_count = len(json_data_inference)
                delta_docs = 0

                if docs_count > 3:
                    inference_dates = []
                    for i in range(0, len(json_data_inference)):
                        inference_dates.append(json_data_inference[i][4].split(" ")[0])

                    inference_dates_unique = []
                    for item in inference_dates:
                        if item not in inference_dates_unique:
                            inference_dates_unique.append(item)

                    if len(inference_dates_unique) > 3:
                        inference_dates_dict = {}
                        for i, key in enumerate(inference_dates_unique):
                            inference_dates_dict[key] = [0]

                        for i in range(0, len(json_data_inference)):
                            inference_dates_dict[json_data_inference[i][4].split(" ")[0]][0] = \
                                inference_dates_dict[json_data_inference[i][4].split(" ")[0]][0] + 1

                        # calculate average for values from inference_dates_dict
                        avg_value = 0
                        for key, value in inference_dates_dict.items():
                            avg_value = avg_value + value[0]
                        avg_value = round(avg_value / len(inference_dates_dict), 2)

                        # calculate average for last 3 values from inference_dates_dict
                        avg_value_last = 0
                        for i in range(1, 4):
                            avg_value_last = avg_value_last + inference_dates_dict[inference_dates_unique[len(inference_dates_unique) - i]][0]
                        avg_value_last = round(avg_value_last / 3, 2)

                        if avg_value_last > avg_value:
                            delta_docs = round(100 - ((avg_value * 100) / avg_value_last), 2)
                        else:
                            delta_docs = round(100 - ((avg_value_last * 100) / avg_value), 2) * -1

                st.metric(label=model.documentsTitle, value=docs_count, delta=str(delta_docs) + "%")
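            # Top daily inference: the highest number of documents processed on a single day;
            # the delta shows how far the average day falls below that peak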
            with col3:
                inference_dates = []
                for i in range(0, len(json_data_inference)):
                    inference_dates.append(json_data_inference[i][4].split(" ")[0])

                inference_dates_unique = []
                for item in inference_dates:
                    if item not in inference_dates_unique:
                        inference_dates_unique.append(item)

                inference_dates_dict = {}
                for i, key in enumerate(inference_dates_unique):
                    inference_dates_dict[key] = [0]

                for i in range(0, len(json_data_inference)):
                    inference_dates_dict[json_data_inference[i][4].split(" ")[0]][0] = \
                        inference_dates_dict[json_data_inference[i][4].split(" ")[0]][0] + 1

                # loop through the dictionary and find the max value
                max_value = 0
                for key, value in inference_dates_dict.items():
                    if value[0] > max_value:
                        max_value = value[0]

                # calculate average for values from inference_dates_dict
                avg_value = 0
                for key, value in inference_dates_dict.items():
                    avg_value = avg_value + value[0]

                # guard against an empty inference response to avoid division by zero
                avg_delta = 0
                if len(inference_dates_dict) > 0 and max_value > 0:
                    avg_value = round(avg_value / len(inference_dates_dict), 2)
                    avg_delta = round(100 - ((avg_value * 100) / max_value), 2)

                st.metric(label=model.dailyInferenceTitle, value=max_value, delta=str(avg_delta) + "%")
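            # Average inference time in seconds; delta_color="inverse" so an increase in time
            # is rendered as a regression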
            with col4:
                inference_time_avg = 0

                # calculate inference time average
                for i in range(0, len(json_data_inference)):
                    inference_time_avg = inference_time_avg + json_data_inference[i][0]
                # guard against an empty inference response
                if len(json_data_inference) > 0:
                    inference_time_avg = round(inference_time_avg / len(json_data_inference), 2)

                delta_time = 0
                if len(json_data_inference) > 3:
                    avg_time_last = (json_data_inference[len(json_data_inference) - 1][0] +
                                     json_data_inference[len(json_data_inference) - 2][0] +
                                     json_data_inference[len(json_data_inference) - 3][0]) / 3

                    if avg_time_last > inference_time_avg:
                        delta_time = round(100 - ((inference_time_avg * 100) / avg_time_last), 2)
                    else:
                        delta_time = round(100 - ((avg_time_last * 100) / inference_time_avg), 2) * -1

                st.metric(label=model.inferenceTimeTitle, value=str(inference_time_avg) + " s", delta=str(delta_time) + "%",
                          delta_color="inverse")
            with col5:
                models_unique = []
                models_dict = {}
                for i in range(0, len(json_data_evaluate)):
                    if json_data_evaluate[i][3] not in models_unique:
                        models_unique.append(json_data_evaluate[i][3])
                        models_dict[json_data_evaluate[i][3]] = json_data_evaluate[i][1]['mean_accuracy']

                avg_accuracy = 0
                for key, value in models_dict.items():
                    avg_accuracy = avg_accuracy + value
                # guard against an empty evaluation response
                if len(models_dict) > 0:
                    avg_accuracy = round(avg_accuracy / len(models_dict), 2)

                if len(models_unique) > 3:
                    # calculate average accuracy for last 3 values
                    avg_accuracy_last = 0
                    for i in range(1, 4):
                        avg_accuracy_last = avg_accuracy_last + models_dict[models_unique[len(models_unique) - i]]
                    avg_accuracy_last = round(avg_accuracy_last / 3, 2)
                else:
                    avg_accuracy_last = avg_accuracy

                delta_accuracy = 0
                if avg_accuracy_last > avg_accuracy:
                    delta_accuracy = round(100 - ((avg_accuracy * 100) / avg_accuracy_last), 2)
                elif avg_accuracy > 0:
                    delta_accuracy = round(100 - ((avg_accuracy_last * 100) / avg_accuracy), 2) * -1

                st.metric(label=model.accuracyTitle, value=avg_accuracy, delta=str(delta_accuracy) + "%",
                          delta_color="inverse")
st.markdown("---") | |
        with st.container():
            col1, col2 = st.columns(2)

            with col1:
                st.write(model.titleInferencePerformance)

                models_dict = {}
                models = []
                for i in range(0, len(json_data_inference)):
                    models.append(json_data_inference[i][3])

                models_unique = []
                for item in models:
                    if item not in models_unique:
                        models_unique.append(item)

                for i, key in enumerate(models_unique):
                    models_dict[key] = []

                for i in range(0, len(json_data_inference)):
                    models_dict[json_data_inference[i][3]].append(round(json_data_inference[i][0]))

                # wrap in Series so models with a different number of runs are padded with NaN
                data = pd.DataFrame({key: pd.Series(values) for key, values in models_dict.items()})
                st.line_chart(data)
            with col2:
                st.write(model.titleModelEval)

                models_unique = []
                models_dict = {}
                for i in range(0, len(json_data_evaluate)):
                    if json_data_evaluate[i][3] not in models_unique:
                        models_unique.append(json_data_evaluate[i][3])
                        models_dict[json_data_evaluate[i][3]] = json_data_evaluate[i][1]['accuracies']

                # wrap in Series so models with a different number of evaluations are padded with NaN
                data = pd.DataFrame({key: pd.Series(values) for key, values in models_dict.items()})
                st.line_chart(data)
st.markdown("---") | |
        with st.container():
            col1, col2, col3 = st.columns(3)

            with col1:
                with st.container():
                    st.write(model.titleDataAnnotation)

                    total, completed, in_progress = self.calculate_annotation_stats(model)

                    data = pd.DataFrame({"Status": ["Completed", "In Progress"], "Value": [completed, in_progress]})

                    # Create a horizontal bar chart
                    chart = alt.Chart(data).mark_bar().encode(
                        x='Value:Q',
                        y=alt.Y('Status:N', sort='-x'),
                        color=alt.Color('Status:N', legend=None)
                    )

                    st.altair_chart(chart)
            with col2:
                with st.container():
                    st.write(model.titleDatasetInfo)

                    api_url = "https://katanaml-org-sparrow-data.hf.space/api-dataset/v1/sparrow-data/dataset_info"

                    # Make the GET request
                    response = requests.get(api_url)

                    names = []
                    rows = []

                    # Check if the request was successful (status code 200)
                    if response.status_code == 200:
                        # Convert the response content to a JSON object
                        json_data = response.json()
                        for i in range(0, len(json_data['splits'])):
                            names.append(json_data['splits'][i]['name'])
                            rows.append(json_data['splits'][i]['number_of_rows'])
                    else:
                        print(f"Error: Unable to fetch data from the API (status code {response.status_code})")

                    data = pd.DataFrame({"Dataset": names, "Value": rows})

                    # Create a horizontal bar chart
                    chart = alt.Chart(data).mark_bar().encode(
                        x='Value:Q',
                        y=alt.Y('Dataset:N', sort='-x'),
                        color=alt.Color('Dataset:N', legend=None)
                    )

                    st.altair_chart(chart)
            with col3:
                with st.container():
                    st.write(model.titleTrainingPerformance)

                    models_dict = {}
                    for i in range(0, len(json_data_training)):
                        models_dict[i] = round(json_data_training[i][0])

                    data = pd.DataFrame({"Runs": list(models_dict.keys()), "Value": list(models_dict.values())})

                    # Create a horizontal bar chart
                    chart = alt.Chart(data).mark_bar().encode(
                        x='Value:Q',
                        y=alt.Y('Runs:N', sort='-x'),
                        color=alt.Color('Runs:N', legend=None)
                    )

                    st.altair_chart(chart)
st.markdown("---") | |
        with st.container():
            st.write(model.titleEvaluationPerformance)

            runs_dict = {}
            for i in range(0, len(json_data_evaluate)):
                runs_dict[i] = round(json_data_evaluate[i][0])

            data = pd.DataFrame({"Runs": list(runs_dict.keys()), "Value": list(runs_dict.values())})

            # Create a horizontal bar chart
            chart = alt.Chart(data).mark_bar().encode(
                x='Value:Q',
                y=alt.Y('Runs:N', sort='-x'),
                color=alt.Color('Runs:N', legend=None)
            )

            st.altair_chart(chart)
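    # Counts annotation files by status (meta.version 'v0.1' means still in progress,
    # anything else means completed) and persists the summary to model.status_file.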
    def calculate_annotation_stats(self, model):
        completed = 0
        in_progress = 0

        data_dir_path = Path(model.annotation_files_dir)
        for file_name in data_dir_path.glob("*.json"):
            with open(file_name, "r") as f:
                data = json.load(f)

                v = data['meta']['version']
                if v == 'v0.1':
                    in_progress += 1
                else:
                    completed += 1

        total = completed + in_progress

        status_json = {
            "annotations": [
                {
                    "completed": completed,
                    "in_progress": in_progress,
                    "total": total
                }
            ]
        }

        with open(model.status_file, "w") as f:
            json.dump(status_json, f, indent=2)

        return total, completed, in_progress
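
# Usage sketch (assumption -- the multipage entry point that calls this view is not part of
# this file); something along these lines is expected to render the dashboard:
#
#   dashboard = Dashboard()
#   dashboard.view(Dashboard.Model())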