|
import streamlit as st |
|
import os |
|
from datasets import load_dataset |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
import argilla as rg |
|
from datetime import datetime |
|
|
|
|
|
# --- Argilla connection -----------------------------------------------------
# Credentials come from the environment. os.environ.get returns None for an
# unset variable, in which case the client below is constructed but every
# request will fail — NOTE(review): consider failing fast here with a clear
# error message instead.
ARGILLA_API_URL = os.environ.get("ARGILLA_API_URL")
ARGILLA_API_KEY = os.environ.get("ARGILLA_API_KEY")
HF_TOKEN = os.environ.get("HF_TOKEN")

client = rg.Argilla(
    api_url=ARGILLA_API_URL,
    api_key=ARGILLA_API_KEY,
)

# Workspace that hosts the translation-review project.
workspace = client.workspaces('cohere')

# Map Argilla user ids (stringified) to human-readable usernames for the
# leaderboard section. `workspace.users` is already iterable, so the original
# list() wrapper was a needless materialization and has been dropped.
users_map = {str(user.id): user.username for user in workspace.users}
|
|
|
# Pull the completed review results from the Hugging Face Hub, then mirror
# them into a pandas DataFrame for the aggregations further down the page.
ds = load_dataset(
    "CohereForAI/mmlu-translations-results",
    split="train",
    token=HF_TOKEN,
)
df = ds.to_pandas()

# Page header plus the headline metric (one dataset row == one finished task).
st.title("π MMLU Translation Review Progress π")
st.markdown(f"**Total tasks completed:** {len(ds)}")
|
|
|
|
|
# Freshness indicator: the dataset snapshot is assumed to refresh at the top
# of each hour, so "minutes since the top of the hour" doubles as "minutes
# since the last update". NOTE(review): datetime.now() is naive/server-local;
# confirm the refresh job runs on the same clock.
now = datetime.now()
top_of_the_hour = now.replace(minute=0, second=0, microsecond=0)

# Equivalent to the original (now - top_of_the_hour).seconds // 60, but
# direct, and immune to the timedelta.seconds-vs-total_seconds() pitfall.
minutes_past = now.minute
|
|
|
|
|
# Freshness line, then the per-language progress section.
st.markdown(f"**Last updated:** {minutes_past} minutes ago")

st.header("Progress by Language")


def _language_of(metadata):
    """Return the 'language' entry of a record's metadata (None if absent)."""
    return metadata.get('language')


# Surface the language buried in each record's metadata dict as a real column
# so value_counts() can tally completed tasks per language. Using dict.get
# keeps records without a 'language' key as None instead of raising.
df['language'] = df['metadata'].apply(_language_of)
language_counts = df['language'].value_counts()
|
|
|
|
|
# Tabular view of the per-language counts with friendly column names.
# (Block-local: nothing below this section reads it.)
counts_table = language_counts.reset_index()
counts_table.columns = ['Language', 'Count']

# Matching bar chart of the same counts.
fig, ax = plt.subplots()
language_counts.plot(kind='bar', ax=ax)
ax.set_title('Number of Completed Tasks for Each Language')
ax.set_xlabel('Language')
ax.set_ylabel('Count')

# Render table first, chart second — same page order as before.
st.table(counts_table)
st.pyplot(fig)
|
|
|
st.header("Leaderboard")

# One leaderboard entry per submitted 'is_edit_required' response.
# Assumed schema (TODO confirm against the Argilla export): each row's
# 'responses' is a dict whose 'is_edit_required' value is a list of response
# dicts, each carrying a 'user_id'.
#
# .dropna() is the fix: explode() emits NaN for rows whose response list is
# empty, and the original code then crashed with a TypeError when indexing
# NaN with ['user_id'].
user_ids = (
    df['responses']
    .apply(lambda responses: responses['is_edit_required'])
    .explode()
    .dropna()
    .apply(lambda response: response['user_id'])
)
user_id_counts = user_ids.value_counts()

# Translate ids to usernames. Falling back to the raw id keeps users missing
# from users_map visible (the original Index.map produced NaN rows for them).
user_id_counts.index = user_id_counts.index.map(lambda uid: users_map.get(uid, uid))

user_id_counts_df = user_id_counts.reset_index()
user_id_counts_df.columns = ['Username', 'Count']

st.table(user_id_counts_df)
|
|
|
|
|
# Full, unaggregated dataset dump for ad-hoc inspection at the bottom of the
# page (Streamlit renders it as an interactive, scrollable grid).
st.header("Raw Dataset")
st.dataframe(df)