"""Streamlit dashboard for the MMLU translations results dataset.

Loads the ``CohereForAI/mmlu-translations-results`` dataset from the Hugging
Face Hub and renders, in order: a per-language count table, the matching bar
chart, a per-user count table built from the ``is_edit_required`` responses,
and the raw records.
"""

import os

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from datasets import load_dataset

# Hub access token read from the environment; ``None`` when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")


def _language_of(metadata):
    """Return the ``language`` entry of a record's metadata, or ``None``."""
    # A missing metadata cell would otherwise raise AttributeError on ``.get``.
    if metadata is None:
        return None
    return metadata.get('language')


def _edit_responses_of(responses):
    """Return the ``is_edit_required`` entries of a record's responses, or ``None``."""
    # A missing responses cell would otherwise raise TypeError on subscripting.
    if responses is None:
        return None
    return responses['is_edit_required']


ds = load_dataset(
    "CohereForAI/mmlu-translations-results",
    split="train",
    token=HF_TOKEN,
)
df = ds.to_pandas()

st.title("MMLU Translations Progress")

# --- Records per language -------------------------------------------------
df['language'] = df['metadata'].apply(_language_of)

# value_counts() ignores missing languages by default.
language_counts = df['language'].value_counts()

fig, ax = plt.subplots()
language_counts.plot(kind='bar', ax=ax)
ax.set_title('Number of Completed Tasks for Each Language')
ax.set_xlabel('Language')
ax.set_ylabel('Count')

language_counts_df = language_counts.reset_index()
language_counts_df.columns = ['Language', 'Count']

st.table(language_counts_df)
st.pyplot(fig)

# --- Responses per user ---------------------------------------------------
# explode() emits one row per response and a missing value for records whose
# response list is empty or absent; those are dropped before reading
# ``user_id`` so that subscripting never hits a NaN.
user_ids = (
    df['responses']
    .apply(_edit_responses_of)
    .explode()
    .dropna()
    .apply(lambda response: response['user_id'])
)
user_id_counts = user_ids.value_counts()

user_id_counts_df = user_id_counts.reset_index()
user_id_counts_df.columns = ['User ID', 'Count']

st.table(user_id_counts_df)

# Raw records, including the derived ``language`` column.
st.dataframe(df)