File size: 2,493 Bytes
450856d d5df872 450856d b41a0ac 7ac4c38 6609394 450856d 7ac4c38 450856d 7ac4c38 3c986cb 450856d 3d89639 d1e562c 6609394 d1e562c d5df872 567a5be b41a0ac a3bc6dc b41a0ac a3bc6dc b41a0ac 567a5be 90ed9b3 ae90632 90ed9b3 7ac4c38 90ed9b3 7ac4c38 90ed9b3 7ac4c38 90ed9b3 3e1a2ec 7ac4c38 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import streamlit as st
import os
from datasets import load_dataset
import pandas as pd
import matplotlib.pyplot as plt
import argilla as rg
from datetime import datetime
# --- Configuration -----------------------------------------------------------
# Connection settings and credentials are read from the environment; each value
# is None when its variable is not set.
ARGILLA_API_URL = os.getenv("ARGILLA_API_URL")
ARGILLA_API_KEY = os.getenv("ARGILLA_API_KEY")
HF_TOKEN = os.getenv("HF_TOKEN")

# --- Argilla: resolve annotator ids to usernames ------------------------------
client = rg.Argilla(api_url=ARGILLA_API_URL, api_key=ARGILLA_API_KEY)
workspace = client.workspaces('cohere')
# Keys are stringified user ids so they can be looked up with plain strings.
users_map = dict((str(member.id), member.username) for member in workspace.users)

# --- Results dataset -----------------------------------------------------------
ds = load_dataset(
    "CohereForAI/mmlu-translations-results",
    split="train",
    token=HF_TOKEN,
)
df = ds.to_pandas()
# Page heading followed by the overall number of completed tasks (one dataset
# row per completed task).
st.title("π MMLU Translation Review Progress π")
st.markdown(f"**Total tasks completed:** {len(ds)}")

# "Last updated" is reported as the number of whole minutes elapsed since the
# start of the current local hour, i.e. the minute component of the clock.
# NOTE(review): this presumes the data is refreshed at the top of every hour
# -- confirm against the deployment's refresh schedule.
now = datetime.now()
minutes_past = now.minute
st.markdown(f"**Last updated:** {minutes_past} minutes ago")
st.header("Progress by Language")

# Extract the language from the metadata column into its own column. Records
# with no metadata get None, which value_counts() leaves out of the tally.
df['language'] = df['metadata'].apply(
    lambda meta: meta.get('language') if meta is not None else None
)

# Number of completed tasks per language, most frequent first.
language_counts = df['language'].value_counts()

# Show the counts as a table (rendered above the chart).
language_counts_df = language_counts.reset_index()
language_counts_df.columns = ['Language', 'Count']
st.table(language_counts_df)

if language_counts.empty:
    # A bar plot of an empty series cannot be drawn; say so instead of failing.
    st.info("No completed tasks to plot yet.")
else:
    fig, ax = plt.subplots()
    try:
        language_counts.plot(kind='bar', ax=ax)
        ax.set_title('Number of Completed Tasks for Each Language')
        ax.set_xlabel('Language')
        ax.set_ylabel('Count')
        st.pyplot(fig)
    finally:
        # Streamlit re-executes this script on every interaction; without an
        # explicit close each run would leave another figure alive in pyplot's
        # global registry.
        plt.close(fig)
st.header("Leaderboard")

# Each record's responses hold a list of `is_edit_required` answers, one per
# annotator. Flatten them to one row per answer; records with an empty or
# missing list turn into NaN after explode() and are dropped so the user_id
# lookup below only ever sees real answers.
edit_answers = (
    df['responses']
    .apply(lambda resp: resp['is_edit_required'] if resp is not None else None)
    .explode()
    .dropna()
)
user_ids = edit_answers.apply(lambda answer: answer['user_id'])

# Number of answers submitted by each user id, highest first.
user_id_counts = user_ids.value_counts()

# Map user ids to usernames. Ids that are not members of the workspace are
# shown as the raw id rather than collapsing into a NaN label.
user_id_counts.index = user_id_counts.index.map(
    lambda uid: users_map.get(str(uid), str(uid))
)

# Convert the counts to a two-column DataFrame and display it.
user_id_counts_df = user_id_counts.reset_index()
user_id_counts_df.columns = ['Username', 'Count']
st.table(user_id_counts_df)
# Show the full results table, including the derived columns added earlier in
# the script, as an interactive dataframe.
st.header("Raw Dataset")
st.dataframe(df)