Spaces:

argilla
/

mmlu-translation-progress

Running

App Files Files Community

mmlu-translation-progress / app.py

dvilasuero HF staff

Update app.py

d1e562c verified 6 months ago

raw

history blame

2.39 kB

	import streamlit as st
	import os
	from datasets import load_dataset
	import pandas as pd
	import matplotlib.pyplot as plt
	import argilla as rg
	from datetime import datetime


	# Connection settings and credentials come from the environment; each is None when unset.
	ARGILLA_API_URL = os.getenv("ARGILLA_API_URL")
	ARGILLA_API_KEY = os.getenv("ARGILLA_API_KEY")
	HF_TOKEN = os.getenv("HF_TOKEN")

	# Argilla client and the 'cohere' workspace whose members are listed below.
	client = rg.Argilla(api_url=ARGILLA_API_URL, api_key=ARGILLA_API_KEY)
	workspace = client.workspaces('cohere')

	# Lookup table: stringified user id -> username, for every user of the workspace.
	users_map = {
	    str(member.id): member.username
	    for member in workspace.users
	}

	# Load the "train" split of the results dataset, authenticating with HF_TOKEN,
	# and keep a pandas copy for the aggregations further down.
	ds = load_dataset(
	    "CohereForAI/mmlu-translations-results",
	    split="train",
	    token=HF_TOKEN,
	)
	df = ds.to_pandas()

	st.title("MMLU Translations Progress")

	# One dataset row per completed task, so the row count is the total.
	st.markdown(f"Total tasks completed: {len(ds)}")

	# "Last updated" is reported as the number of whole minutes elapsed since the
	# most recent top of the hour in local time, which is simply the minute
	# component of the current time.
	# NOTE(review): this presumes the data is refreshed on the hour — confirm.
	now = datetime.now()
	minutes_past = now.minute

	st.markdown(f"Last updated: {minutes_past} minutes ago")

	# Pull the language out of each record's metadata mapping into its own column
	# (the column stays on df, so it also shows up wherever df is displayed later).
	df['language'] = df['metadata'].apply(lambda x: x.get('language'))

	# Completed tasks per language; value_counts() sorts most frequent first.
	language_counts = df['language'].value_counts()

	# Tabular view of the same counts, rendered before the chart.
	language_counts_df = language_counts.reset_index()
	language_counts_df.columns = ['Language', 'Count']
	st.table(language_counts_df)

	# Bar chart of the counts.
	fig, ax = plt.subplots()
	language_counts.plot(kind='bar', ax=ax)
	ax.set_title('Number of Completed Tasks for Each Language')
	ax.set_xlabel('Language')
	ax.set_ylabel('Count')
	st.pyplot(fig)

	# Figures created through pyplot stay registered in its global figure manager
	# until closed. This script is re-executed on every Streamlit rerun, so without
	# an explicit close each rerun would leave another figure alive in memory.
	plt.close(fig)


	# Gather every answer to the `is_edit_required` question, one row per answer.
	# explode() produces NaN for records whose answer list is empty or missing;
	# those rows are dropped here because subscripting NaN with ['user_id'] would
	# raise TypeError and take the whole page down.
	edit_responses = df['responses'].apply(lambda x: x['is_edit_required']).explode().dropna()

	# Count how many answers each user submitted.
	user_ids = edit_responses.apply(lambda x: x['user_id'])
	user_id_counts = user_ids.value_counts()

	# Map user IDs to usernames. IDs that are not in the workspace lookup keep
	# their raw ID as the label instead of all collapsing into NaN rows that
	# cannot be told apart.
	user_id_counts.index = user_id_counts.index.map(lambda uid: users_map.get(str(uid), uid))

	# Convert the per-user counts to a two-column DataFrame for display
	user_id_counts_df = user_id_counts.reset_index()
	user_id_counts_df.columns = ['Username', 'Count']

	# Display the table of username counts in the Streamlit app
	st.table(user_id_counts_df)

	# Finally show the full underlying records for inspection
	st.dataframe(df)