in-the-repobench

Sleeping

in-the-repobench / app.py

lvwerra HF Staff

Update app.py

fcb283e over 2 years ago

996 Bytes

	from datasets import load_dataset
	import streamlit as st
	from huggingface_hub import hf_hub_download
	import gzip
	import json


	@st.cache(allow_output_mutation=True)
	def load_all_usernames():
	    """Fetch and parse the username-to-repositories mapping.

	    Downloads ``username_to_repo.json.gz`` from the
	    ``bigcode/the-stack-username-to-repo`` dataset repository with
	    ``hf_hub_download``, decompresses it with ``gzip`` and decodes the
	    UTF-8 JSON payload. The function is wrapped in ``st.cache`` so that
	    Streamlit can reuse the result instead of recomputing it.

	    Returns:
	        The decoded JSON object. Callers in this file index it by
	        username and iterate the value as repository names.
	    """
	    archive_path = hf_hub_download(
	        repo_id="bigcode/the-stack-username-to-repo",
	        filename="username_to_repo.json.gz",
	        repo_type="dataset",
	    )

	    # Read the whole compressed payload first; decoding and parsing do not
	    # need the file handle to stay open.
	    with gzip.open(archive_path, "r") as archive:
	        raw_bytes = archive.read()

	    return json.loads(raw_bytes.decode("utf-8"))

	# --- Page header -----------------------------------------------------------
	st.title("Am I in The Stack?")
	st.markdown("This tool lets you check if a repository under a given username is part of [The Stack dataset](https://huggingface.co/datasets/bigcode/the-stack).")

	# Mapping that is indexed by username below; each value is iterated as a
	# collection of repository names.
	usernames = load_all_usernames()

	# Strip surrounding whitespace so a pasted name with a stray space or
	# newline still matches its key in the mapping.
	username = st.text_input("Your GitHub Username:").strip()

	if st.button("Check!"):
	    if username in usernames:
	        st.markdown("Yes, your data is in The Stack:")
	        # Emit a Markdown bullet list: a bare "\n" between items is only a
	        # soft line break in Markdown, which would run every repository
	        # name together in a single paragraph.
	        st.markdown("\n".join(f"- `{repo_name}`" for repo_name in usernames[username]))
	    else:
	        st.markdown("No, your data is not in The Stack.")