Spaces:
Sleeping
Sleeping
from datasets import load_dataset | |
import streamlit as st | |
from huggingface_hub import hf_hub_download | |
import gzip | |
import json | |
def load_all_usernames():
    """Fetch and parse the username-to-repository index.

    Obtains ``username_to_repo.json.gz`` from the
    ``bigcode/the-stack-username-to-repo`` dataset repository via
    ``hf_hub_download``, decompresses it, and parses the UTF-8 JSON payload.

    Returns:
        The decoded JSON document. Callers in this file index it by username
        and iterate the value as repository names -- presumably a
        ``dict[str, list[str]]``; confirm against the dataset.
    """
    archive_path = hf_hub_download(
        repo_id="bigcode/the-stack-username-to-repo",
        filename="username_to_repo.json.gz",
        repo_type="dataset",
    )
    # Read the whole decompressed payload first, then decode and parse it
    # once the file handle has been released.
    with gzip.open(archive_path, "rb") as archive:
        raw_bytes = archive.read()
    return json.loads(raw_bytes.decode("utf-8"))
# Streamlit page: look up a GitHub username in the username-to-repo index
# and report whether any of its repositories are listed.
st.title("Am I in The Stack?")
st.markdown("This tool lets you check if a repository under a given username is part of [The Stack dataset](https://huggingface.co/datasets/bigcode/the-stack).")

# Index loaded from the hub; it is queried by username below.
usernames = load_all_usernames()

# Strip surrounding whitespace so a copy-pasted name with a stray space or
# newline still matches its entry in the index.
username = st.text_input("Your GitHub Username:").strip()

if st.button("Check!"):
    if not username:
        # An empty query is not a real "No"; ask for input instead.
        st.warning("Please enter a GitHub username.")
    elif username in usernames:
        st.markdown("**Yes**, your data is in The Stack:")
        # Markdown folds single newlines into one paragraph, so render the
        # repositories as a bullet list to get one entry per line.
        st.markdown("\n".join(f"- `{repo_name}`" for repo_name in usernames[username]))
    else:
        st.markdown("**No**, your data is not in The Stack.")