in-the-repobench

Sleeping

lvwerra HF Staff committed on Nov 10, 2022

Commit

fcb283e

1 Parent(s): fc6a73f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,20 +1,27 @@
 from datasets import load_dataset
 import streamlit as st
 @st.cache(allow_output_mutation=True)
 def load_all_usernames():
-    list_of_usernames = load_dataset("bigcode/the-stack-usernames", split="train")["usernames"]
-    return set(list_of_usernames)
 st.title("Am I in The Stack?")
 st.markdown("This tool lets you check if a repository under a given username is part of [The Stack dataset](https://huggingface.co/datasets/bigcode/the-stack).")
 usernames = load_all_usernames()
-username = st.text_input("GitHub Username:")
 if st.button("Check!"):
     if username in usernames:
-        st.markdown("**Yes**, your data is in The Stack.")
     else:
         st.markdown("**No**, your data is not in The Stack.")

 from datasets import load_dataset
 import streamlit as st
+from huggingface_hub import hf_hub_download
+import gzip
+import json
 @st.cache(allow_output_mutation=True)
 def load_all_usernames():
+    filepath = hf_hub_download(repo_id="bigcode/the-stack-username-to-repo", filename="username_to_repo.json.gz", repo_type="dataset")
+    with gzip.open(filepath, 'r') as f:
+        usernames = json.loads(f.read().decode('utf-8'))
+    return usernames
 st.title("Am I in The Stack?")
 st.markdown("This tool lets you check if a repository under a given username is part of [The Stack dataset](https://huggingface.co/datasets/bigcode/the-stack).")
 usernames = load_all_usernames()
+username = st.text_input("Your GitHub Username:")
 if st.button("Check!"):
     if username in usernames:
+        st.markdown("**Yes**, your data is in The Stack:")
+        st.markdown("\n".join([f"`{repo_name}`" for repo_name in usernames[username]]))
     else:
         st.markdown("**No**, your data is not in The Stack.")