Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,20 +1,27 @@
|
|
1 |
from datasets import load_dataset
|
2 |
import streamlit as st
|
|
|
|
|
|
|
3 |
|
4 |
|
5 |
@st.cache(allow_output_mutation=True)
|
6 |
def load_all_usernames():
|
7 |
-
|
8 |
-
|
|
|
|
|
|
|
9 |
|
10 |
st.title("Am I in The Stack?")
|
11 |
st.markdown("This tool lets you check if a repository under a given username is part of [The Stack dataset](https://huggingface.co/datasets/bigcode/the-stack).")
|
12 |
usernames = load_all_usernames()
|
13 |
|
14 |
-
username = st.text_input("GitHub Username:")
|
15 |
|
16 |
if st.button("Check!"):
|
17 |
if username in usernames:
|
18 |
-
st.markdown("**Yes**, your data is in The Stack
|
|
|
19 |
else:
|
20 |
st.markdown("**No**, your data is not in The Stack.")
|
|
|
1 |
from datasets import load_dataset
|
2 |
import streamlit as st
|
3 |
+
from huggingface_hub import hf_hub_download
|
4 |
+
import gzip
|
5 |
+
import json
|
6 |
|
7 |
|
8 |
@st.cache(allow_output_mutation=True)
def load_all_usernames():
    """Fetch and parse the username-to-repository index for The Stack.

    The gzip-compressed JSON file ``username_to_repo.json.gz`` is obtained
    from the ``bigcode/the-stack-username-to-repo`` dataset repository via
    ``hf_hub_download``, decompressed, decoded as UTF-8 and parsed.

    Returns:
        The parsed JSON document. Callers in this file use it with
        ``username in usernames`` and ``usernames[username]``, so it is
        presumably a mapping of username -> repository names (confirm
        against the dataset file).
    """
    archive_path = hf_hub_download(
        repo_id="bigcode/the-stack-username-to-repo",
        filename="username_to_repo.json.gz",
        repo_type="dataset",
    )

    # Read the decompressed bytes first, then decode and parse them.
    with gzip.open(archive_path, "rb") as archive:
        raw_bytes = archive.read()

    return json.loads(raw_bytes.decode("utf-8"))
|
15 |
|
16 |
# --- Streamlit page: look up a GitHub username in The Stack -----------------
st.title("Am I in The Stack?")
st.markdown("This tool lets you check if a repository under a given username is part of [The Stack dataset](https://huggingface.co/datasets/bigcode/the-stack).")

# Index returned by load_all_usernames(); it is queried below with
# `username in usernames` and `usernames[username]`.
usernames = load_all_usernames()

# Strip surrounding whitespace so a copy-pasted name such as " octocat "
# still matches its entry in the index.
username = st.text_input("Your GitHub Username:").strip()

# The lookup only runs when the user presses the button.
if st.button("Check!"):
    if username in usernames:
        st.markdown("**Yes**, your data is in The Stack:")
        # Markdown treats a single newline as a soft break (rendered as a
        # space), so joining bare code spans with "\n" would run every
        # repository name together in one paragraph. Emit a bullet list so
        # each repository is shown on its own line.
        st.markdown(
            "\n".join(f"- `{repo_name}`" for repo_name in usernames[username])
        )
    else:
        st.markdown("**No**, your data is not in The Stack.")
|