Spaces:
Running
Running
File size: 2,893 Bytes
48dc2f2 ff681b1 fcb283e 48dc2f2 6127ee8 a431d31 48dc2f2 a431d31 5518841 ff681b1 6127ee8 48dc2f2 ff681b1 a431d31 48dc2f2 ff681b1 7666b36 ff681b1 2cce3db ff681b1 48dc2f2 a431d31 ff681b1 48dc2f2 a431d31 6127ee8 ff681b1 6127ee8 ff681b1 6127ee8 ff681b1 6127ee8 48dc2f2 c4eee53 48dc2f2 5ee220a 48dc2f2 6127ee8 b0c1665 48dc2f2 6127ee8 48dc2f2 ff681b1 48dc2f2 6127ee8 48dc2f2 6127ee8 48dc2f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import gradio as gr
import json
import gzip
import urllib
# Maps a dataset-version label to the data decoded from the bundled
# gzip-compressed JSON file (looked up by username elsewhere in this file).
usernames = {}
filepath = "username_to_repo.json.gz"

# Let gzip do the UTF-8 decoding and hand the text stream straight to json.
with gzip.open(filepath, "rt", encoding="utf-8") as f:
    usernames["v1.0 (from 2023-09-07 to 2023-10-06)"] = json.load(f)
# Markdown rendered at the top of the page: tagline, title, a one-line
# description of the dataset, and what this tool checks.
text = (
    "**_RepoBench is a benchmark specifically designed for evaluating repository-level code auto-completion systems._**\n"
    "# Am I in The RepoBench?\n"
    "We released and maintain [RepoBench](https://arxiv.org/abs/2306.03091), built by parsing the most recent github repositories.\n"
    "This tool lets you check if a repository under a given username is part of the RepoBench dataset.\n"
)
# Markdown/HTML snippet shown below a positive result. {title} and {body}
# receive URL-encoded text that pre-fills a new discussion on the Space.
opt_out_text_template = """\
### Opt-out
If you want your data to be removed from RepoBench for model evaluation \
open an issue with <a href="https://huggingface.co/spaces/tianyang/in-the-repobench/discussions/new?title={title}&body={body}" target="_blank">this link</a>\
"""

# Title of the pre-filled opt-out discussion.
opt_out_issue_title = """Opt-out request for {username}"""

# Body of the pre-filled opt-out discussion. The blank lines around
# {repo_list} keep Markdown from folding the "_Note_:" line into the last
# bullet of the repository list.
opt_out_issue_body = """\
I request that the following data is removed from RepoBench:

{repo_list}

_Note_:
- If you don't want all resources to be included just remove the elements from the list above. If you would like to exclude all repositories and resources just add a single element "all" to the list.
- If you don't want all your resources to be included in the future, just add a single element "ALL" to the list.
"""


def issue_url(username, repos):
    """Return the opt-out Markdown snippet with a pre-filled discussion link.

    Args:
        username: Name inserted into the discussion title.
        repos: Iterable of repository names; each becomes one bullet in the
            discussion body. An empty iterable yields an empty list section.

    Returns:
        ``opt_out_text_template`` with the percent-encoded title and body
        substituted into the link's query string.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` has been loaded.
    import urllib.parse

    repo_list = "\n".join(f" - {repo}" for repo in repos)
    title = urllib.parse.quote(opt_out_issue_title.format(username=username))
    body = urllib.parse.quote(opt_out_issue_body.format(repo_list=repo_list))
    return opt_out_text_template.format(title=title, body=body)
def check_username(username, version):
    """Report whether ``username`` has repositories in the chosen version.

    Args:
        username: Username typed by the visitor; surrounding whitespace is
            ignored.
        version: Dataset version, either a full key of ``usernames`` or a
            prefix of one (e.g. ``"v1.0"`` for
            ``"v1.0 (from 2023-09-07 to 2023-10-06)"``).

    Returns:
        A ``(result_markdown, opt_out_markdown)`` pair. The second element is
        an empty string when nothing was found for the user.
    """
    username = (username or "").strip()

    # The UI may send a short version name while the data is keyed by a longer
    # label, so fall back to a prefix match instead of raising KeyError.
    per_user = usernames.get(version)
    if per_user is None and version:
        per_user = next(
            (
                mapping
                for label, mapping in usernames.items()
                if label.startswith(version)
            ),
            None,
        )

    repos = (per_user or {}).get(username)
    if not repos:
        return "**No**, your code is not in RepoBench.", ""

    repo_word = "repository" if len(repos) == 1 else "repositories"
    header = (
        f"**Yes**, there is code from **{len(repos)} {repo_word}** in RepoBench:"
    )
    # One italicised repository per paragraph below the header.
    output_md = "\n\n".join([header, *(f"_{repo}_" for repo in repos)])
    return output_md.strip(), issue_url(username, repos)
# Page layout: one wide centre column flanked by two narrow spacer columns.
with gr.Blocks() as demo:
    with gr.Row():
        _, column_2, _ = gr.Column(scale=1), gr.Column(scale=6), gr.Column(scale=1)
        with column_2:
            gr.Markdown(text)
            # Offer exactly the versions that were loaded, so the selected
            # value is always a valid key of ``usernames``.
            version_choices = list(usernames)
            version = gr.Dropdown(
                version_choices,
                label="RepoBench version:",
                value=version_choices[0],
            )
            username = gr.Text("", label="Your GitHub username:")
            check_button = gr.Button("Check!")

            # Output areas: the lookup result and the opt-out link.
            repos = gr.Markdown()
            opt_out = gr.Markdown()

            check_button.click(check_username, [username, version], [repos, opt_out])

demo.launch()