Spaces:
Running
Running
File size: 3,235 Bytes
48dc2f2 ff681b1 fcb283e 48dc2f2 6127ee8 a431d31 48dc2f2 a431d31 5518841 340ba2a 6127ee8 48dc2f2 ff681b1 a431d31 340ba2a bb0d55f 48dc2f2 3ab1469 ff681b1 7666b36 ff681b1 2cce3db ff681b1 48dc2f2 a431d31 ff681b1 48dc2f2 a431d31 6127ee8 ff681b1 33aa037 6127ee8 ff681b1 6127ee8 ff681b1 6127ee8 48dc2f2 c4eee53 33aa037 5ee220a 48dc2f2 6127ee8 b0c1665 7bd10e0 6127ee8 48dc2f2 bb0d55f 3ab8448 33aa037 48dc2f2 6127ee8 48dc2f2 6127ee8 48dc2f2 3ab1469 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import gradio as gr
import json
import gzip
import urllib
# Per-release lookup tables, keyed by the release label shown in the UI
# dropdown. Each table is decoded from a gzip-compressed JSON file and is
# indexed by GitHub username (presumably mapping to a list of repository
# names -- verify against the data files).
usernames = {}
for release_label, filepath in (
    ("v1.0 (from 2023-09-07 to 2023-10-06)", "username_to_repo_v1.0.json.gz"),
    ("v1.1 (from 2023-10-07 to 2023-12-31)", "username_to_repo_v1.1.json.gz"),
):
    # Read the raw bytes inside the context manager, decode afterwards.
    with gzip.open(filepath, "rb") as f:
        raw_bytes = f.read()
    usernames[release_label] = json.loads(raw_bytes.decode("utf-8"))
# Markdown rendered at the top of the page: tagline, heading and a short
# description of what the tool checks.
text = (
    "\n"
    "**_RepoBench is a benchmark specifically designed for evaluating repository-level code auto-completion systems._**\n"
    "# Am I in The RepoBench?\n"
    "We released and maintain [RepoBench](https://arxiv.org/abs/2306.03091), built by parsing the most recent github repositories.\n"
    "This tool lets you check if a repository under a given username is part of the RepoBench dataset.\n"
)

# Markdown/HTML shown under a positive result. `{title}` and `{body}` are
# filled with the already percent-encoded issue title and body.
opt_out_text_template = (
    "### Opt-out\n"
    "If you want your data to be removed from RepoBench for model evaluation "
    "open an issue with <a href=\"https://github.com/Leolty/repobench/issues/new?title={title}&body={body}\" target=\"_blank\">this link</a>"
)

# Title of the pre-filled GitHub issue; `{username}` is the queried user.
opt_out_issue_title = "Opt-out request for {username}"

# Body of the pre-filled GitHub issue; `{repo_list}` is a Markdown bullet list
# of the user's repositories.
opt_out_issue_body = (
    "I request that the following data is removed from RepoBench:\n"
    "{repo_list}\n"
    "_Note_:\n"
    "- If you don't want all resources to be included just remove the elements from the list above. If you would like to exclude all repositories and resources just add a single element \"all\" to the list.\n"
    "- If you don't want all your resources to be included in the future, just add a single element \"ALL\" to the list.\n"
)
def issue_url(username, repos):
    """Build the opt-out Markdown snippet with a pre-filled GitHub issue link.

    Args:
        username: GitHub username inserted into the issue title.
        repos: Iterable of repository names; each becomes one Markdown
            bullet in the issue body.

    Returns:
        ``opt_out_text_template`` rendered with the percent-encoded issue
        title and body.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not load
    # ``urllib.parse``, so ``urllib.parse.quote`` only works when some other
    # module happens to have imported it first.
    from urllib.parse import quote

    title = quote(opt_out_issue_title.format(username=username))
    bullet_list = " - " + "\n - ".join(repos)
    body = quote(opt_out_issue_body.format(repo_list=bullet_list))
    return opt_out_text_template.format(title=title, body=body)
def check_username(username, version):
    """Report whether a GitHub user has repositories in a RepoBench release.

    Args:
        username: GitHub username as typed by the user. Surrounding
            whitespace is ignored and ``None`` is treated as empty.
        version: Key of ``usernames`` selecting the release to search.

    Returns:
        A ``(result_markdown, opt_out_markdown)`` tuple. The second element
        is the opt-out snippet from ``issue_url`` when repositories were
        found, and an empty string otherwise.

    Raises:
        KeyError: If ``version`` is not a key of ``usernames``.
    """
    # Text typed or pasted into the box often carries stray whitespace, which
    # would otherwise turn a real match into a false "No".
    username = (username or "").strip()

    # A missing user and a user with an empty entry are both "not included".
    repos = usernames[version].get(username)
    if not repos:
        return "**No**, your code is not in RepoBench.", ""

    repo_word = "repository" if len(repos) == 1 else "repositories"
    header = f"**Yes**, there is code from **{len(repos)} {repo_word}** in RepoBench:"
    # One italic paragraph per repository, separated by blank lines.
    listing = "\n\n".join(f"_{repo}_" for repo in repos)
    return f"{header}\n\n{listing}", issue_url(username, repos)
# Page layout: a single row of three columns, with all widgets placed in the
# wider middle column (scale 6) between two empty side columns (scale 1).
with gr.Blocks() as demo:
    with gr.Row():
        _, center_column, _ = (
            gr.Column(scale=1),
            gr.Column(scale=6),
            gr.Column(scale=1),
        )
        with center_column:
            gr.Markdown(text)
            version = gr.Dropdown(
                [
                    "v1.0 (from 2023-09-07 to 2023-10-06)",
                    "v1.1 (from 2023-10-07 to 2023-12-31)",
                ],
                label="RepoBench version:",
                value="v1.1 (from 2023-10-07 to 2023-12-31)",
            )
            username = gr.Text("", label="Your GitHub username:")
            check_button = gr.Button("Check!")
            # Output areas: the lookup result and the opt-out instructions.
            repos = gr.Markdown()
            opt_out = gr.Markdown()
            check_button.click(check_username, [username, version], [repos, opt_out])

# The Blocks object is bound to `demo`; no name `app` exists in this module,
# so launching must go through `demo`.
demo.launch(allowed_paths=["./"])