File size: 3,235 Bytes
48dc2f2
ff681b1
fcb283e
48dc2f2
6127ee8
a431d31
48dc2f2
a431d31
5518841
340ba2a
6127ee8
48dc2f2
ff681b1
a431d31
340ba2a
bb0d55f
 
 
 
48dc2f2
3ab1469
ff681b1
7666b36
ff681b1
2cce3db
ff681b1
48dc2f2
a431d31
ff681b1
48dc2f2
a431d31
6127ee8
 
 
ff681b1
33aa037
6127ee8
 
 
 
ff681b1
6127ee8
 
 
ff681b1
 
 
 
6127ee8
 
 
 
 
 
 
 
 
 
48dc2f2
 
 
 
c4eee53
33aa037
5ee220a
48dc2f2
6127ee8
 
b0c1665
7bd10e0
6127ee8
48dc2f2
 
 
 
 
 
bb0d55f
 
3ab8448
33aa037
48dc2f2
 
 
 
6127ee8
 
48dc2f2
6127ee8
48dc2f2
 
3ab1469
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import gzip
import json
import urllib
import urllib.parse

import gradio as gr

# Per-release lookup tables, keyed by the release label used in the version
# dropdown. Each value is the decoded content of that release's gzip-compressed
# JSON index (indexed by username in check_username).
usernames = {}

for release_label, filepath in (
    ("v1.0 (from 2023-09-07 to 2023-10-06)", "username_to_repo_v1.0.json.gz"),
    ("v1.1 (from 2023-10-07 to 2023-12-31)", "username_to_repo_v1.1.json.gz"),
):
    with gzip.open(filepath, "rb") as f:
        raw = f.read()
    usernames[release_label] = json.loads(raw.decode("utf-8"))

# Introductory Markdown passed to gr.Markdown at the top of the page.
# Written as one entry per output line; empty entries are blank lines.
text = "\n".join(
    [
        "![](./banner.png)",
        "**_RepoBench is a benchmark specifically designed for evaluating repository-level code auto-completion systems._**",
        "",
        "# Am I in The RepoBench?",
        "",
        "We released and maintain [RepoBench](https://arxiv.org/abs/2306.03091), built by parsing the most recent github repositories.",
        "",
        # The trailing space after "dataset." is part of the original text.
        "This tool lets you check if a repository under a given username is part of the RepoBench dataset. ",
        "",  # final empty entry produces the terminating newline
    ]
)

# Markdown/HTML snippet inviting the user to open an opt-out issue.
# ``{title}`` and ``{body}`` are filled in by ``issue_url``.
# There is deliberately no trailing newline.
opt_out_text_template = (
    "### Opt-out\n"
    "\n"
    "If you want your data to be removed from RepoBench for model evaluation "
    'open an issue with <a href="https://github.com/Leolty/repobench/issues/new'
    '?title={title}&body={body}" target="_blank">this link</a>'
)

# Title of the pre-filled GitHub issue.
opt_out_issue_title = "Opt-out request for {username}"

# Body of the pre-filled GitHub issue; ``{repo_list}`` receives the bullet
# list of repositories. One entry per output line; trailing spaces inside the
# entries are part of the original text.
opt_out_issue_body = "\n".join(
    [
        "I request that the following data is removed from RepoBench:",
        "",
        "{repo_list}",
        "",
        "_Note_: ",
        "",
        "- If you don't want all resources to be included just remove the elements from the list above. If you would like to exclude all repositories and resources just add a single element \"all\" to the list. ",
        "- If you don't want all your resources to be included in the future, just add a single element \"ALL\" to the list.",
        "",  # final empty entry produces the terminating newline
    ]
)

def issue_url(username, repos):
    """Build the opt-out Markdown snippet with a pre-filled GitHub issue link.

    Args:
        username: GitHub username inserted into the issue title.
        repos: Iterable of repository names to list in the issue body.

    Returns:
        ``opt_out_text_template`` with its ``title`` and ``body`` placeholders
        replaced by the percent-encoded issue title and body.
    """
    # One Markdown bullet per repository. Building the bullets individually
    # means an empty ``repos`` gives an empty list, not a stray " - " bullet.
    repo_list = "\n".join(f" - {repo}" for repo in repos)

    # Percent-encode both parts so they can be embedded as URL query values.
    title = urllib.parse.quote(opt_out_issue_title.format(username=username))
    body = urllib.parse.quote(opt_out_issue_body.format(repo_list=repo_list))

    return opt_out_text_template.format(title=title, body=body)

def check_username(username, version):
    """Report whether ``username`` has repositories in a RepoBench release.

    Args:
        username: GitHub username as typed by the user. Surrounding
            whitespace is ignored.
        version: Key into ``usernames`` selecting the release to search.

    Returns:
        A ``(result_markdown, opt_out_markdown)`` tuple. ``opt_out_markdown``
        is an empty string when no repositories are found.
    """
    # Pasted usernames often carry stray whitespace, which would otherwise
    # produce a misleading "No".
    username = (username or "").strip()

    # An unknown release or username is treated as "no repositories" instead
    # of raising KeyError inside the UI callback.
    repos = usernames.get(version, {}).get(username)
    if not repos:
        return "**No**, your code is not in RepoBench.", ""

    repo_word = "repository" if len(repos) == 1 else "repositories"
    header = f"**Yes**, there is code from **{len(repos)} {repo_word}** in RepoBench:"
    # Each repository is rendered as its own italic paragraph.
    listing = "\n\n".join(f"_{repo}_" for repo in repos)

    return f"{header}\n\n{listing}", issue_url(username, repos)

with gr.Blocks() as demo:
    with gr.Row():
        # Three columns are created inside the row; only the wide middle one
        # receives content, so the outer two are left empty (presumably as
        # page margins).
        _, center_column, _ = gr.Column(scale=1), gr.Column(scale=6), gr.Column(scale=1)
        with center_column:
            gr.Markdown(text)

            # Offer exactly the releases that were loaded, so the dropdown
            # cannot drift from the keys of ``usernames``; the most recently
            # added release is preselected.
            version_labels = list(usernames)
            version = gr.Dropdown(
                version_labels,
                label="RepoBench version:",
                value=version_labels[-1],
            )
            username = gr.Text("", label="Your GitHub username:")
            check_button = gr.Button("Check!")

            # Output areas: the lookup result and the opt-out instructions.
            repos = gr.Markdown()
            opt_out = gr.Markdown()

            check_button.click(check_username, [username, version], [repos, opt_out])


# The Blocks object is named ``demo``; the previous ``app.launch`` referenced
# an undefined name and was mis-indented.
# NOTE(review): allowed_paths=["./"] appears to let Gradio serve files from the
# working directory (needed for banner.png?) -- confirm the data files there
# are meant to be downloadable.
demo.launch(allowed_paths=["./"])