tianyang committed on
Commit
ff681b1
·
1 Parent(s): 0934cc2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -28
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from huggingface_hub import hf_hub_download
3
  import json
4
  import gzip
5
  import urllib
@@ -7,52 +7,39 @@ import urllib
7
  usernames = {}
8
 
9
 
10
- filepath = hf_hub_download(repo_id="bigcode/the-stack-username-to-repo", filename="username_to_repo.json.gz", repo_type="dataset", revision="v2.0")
11
- with gzip.open(filepath, 'r') as f:
12
- usernames["v2.0-rc"] = json.loads(f.read().decode('utf-8'))
13
-
14
- filepath = hf_hub_download(repo_id="bigcode/the-stack-username-to-repo", filename="username_to_repo.json.gz", repo_type="dataset", revision="v1.2")
15
- with gzip.open(filepath, 'r') as f:
16
- usernames["v1.2"] = json.loads(f.read().decode('utf-8'))
17
 
18
- filepath = hf_hub_download(repo_id="bigcode/the-stack-username-to-repo", filename="username_to_repo.json.gz", repo_type="dataset", revision="v1.1")
19
  with gzip.open(filepath, 'r') as f:
20
- usernames["v1.1"] = json.loads(f.read().decode('utf-8'))
21
-
22
- filepath = hf_hub_download(repo_id="bigcode/the-stack-username-to-repo", filename="username_to_repo.json.gz", repo_type="dataset")
23
- with gzip.open(filepath, 'r') as f:
24
- usernames["v1.0"] = json.loads(f.read().decode('utf-8'))
25
 
26
  text = """\
27
- ![](https://huggingface.co/spaces/lvwerra/in-the-stack-gr/resolve/main/banner.png)
28
- **_The Stack is an open governance interface between the AI community and the open source community._**
29
 
30
- # Am I in The Stack?
31
 
32
- As part of the BigCode project, we released and maintain [The Stack](https://huggingface.co/datasets/bigcode/the-stack), a 6 TB dataset of permissively licensed source code over 300 programming languages. One of our goals in this project is to give people agency over their source code by letting them decide whether or not it should be used to develop and evaluate machine learning models, as we acknowledge that not all developers may wish to have their data used for that purpose.
33
  """ + """\
34
 
35
- This tool lets you check if a repository under a given username is part of The Stack dataset. Would you like to have your data removed from future versions of The Stack? You can opt-out following the instructions [here](https://www.bigcode-project.org/docs/about/the-stack/#how-can-i-request-that-my-data-be-removed-from-the-stack). Note that previous opt-outs might still be displayed in the release candidate (denoted with "-rc"), which will be removed for the release.
36
  """
37
 
38
  opt_out_text_template = """\
39
  ### Opt-out
40
 
41
- If you want your data to be removed from the stack and model training \
42
- open an issue with <a href="https://github.com/bigcode-project/opt-out-v2/issues/new?title={title}&body={body}" target="_blank">this link</a> \
43
- (if the link doesn't work try right a right click and open it in a new tab) or visit [https://github.com/bigcode-project/opt-out-v2/issues/new?&template=opt-out-request.md](https://github.com/bigcode-project/opt-out-v2/issues/new?&template=opt-out-request.md) .\
44
  """
45
 
46
  opt_out_issue_title = """Opt-out request for {username}"""
47
  opt_out_issue_body = """\
48
- I request that the following data is removed from The Stack and StackOverflow:
49
 
50
- - Commits
51
- - GitHub issue
52
- - StackOverflow: <INSERT_STACKOVERFLOW_USERNAME_HERE>
53
  {repo_list}
54
 
55
- _Note_: If you don't want all resources to be included just remove the elements from the list above. If you would like to exclude all repositories and resources just add a single element "all" to the list.
 
 
 
56
  """
57
 
58
  def issue_url(username, repos):
@@ -82,7 +69,7 @@ with gr.Blocks() as demo:
82
  _, colum_2, _ = gr.Column(scale=1), gr.Column(scale=6), gr.Column(scale=1)
83
  with colum_2:
84
  gr.Markdown(text)
85
- version = gr.Dropdown(["v2.0-rc", "v1.2", "v1.1", "v1.0"], label="The Stack version:", value="v2.0-rc")
86
  username = gr.Text("", label="Your GitHub username:")
87
  check_button = gr.Button("Check!")
88
 
 
1
  import gradio as gr
2
+
3
  import json
4
  import gzip
5
  import urllib
 
7
  usernames = {}
8
 
9
 
10
+ filepath = "username_to_repo.json.gz"
 
 
 
 
 
 
11
 
 
12
  with gzip.open(filepath, 'r') as f:
13
+ usernames["v1.0 (from 2023-09-07 to 2023-10-06)"] = json.loads(f.read().decode('utf-8'))
 
 
 
 
14
 
15
  text = """\
16
+ **_RepoBench is a benchmark specifically designed for evaluating repository-level code auto-completion systems._**
 
17
 
18
+ # Am I in The RepoBench?
19
 
20
+ We released and maintain [RepoBench](https://arxiv.org/abs/2306.03091), built by parsing the most recent github repositories.
21
  """ + """\
22
 
23
+ This tool lets you check if a repository under a given username is part of the RepoBench dataset.
24
  """
25
 
26
  opt_out_text_template = """\
27
  ### Opt-out
28
 
29
+ If you want your data to be removed from RepoBench for model evaluation \
30
+ open an issue with <a href="https://huggingface.co/spaces/tianyang/in-the-repobench/discussions/new?title={title}&body={body}" target="_blank">this link</a>\
 
31
  """
32
 
33
  opt_out_issue_title = """Opt-out request for {username}"""
34
  opt_out_issue_body = """\
35
+ I request that the following data is removed from RepoBench:
36
 
 
 
 
37
  {repo_list}
38
 
39
+ _Note_:
40
+
41
+ - If you don't want all resources to be included just remove the elements from the list above. If you would like to exclude all repositories and resources just add a single element "all" to the list.
42
+ - If you don't want all your resources to be included in the future, just add a single element "ALL" to the list.
43
  """
44
 
45
  def issue_url(username, repos):
 
69
  _, colum_2, _ = gr.Column(scale=1), gr.Column(scale=6), gr.Column(scale=1)
70
  with colum_2:
71
  gr.Markdown(text)
72
+ version = gr.Dropdown(["v1.0"], label="The Stack version:", value="v1.0")
73
  username = gr.Text("", label="Your GitHub username:")
74
  check_button = gr.Button("Check!")
75