hysts HF staff committed on
Commit
0396aac
·
1 Parent(s): 90ca727

Remake this Space

Browse files
Files changed (8) hide show
  1. .gitattributes +1 -0
  2. .pre-commit-config.yaml +35 -0
  3. .style.yapf +5 -0
  4. app.py +74 -17
  5. paper_list.py +102 -0
  6. papers.csv +0 -0
  7. requirements.txt +1 -1
  8. style.css +22 -0
.gitattributes CHANGED
@@ -1,3 +1,4 @@
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
 
1
+ *.csv filter=lfs diff=lfs merge=lfs -text
2
  *.7z filter=lfs diff=lfs merge=lfs -text
3
  *.arrow filter=lfs diff=lfs merge=lfs -text
4
  *.bin filter=lfs diff=lfs merge=lfs -text
.pre-commit-config.yaml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v4.2.0
4
+ hooks:
5
+ - id: check-executables-have-shebangs
6
+ - id: check-json
7
+ - id: check-merge-conflict
8
+ - id: check-shebang-scripts-are-executable
9
+ - id: check-toml
10
+ - id: check-yaml
11
+ - id: double-quote-string-fixer
12
+ - id: end-of-file-fixer
13
+ - id: mixed-line-ending
14
+ args: ['--fix=lf']
15
+ - id: requirements-txt-fixer
16
+ - id: trailing-whitespace
17
+ - repo: https://github.com/myint/docformatter
18
+ rev: v1.4
19
+ hooks:
20
+ - id: docformatter
21
+ args: ['--in-place']
22
+ - repo: https://github.com/pycqa/isort
23
+ rev: 5.10.1
24
+ hooks:
25
+ - id: isort
26
+ - repo: https://github.com/pre-commit/mirrors-mypy
27
+ rev: v0.812
28
+ hooks:
29
+ - id: mypy
30
+ args: ['--ignore-missing-imports']
31
+ - repo: https://github.com/google/yapf
32
+ rev: v0.32.0
33
+ hooks:
34
+ - id: yapf
35
+ args: ['--parallel', '--in-place']
.style.yapf ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ [style]
2
+ based_on_style = pep8
3
+ blank_line_before_nested_class_or_def = false
4
+ spaces_before_comment = 2
5
+ split_before_logical_operator = true
app.py CHANGED
@@ -1,24 +1,81 @@
 
 
 
 
1
  import gradio as gr
2
- import pandas as pd
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
- def update():
6
- return pd.read_csv('papers.csv', header=0)
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- block = gr.Blocks()
10
 
11
- with block:
12
- gr.Markdown(
13
- """# Papers List for NAACL 2022 as part of the NAACL event, To learn more and join, see <a href="https://huggingface.co/NAACL2022" target="_blank" style="text-decoration: underline">NAACL event</a>"""
14
- )
15
- with gr.Tabs():
16
- with gr.TabItem("NAACL papers list"):
17
- with gr.Row():
18
- data = gr.Dataframe(type="pandas", wrap=True)
19
- with gr.Row():
20
- data_run = gr.Button("Refresh")
21
- data_run.click(update, inputs=None, outputs=data)
22
 
23
- block.load(update, inputs=None, outputs=data)
24
- block.launch()
 
1
+ #!/usr/bin/env python
2
+
3
+ from __future__ import annotations
4
+
5
  import gradio as gr
 
6
 
7
+ from paper_list import PaperList
8
+
9
+ DESCRIPTION = '# NAACL 2022 Papers'
10
+ NOTES = '''
11
+ - [NAACL 2022](https://2022.naacl.org/)
12
+ '''
13
+ FOOTER = '<img id="visitor-badge" alt="visitor badge" src="https://visitor-badge.glitch.me/badge?page_id=naacl2022.papers" />'
14
+
15
+
16
def main():
    """Build the NAACL 2022 paper browser UI and launch it.

    The page consists of a title search box, a case-sensitivity toggle, two
    checkbox groups (resource filters and paper categories), a search button,
    a result counter and an HTML table.  ``PaperList.render`` is wired both to
    the initial page load and to the search button, with the same inputs and
    outputs in both cases.
    """
    papers = PaperList()

    resource_choices = [
        'arXiv',
        'GitHub',
        'HF Space',
        'HF Model',
        'HF Dataset',
    ]
    # Every category is both offered as a choice and selected by default.
    category_choices = [
        'Long Paper',
        'Short Paper',
        'Special Theme Paper',
        'Findings',
        'Industry Track',
    ]

    with gr.Blocks(css='style.css') as demo:
        gr.Markdown(DESCRIPTION)

        search_box = gr.Textbox(
            label='Search Title',
            placeholder=
            'You can search for titles with regular expressions. e.g. (?<!sur)face'
        )
        case_sensitive = gr.Checkbox(label='Case Sensitive')
        filter_names = gr.CheckboxGroup(choices=resource_choices,
                                        label='Filter')
        paper_categories = gr.CheckboxGroup(choices=category_choices,
                                            value=category_choices,
                                            label='Paper Categories')
        search_button = gr.Button('Search')

        number_of_papers = gr.Textbox(label='Number of Papers Found')
        table = gr.HTML(show_label=False)

        gr.Markdown(NOTES)
        gr.Markdown(FOOTER)

        # The same query widgets feed both triggers, and both write to the
        # same result widgets.
        query_inputs = [
            search_box,
            case_sensitive,
            filter_names,
            paper_categories,
        ]
        result_outputs = [
            number_of_papers,
            table,
        ]
        demo.load(papers.render, inputs=query_inputs, outputs=result_outputs)
        search_button.click(papers.render,
                            inputs=query_inputs,
                            outputs=result_outputs)

    demo.launch(enable_queue=True, share=False)
78
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
+ if __name__ == '__main__':
81
+ main()
paper_list.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import pandas as pd
4
+
5
+
6
class PaperList:
    """Searchable, filterable HTML view of the papers in ``papers.csv``.

    The CSV is read once when the object is created and every row is
    pre-rendered into an HTML ``<tr>`` fragment (stored in the
    ``html_table_content`` column), so :meth:`render` only has to select rows
    and join the fragments.

    The code reads these columns from the CSV: ``title``, ``authors``,
    ``url``, ``pdf``, ``category``, ``arxiv``, ``github``, ``hf_space``,
    ``hf_model`` and ``hf_dataset``.
    """

    def __init__(self):
        # Path is relative to the current working directory.
        self.table = pd.read_csv('papers.csv')
        self._preprocess_table()

        self.table_header = '''
            <tr>
                <td width="50%">Paper</td>
                <td width="22%">Authors</td>
                <td width="4%">pdf</td>
                <td width="4%">category</td>
                <td width="4%">arXiv</td>
                <td width="4%">GitHub</td>
                <td width="4%">HF Spaces</td>
                <td width="4%">HF Models</td>
                <td width="4%">HF Datasets</td>
            </tr>'''

    def _preprocess_table(self) -> None:
        """Add the derived columns used by :meth:`render` to ``self.table``.

        ``title_lowercase`` is kept for backward compatibility (searching no
        longer relies on it).  ``html_table_content`` holds one pre-rendered
        ``<tr>`` per paper.  All text taken from the CSV is HTML-escaped so
        that characters such as ``&`` or ``<`` in a title cannot break the
        table markup.
        """
        import html  # local import keeps the class self-contained

        def link(url, text: str) -> str:
            # Missing CSV cells are not ``str`` (pandas yields NaN/None).
            if not isinstance(url, str):
                return ''
            return f'<a href="{html.escape(url)}" target="_blank">{text}</a>'

        self.table['title_lowercase'] = self.table.title.str.lower()

        rows = []
        for record in self.table.itertuples():
            title = html.escape(str(record.title))
            # The title becomes a link only when the paper has a URL.
            paper = link(record.url, title) or title
            rows.append(f'''
                <tr>
                    <td>{paper}</td>
                    <td>{html.escape(str(record.authors))}</td>
                    <td>{link(record.pdf, 'pdf')}</td>
                    <td>{html.escape(str(record.category))}</td>
                    <td>{link(record.arxiv, 'arXiv')}</td>
                    <td>{link(record.github, 'GitHub')}</td>
                    <td>{link(record.hf_space, 'Space')}</td>
                    <td>{link(record.hf_model, 'Model')}</td>
                    <td>{link(record.hf_dataset, 'Dataset')}</td>
                </tr>''')
        self.table['html_table_content'] = rows

    # Backward-compatible alias for the original (misspelled) method name.
    _preprcess_table = _preprocess_table

    def render(self, search_query: str, case_sensitive: bool,
               filter_names: list[str],
               paper_categories: list[str]) -> tuple[int, str]:
        """Return the number of matching papers and their HTML table.

        Args:
            search_query: Regular expression matched against paper titles.
                An empty query matches everything.  If the text is not a
                valid regular expression it is matched as a literal
                substring instead of raising.
            case_sensitive: Whether the title match is case sensitive.
            filter_names: Any of ``'arXiv'``, ``'GitHub'``, ``'HF Space'``,
                ``'HF Model'``, ``'HF Dataset'``; a paper must have every
                selected resource.  ``None`` is treated as no filter.
            paper_categories: Categories to keep.  ``None`` is treated as an
                empty selection, which matches no paper.

        Returns:
            ``(count, html)`` where ``html`` is a complete ``<table>``.
        """
        import re  # local import keeps the class self-contained

        filter_names = filter_names or []
        paper_categories = paper_categories or []

        df = self.table
        if search_query:
            try:
                re.compile(search_query)
                is_regex = True
            except re.error:
                is_regex = False
            # ``case=False`` matches case-insensitively without lowercasing
            # the pattern itself, which would turn e.g. ``\S`` into ``\s``.
            # ``na=False`` keeps rows with a missing title out of the result
            # instead of producing an unusable NaN mask.
            matches = df.title.str.contains(search_query,
                                            case=bool(case_sensitive),
                                            regex=is_regex,
                                            na=False)
            df = df[matches]

        df = self.filter_table(df, 'arXiv' in filter_names, 'GitHub'
                               in filter_names, 'HF Space' in filter_names,
                               'HF Model' in filter_names, 'HF Dataset'
                               in filter_names, paper_categories)
        return len(df), self.to_html(df, self.table_header)

    @staticmethod
    def filter_table(df: pd.DataFrame, has_arxiv: bool, has_github: bool,
                     has_hf_space: bool, has_hf_model: bool,
                     has_hf_dataset: bool,
                     paper_categories: list[str]) -> pd.DataFrame:
        """Keep rows that have every requested resource and a kept category.

        Each ``has_*`` flag, when true, drops rows whose corresponding column
        is missing.  Rows whose ``category`` is not in ``paper_categories``
        are always dropped.
        """
        required_columns = (
            (has_arxiv, 'arxiv'),
            (has_github, 'github'),
            (has_hf_space, 'hf_space'),
            (has_hf_model, 'hf_model'),
            (has_hf_dataset, 'hf_dataset'),
        )
        for is_required, column in required_columns:
            if is_required:
                df = df[df[column].notna()]
        return df[df.category.isin(set(paper_categories or ()))]

    @staticmethod
    def to_html(df: pd.DataFrame, table_header: str) -> str:
        """Wrap ``table_header`` and the rows' pre-rendered HTML in a table."""
        table_data = ''.join(df.html_table_content)
        return f'''
            <table>
                {table_header}
                {table_data}
            </table>'''
papers.csv CHANGED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -1 +1 @@
1
- lxml
 
1
+ lxml
style.css ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ h1 {
2
+ text-align: center;
3
+ }
4
+ table a {
5
+ background-color: transparent;
6
+ color: #58a6ff;
7
+ text-decoration: none;
8
+ }
9
+ a:active,
10
+ a:hover {
11
+ outline-width: 0;
12
+ }
13
+ a:hover {
14
+ text-decoration: underline;
15
+ }
16
+ table, th, td {
17
+ border: 1px solid;
18
+ }
19
+ img#visitor-badge {
20
+ display: block;
21
+ margin: auto;
22
+ }