Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Public names of this module. Only names that are actually defined in this
# file are listed: exporting an undefined name makes `from <module> import *`
# fail with AttributeError.
__all__ = ['block', 'parse_line', 'get_baseline_df']
|
3 |
+
|
4 |
+
import gradio as gr
|
5 |
+
import pandas as pd
|
6 |
+
|
7 |
+
|
8 |
+
# Leaderboard column headers in display order; used as the `columns=` argument
# of the DataFrame built in get_baseline_df(). "Avg." is not stored in the raw
# rows -- get_baseline_df() computes it and inserts it at index 2.
COLUMN_NAMES = ["Model", "Size", "Avg.", "PPDB", "PPDB filtered", "Turney", "BIRD", "YAGO", "UMLS", "CoNLL", "BC5CDR", "AutoFJ"]
|
9 |
+
|
10 |
+
UNTUNED_MODEL_RESULTS = '''[FastText](https://fasttext.cc/) &--&94.4&61.2&59.6&58.9&16.9&14.5&3.0&0.2&53.6 \\
|
11 |
+
[Sentence-BERT](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) &110M&94.6&66.8&50.4&62.6&21.6&23.6&25.5&48.4&57.2 \\
|
12 |
+
[Phrase-BERT](https://huggingface.co/whaleloops/phrase-bert) &110M&96.8&68.7&57.2&68.8&23.7&26.1&35.4&59.5&66.9 \\
|
13 |
+
[UCTopic](https://github.com/JiachengLi1995/UCTopic) &240M&91.2&64.6&60.2&60.2&5.2&6.9&18.3&33.3&29.5 \\
|
14 |
+
[E5-small](https://huggingface.co/intfloat/e5-small-v2) &34M&96.0&56.8&55.9&63.1&43.3&42.0&27.6&53.7&74.8 \\
|
15 |
+
[E5-base](https://huggingface.co/intfloat/e5-base-v2) &110M&95.4&65.6&59.4&66.3&47.3&44.0&32.0&69.3&76.1\\
|
16 |
+
[PEARL-small](https://huggingface.co/Lihuchen/pearl_small) &34M& 97.0&70.2&57.9&68.1& 48.1&44.5&42.4&59.3&75.2\\
|
17 |
+
[PEARL-base](https://huggingface.co/Lihuchen/pearl_base) &110M&97.3&72.2&59.7&72.6&50.7&45.8&39.3&69.4&77.1\\'''
|
18 |
+
|
19 |
+
|
20 |
+
def parse_line(line):
    """Split one ``&``-separated results row into typed cells.

    Surrounding whitespace and any leading/trailing backslashes are removed
    from the row, then each cell is trimmed individually. Trimming per cell
    (instead of deleting every space in the row) keeps spaces that belong to
    the model name, e.g. ``[My Model](url)``.

    Args:
        line: One row of the results table, e.g.
            ``"[Name](url) &34M& 97.0&70.2 \\"``.

    Returns:
        A list whose first two items are strings (model cell and size cell)
        and whose remaining items are floats. A row without any ``&`` yields
        a one-element list.

    Raises:
        ValueError: If a cell after the second one is not a valid float.
    """
    cells = [cell.strip() for cell in line.strip().strip("\\").split("&")]
    # Cells 0 and 1 are labels (model link, size such as "110M" or "--");
    # everything after them is a numeric score.
    return cells[:2] + [float(score) for score in cells[2:]]
|
29 |
+
|
30 |
+
def get_baseline_df():
    """Build the leaderboard table from ``UNTUNED_MODEL_RESULTS``.

    Each non-blank row is parsed with ``parse_line``, the mean of its score
    cells (everything after the model and size cells) is inserted at index 2
    as the "Avg." column, and the rows are assembled into a DataFrame with
    ``COLUMN_NAMES`` as headers.

    Returns:
        pandas.DataFrame: One row per model, rounded to one decimal place.

    Raises:
        ValueError: If a row does not have exactly one cell per column
            (excluding the computed "Avg." column).
    """
    # Raw rows hold every column except "Avg.", which is computed below.
    expected_cells = len(COLUMN_NAMES) - 1

    rows = []
    for line in UNTUNED_MODEL_RESULTS.splitlines():
        if not line.strip():
            # Tolerate blank lines in the table literal.
            continue
        model_results = parse_line(line)
        # Explicit check rather than `assert`, which is stripped under -O.
        if len(model_results) != expected_cells:
            raise ValueError(
                f"expected {expected_cells} cells but got "
                f"{len(model_results)} in row: {line!r}"
            )
        scores = model_results[2:]
        model_results.insert(2, sum(scores) / len(scores))
        rows.append(model_results)

    return pd.DataFrame(rows, columns=COLUMN_NAMES).round(1)
|
55 |
+
|
56 |
+
|
57 |
+
# Label shown above the citation text box.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
# BibTeX entry shown in the citation text box. Raw string so that the LaTeX
# accent `{\"e}` is kept verbatim. The entry is closed by exactly one `}`;
# a second one would leave the braces unbalanced.
CITATION_BUTTON_TEXT = r"""@article{chen2024learning,
title={Learning High-Quality and General-Purpose Phrase Representations},
author={Chen, Lihu and Varoquaux, Ga{\"e}l and Suchanek, Fabian M},
journal={arXiv preprint arXiv:2401.10407},
year={2024}
}"""
|
65 |
+
|
66 |
+
|
67 |
+
# Top-level Gradio container for the leaderboard page.
block = gr.Blocks()

with block:
    # Page header with links to the paper, models, benchmark and data.
    # NOTE(review): the emoji in this header were mis-decoded in the text this
    # block was restored from. The oyster/circle, hugging-face and floppy-disk
    # glyphs could be recovered from the surviving bytes; the trophy and the
    # page icon are stand-ins for two glyphs that could not -- confirm.
    gr.Markdown(
        """# 🦪⚪ The PEARL-Leaderboard aims to evaluate string embeddings on various tasks.
🏆 Our PEARL leaderboard contains 9 phrase-level datasets of five types of tasks, covering both the tasks of data science and natural language processing. <br>
| **[ 📄 paper](https://arxiv.org/pdf/2401.10407.pdf)** | **[🤗 PEARL-small](https://huggingface.co/Lihuchen/pearl_small)** | **[🤗 PEARL-base](https://huggingface.co/Lihuchen/pearl_base)** | 🤗 **[PEARL-Benchmark](https://huggingface.co/datasets/Lihuchen/pearl_benchmark)** |
**[💾 data](https://zenodo.org/records/10676475)** |
"""
    )

    # Short description of each task family and the datasets behind it.
    gr.Markdown(
        """ ## Task Description<br>
* **Paraphrase Classification**: PPDB and PPDBfiltered ([Wang et al., 2021](https://aclanthology.org/2021.emnlp-main.846/))
* **Phrase Similarity**: Turney ([Turney, 2012](https://arxiv.org/pdf/1309.4035.pdf)) and BIRD ([Asaadi et al., 2019](https://aclanthology.org/N19-1050/))
* **Entity Retrieval**: We constructed two datasets based on Yago ([Pellissier Tanon et al., 2020](https://hal-lara.archives-ouvertes.fr/DIG/hal-03108570v1)) and UMLS ([Bodenreider, 2004](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC308795/))
* **Entity Clustering**: CoNLL 03 ([Tjong Kim Sang, 2002](https://aclanthology.org/W02-2024/)) and BC5CDR ([Li et al., 2016](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4860626/))
* **Fuzzy Join**: AutoFJ benchmark ([Li et al., 2021](https://arxiv.org/abs/2103.04489)) contains 50 diverse fuzzy-join datasets

"""
    )

    # Results table: two markdown columns followed by ten numeric columns,
    # one datatype entry per leaderboard column.
    with gr.Row():
        data = gr.components.Dataframe(
            type="pandas",
            datatype=["markdown", "markdown"] + ["number"] * 10,
        )

    # Manual refresh: rebuild the table and push it into the Dataframe.
    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(get_baseline_df, outputs=data)

    with gr.Row():
        with gr.Accordion("Citation", open=True):
            # NOTE(review): a copy button was sketched earlier as
            # `.style(show_copy_button=True)` but left disabled.
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
            )

    # Fill the table once when the page is first loaded.
    block.load(get_baseline_df, outputs=data)

block.launch()
|