Spaces:
AIR-Bench
/
Running on CPU Upgrade

nan committed on
Commit
b9d42b4
·
1 Parent(s): c48db83

feat: update the about and submission tab

Browse files
Files changed (2) hide show
  1. app.py +2 -0
  2. src/about.py +117 -12
app.py CHANGED
@@ -202,6 +202,7 @@ with demo:
202
  with gr.Column():
203
  with gr.Row():
204
  search_bar = gr.Textbox(
 
205
  placeholder=" 🔍 Search for retrieval models (separate multiple queries with `;`) and press ENTER...",
206
  show_label=False,
207
  elem_id="search-bar-long-doc",
@@ -276,6 +277,7 @@ with demo:
276
  selected_langs,
277
  selected_rerankings,
278
  search_bar,
 
279
  ],
280
  leaderboard_table_long_doc,
281
  )
 
202
  with gr.Column():
203
  with gr.Row():
204
  search_bar = gr.Textbox(
205
+ info="Search the retrieval models",
206
  placeholder=" 🔍 Search for retrieval models (separate multiple queries with `;`) and press ENTER...",
207
  show_label=False,
208
  elem_id="search-bar-long-doc",
 
277
  selected_langs,
278
  selected_rerankings,
279
  search_bar,
280
+ show_anonymous,
281
  ],
282
  leaderboard_table_long_doc,
283
  )
src/about.py CHANGED
@@ -10,24 +10,129 @@ AIR-Bench: Automated Heterogeneous Information Retrieval Benchmark
10
  BENCHMARKS_TEXT = f"""
11
  ## How it works
12
 
13
- ## Reproducibility
14
- To reproduce our results, here are the commands you can run:
15
-
16
  """
17
 
18
  EVALUATION_QUEUE_TEXT = """
19
- ## Some good practices before submitting a model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- ### 1)
22
- ### 2)
23
- ### 3)
24
- ### 4)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- ## In case of model failure
28
- If your model is displayed in the `FAILED` category, its execution stopped.
29
- Make sure you have followed the above steps first.
30
- If everything is done, check you can launch the EleutherAIHarness on your model locally, using the above command without modifications (you can add `--limit` to limit the number of examples per task).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  """
32
 
33
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 
10
  BENCHMARKS_TEXT = f"""
11
  ## How it works
12
 
13
+ For more information, see [our GitHub repo](https://github.com/AIR-Bench/AIR-Bench)
 
 
14
  """
15
 
16
  EVALUATION_QUEUE_TEXT = """
17
+ ## Steps for submitting to AIR-Bench
18
+
19
+ 1. Install AIR-Bench
20
+ ```bash
21
+ # Clone the repo
22
+ git clone https://github.com/AIR-Bench/AIR-Bench.git
23
+
24
+ # Install the package
25
+ cd AIR-Bench
26
+ pip install .
27
+ ```
28
+ 2. Run the evaluation script
29
+ ```bash
30
+ cd AIR-Bench/scripts
31
+ # Run all tasks
32
+ python run_AIR-Bench.py \
33
+ --output_dir ./search_results \
34
+ --encoder BAAI/bge-m3 \
35
+ --encoder_link https://huggingface.co/BAAI/bge-m3 \
36
+ --reranker BAAI/bge-reranker-v2-m3 \
37
+ --reranker_link https://huggingface.co/BAAI/bge-reranker-v2-m3 \
38
+ --search_top_k 1000 \
39
+ --rerank_top_k 100 \
40
+ --max_query_length 512 \
41
+ --max_passage_length 512 \
42
+ --batch_size 512 \
43
+ --pooling_method cls \
44
+ --normalize_embeddings True \
45
+ --use_fp16 True \
46
+ --add_instruction False \
47
+ --overwrite False
48
+
49
+ # Run the tasks in the specified task type
50
+ python run_AIR-Bench.py \
51
+ --task_type long-doc \
52
+ --output_dir ./search_results \
53
+ --encoder BAAI/bge-m3 \
54
+ --encoder_link https://huggingface.co/BAAI/bge-m3 \
55
+ --reranker BAAI/bge-reranker-v2-m3 \
56
+ --reranker_link https://huggingface.co/BAAI/bge-reranker-v2-m3 \
57
+ --search_top_k 1000 \
58
+ --rerank_top_k 100 \
59
+ --max_query_length 512 \
60
+ --max_passage_length 512 \
61
+ --batch_size 512 \
62
+ --pooling_method cls \
63
+ --normalize_embeddings True \
64
+ --use_fp16 True \
65
+ --add_instruction False \
66
+ --overwrite False
67
 
68
+ # Run the tasks in the specified task type and domains
69
+ python run_AIR-Bench.py \
70
+ --task_type long-doc \
71
+ --domains arxiv book \
72
+ --output_dir ./search_results \
73
+ --encoder BAAI/bge-m3 \
74
+ --encoder_link https://huggingface.co/BAAI/bge-m3 \
75
+ --reranker BAAI/bge-reranker-v2-m3 \
76
+ --reranker_link https://huggingface.co/BAAI/bge-reranker-v2-m3 \
77
+ --search_top_k 1000 \
78
+ --rerank_top_k 100 \
79
+ --max_query_length 512 \
80
+ --max_passage_length 512 \
81
+ --batch_size 512 \
82
+ --pooling_method cls \
83
+ --normalize_embeddings True \
84
+ --use_fp16 True \
85
+ --add_instruction False \
86
+ --overwrite False
87
 
88
+ # Run the tasks in the specified languages
89
+ python run_AIR-Bench.py \
90
+ --languages en \
91
+ --output_dir ./search_results \
92
+ --encoder BAAI/bge-m3 \
93
+ --encoder_link https://huggingface.co/BAAI/bge-m3 \
94
+ --reranker BAAI/bge-reranker-v2-m3 \
95
+ --reranker_link https://huggingface.co/BAAI/bge-reranker-v2-m3 \
96
+ --search_top_k 1000 \
97
+ --rerank_top_k 100 \
98
+ --max_query_length 512 \
99
+ --max_passage_length 512 \
100
+ --batch_size 512 \
101
+ --pooling_method cls \
102
+ --normalize_embeddings True \
103
+ --use_fp16 True \
104
+ --add_instruction False \
105
+ --overwrite False
106
 
107
+ # Run the tasks in the specified task type, domains, and languages
108
+ python run_AIR-Bench.py \
109
+ --task_type qa \
110
+ --domains wiki web \
111
+ --languages en \
112
+ --output_dir ./search_results \
113
+ --encoder BAAI/bge-m3 \
114
+ --encoder_link https://huggingface.co/BAAI/bge-m3 \
115
+ --reranker BAAI/bge-reranker-v2-m3 \
116
+ --reranker_link https://huggingface.co/BAAI/bge-reranker-v2-m3 \
117
+ --search_top_k 1000 \
118
+ --rerank_top_k 100 \
119
+ --max_query_length 512 \
120
+ --max_passage_length 512 \
121
+ --batch_size 512 \
122
+ --pooling_method cls \
123
+ --normalize_embeddings True \
124
+ --use_fp16 True \
125
+ --add_instruction False \
126
+ --overwrite False
127
+ ```
128
+ 3. Package the search results.
129
+ ```bash
130
+ python zip_results.py \
131
+ --results_path search_results/bge-m3 \
132
+ --save_path search_results/zipped_results
133
+ ```
134
+ 4. Upload the `.zip` file on this page and fill in the model information.
135
+ 5. Congratulations! Your results will appear on the leaderboard within one hour.
136
  """
137
 
138
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"