Update README.md
README.md CHANGED
@@ -183,7 +183,7 @@ def make_reranker_input(t, q):
     return f"<<<Context>>>\n{t}\n\n<<<Query>>>\n{q}"
 
 def make_reranker_inference_conversation(context, question):
-    system_message = "Given a
+    system_message = "Given a piece of text and a query, output a score of 1-7 based on how related the query is to the text. 1 means least related and 7 is most related."
 
     return [
         {"role": "system", "content": system_message},
@@ -237,7 +237,7 @@ def make_reranker_input(t, q):
     return f"<<<Context>>>\n{t}\n\n<<<Query>>>\n{q}"
 
 def make_reranker_inference_conversation(context, question):
-    system_message = "Given a
+    system_message = "Given a piece of text and a query, output a score of 1-7 based on how related the query is to the text. 1 means least related and 7 is most related."
 
     return [
         {"role": "system", "content": system_message},
@@ -302,7 +302,7 @@ def make_reranker_input(t, q):
     return f"<<<Context>>>\n{t}\n\n<<<Query>>>\n{q}"
 
 def make_reranker_inference_conversation(context, question):
-    system_message = "Given a
+    system_message = "Given a piece of text and a query, output a score of 1-7 based on how related the query is to the text. 1 means least related and 7 is most related."
 
     return [
         {"role": "system", "content": system_message},
@@ -345,36 +345,6 @@ print(expected_vals)
 </details></li>
 </ul>
 
-# Evaluation
-
-We perform an evaluation on 9 datasets from the [BEIR benchmark](https://github.com/beir-cellar/beir) that none of the evaluated models have been trained upon (to our knowledge).
-
-* Arguana
-* Dbpedia-entity
-* Fiqa
-* NFcorpus
-* Scidocs
-* Scifact
-* Trec-covid-v2
-* Vihealthqa
-* Webis-touche2020
-
-We evaluate on a subset of all queries (the first 250) to save evaluation time.
-
-We find that our model performs similarly or better than many of the state-of-the-art reranker models in our evaluation, without compromising on inference speed.
-
-We make our evaluation code and results available [on our Github](https://github.com/lightblue-tech/lb-reranker/blob/main/run_bier.ipynb).
-

-
-
-
-As we can see, this reranker attains greater IR evaluation metrics compared to the two benchmarks we include for all positions apart from @1.
-

-
-We also show that our model is, on average, faster than the BGE reranker v2.
-
 # License
 
 We share this model under an Apache 2.0 license.
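For reference, the patched helper assembled from the hunks above reads roughly as follows. This is a minimal sketch: each hunk ends just after the system-message turn, so the user turn wrapping `make_reranker_input(context, question)` is an assumption, not shown in the diff.

```python
def make_reranker_input(t, q):
    # Format the document text and query into the prompt layout the reranker expects.
    return f"<<<Context>>>\n{t}\n\n<<<Query>>>\n{q}"

def make_reranker_inference_conversation(context, question):
    # Completed system prompt from this commit: ask for a 1-7 relevance score.
    system_message = "Given a piece of text and a query, output a score of 1-7 based on how related the query is to the text. 1 means least related and 7 is most related."

    return [
        {"role": "system", "content": system_message},
        # Assumption: the user turn carries the formatted context/query pair.
        {"role": "user", "content": make_reranker_input(context, question)},
    ]
```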
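The final hunk's context line, `print(expected_vals)`, suggests the README turns the model's 1-7 score-token probabilities into a continuous expected value for ranking. A hypothetical sketch of that calculation, assuming per-token probabilities are available; the `probs` dict and `expected_score` helper are illustrative, not the README's actual code:

```python
# Hypothetical: probabilities the model assigns to the score tokens "1".."7"
# for its first generated token (values made up for illustration).
probs = {"1": 0.01, "2": 0.02, "3": 0.05, "4": 0.10, "5": 0.22, "6": 0.35, "7": 0.25}

def expected_score(token_probs):
    # Normalise, then take the probability-weighted mean of the integer scores.
    total = sum(token_probs.values())
    return sum(int(tok) * p for tok, p in token_probs.items()) / total

expected_vals = [expected_score(probs)]  # one expected value per scored document
print(expected_vals)  # [5.55], a continuous relevance score usable for sorting
```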