File size: 2,807 Bytes
a2b3335 ac13da9 a2b3335 ac13da9 4bb8c82 ac13da9 dd09402 ac13da9 dd09402 ac13da9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
---
language:
- en
license: mit
---
This model was trained with [Neural-Cherche](https://github.com/raphaelsty/neural-cherche). You can find details on how to fine-tune it in the [Neural-Cherche](https://github.com/raphaelsty/neural-cherche) repository.
This model is a ColBERT version of `all-mpnet-base-v2`.
```sh
pip install neural-cherche
```
## Retriever
```python
from neural_cherche import models, retrieve
import torch
# Use the GPU when PyTorch reports one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Batch size passed to every encoding and retrieval call below.
batch_size = 32

# Toy corpus: each document is a dict with an "id" and a "document" field.
documents = [
    {"id": 0, "document": "Food"},
    {"id": 1, "document": "Sports"},
    {"id": 2, "document": "Cinema"},
]

queries = ["Food", "Sports", "Cinema"]

# Load the ColBERT checkpoint onto the selected device.
model = models.ColBERT(
    model_name_or_path="raphaelsty/neural-cherche-colbert",
    device=device,
)

# `key` and `on` refer to the "id" and "document" fields of the dicts above.
retriever = retrieve.ColBERT(key="id", on=["document"], model=model)

# Encode the corpus, then register the embeddings with the retriever.
documents_embeddings = retriever.encode_documents(
    documents=documents,
    batch_size=batch_size,
)
retriever = retriever.add(documents_embeddings=documents_embeddings)

queries_embeddings = retriever.encode_queries(
    queries=queries,
    batch_size=batch_size,
)

# k=3 matches the number of documents in this toy corpus.
scores = retriever(
    queries_embeddings=queries_embeddings,
    batch_size=batch_size,
    k=3,
)

# Bare expression: shows the result in a notebook/REPL, no effect in a script.
scores
```
## Ranker
```python
from neural_cherche import models, rank, retrieve
import torch
# Two-stage example: a TfIdf retriever produces candidates for each query and
# a ColBERT ranker then scores those candidates.

# Use the GPU when PyTorch reports one is available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Single batch size shared by every ranker call below; change it here only.
batch_size = 32

# Toy corpus: each document is a dict with "id", "title" and "text" fields.
documents = [
    {"id": "doc1", "title": "Paris", "text": "Paris is the capital of France."},
    {"id": "doc2", "title": "Montreal", "text": "Montreal is the largest city in Quebec."},
    {"id": "doc3", "title": "Bordeaux", "text": "Bordeaux in Southwestern France."},
]

queries = [
    "What is the capital of France?",
    "What is the largest city in Quebec?",
    "Where is Bordeaux?",
]

# First stage: TfIdf retriever over the "title" and "text" fields.
retriever = retrieve.TfIdf(
    key="id",
    on=["title", "text"],
)

# Second stage: ColBERT ranker over the same fields, backed by the checkpoint.
model = models.ColBERT(
    model_name_or_path="raphaelsty/neural-cherche-colbert",
    device=device,
)

ranker = rank.ColBERT(
    key="id",
    on=["title", "text"],
    model=model,
)

# Retriever and ranker each encode the corpus with their own method, so the
# two sets of embeddings are kept in separate variables.
retriever_documents_embeddings = retriever.encode_documents(
    documents=documents,
)

retriever.add(
    documents_embeddings=retriever_documents_embeddings,
)

ranker_documents_embeddings = ranker.encode_documents(
    documents=documents,
    batch_size=batch_size,
)

# The queries are likewise encoded once per stage.
retriever_queries_embeddings = retriever.encode_queries(
    queries=queries,
)

ranker_queries_embeddings = ranker.encode_queries(
    queries=queries,
    batch_size=batch_size,
)

# k=1000 exceeds this 3-document corpus; presumably an upper bound on the
# number of candidates per query -- confirm against the library docs.
candidates = retriever(
    queries_embeddings=retriever_queries_embeddings,
    k=1000,
)

# Score the candidates with the ranker. `batch_size` reuses the variable
# defined above rather than a separate literal, so the setting stays in sync.
scores = ranker(
    documents=candidates,
    queries_embeddings=ranker_queries_embeddings,
    documents_embeddings=ranker_documents_embeddings,
    k=100,
    batch_size=batch_size,
)

# Bare expression: shows the result in a notebook/REPL, no effect in a script.
scores
```