|
from sentence_transformers import CrossEncoder |
|
from transformers import LlamaTokenizer |
|
import torch |
|
|
|
def main() -> None:
    """Score and rank candidate passages against a query with UltraRAG-Reranker.

    Loads the OpenBMB/UltraRAG-Reranker cross-encoder in fp16, scores two
    sample passages against a Chinese query, and prints both the raw scores
    and the sorted ranking.
    """
    model_name = "OpenBMB/UltraRAG-Reranker"
    # trust_remote_code is required: this repo ships custom modeling code.
    # fp16 halves memory use; max_length bounds the tokenized pair length.
    model = CrossEncoder(
        model_name,
        max_length=1024,
        trust_remote_code=True,
        automodel_args={"torch_dtype": torch.float16},
    )

    # NOTE(review): right-padding is set explicitly here, presumably because
    # the model's custom code expects it — confirm against the model card.
    model.tokenizer.padding_side = "right"

    query = "中国的首都是哪里?"
    passages = ["beijing", "shanghai"]

    # This reranker expects the query to carry an instruction prefix.
    INSTRUCTION = "Query: "
    query = INSTRUCTION + query

    # One (query, passage) pair per candidate passage.
    sentence_pairs = [[query, doc] for doc in passages]

    # predict() returns one relevance score per pair; convert_to_tensor=True
    # yields a torch tensor, immediately converted to a plain Python list.
    scores = model.predict(sentence_pairs, convert_to_tensor=True).tolist()

    # rank() re-scores the same passages and returns dicts sorted by score.
    # convert_to_tensor=True makes each 'score' a 0-dim tensor, which still
    # formats correctly with ':.4f' below.
    rankings = model.rank(
        query, passages, return_documents=True, convert_to_tensor=True
    )

    print(scores)
    for ranking in rankings:
        print(f"Score: {ranking['score']:.4f}, Corpus: {ranking['text']}")


# Guard the entry point so importing this module does not trigger a model
# download and inference run.
if __name__ == "__main__":
    main()
|
|
|
|
|
|