Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 1,614 Bytes
43fa15b f49fa9f c567921 e56a788 43fa15b f49fa9f c567921 5cd34a4 c567921 f49fa9f c567921 f49fa9f c567921 f49fa9f c567921 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
from functools import lru_cache

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_qdrant import FastEmbedSparse
from qdrant_client.http import models
from torch import cuda

from appStore.prep_utils import get_client
# Prefer GPU when available; this string is passed as the embedder's device.
device = 'cuda' if cuda.is_available() else 'cpu'
@lru_cache(maxsize=1)
def _dense_embedder():
    """Build the dense BGE-M3 embedder once and reuse it across calls.

    Loading the model (and moving it to ``device``) is expensive; the
    original code rebuilt it on every search.
    """
    return HuggingFaceEmbeddings(
        model_name='BAAI/bge-m3',
        model_kwargs={'device': device},
        encode_kwargs={'normalize_embeddings': True},
    )


@lru_cache(maxsize=1)
def _sparse_embedder():
    """Build the sparse BM25 embedder once and reuse it across calls."""
    return FastEmbedSparse(model_name="Qdrant/bm25")


def hybrid_search(client, query, collection_name, limit=500):
    """Run a dense + sparse (hybrid) search against a Qdrant collection.

    Args:
        client: Qdrant client instance (must support ``search_batch``).
        query: Free-text query string; embedded once per vector space.
        collection_name: Name of the Qdrant collection to search.
        limit: Maximum number of hits returned by EACH sub-request
            (dense and sparse are limited independently).

    Returns:
        The raw ``search_batch`` result: a list of two hit lists —
        index 0 holds the dense ("text-dense") hits, index 1 the sparse
        ("text-sparse") hits. No score fusion/re-ranking is applied here;
        callers are expected to merge the two lists themselves.
    """
    # 1) Embed the query for both vector spaces (models are cached).
    q_dense = _dense_embedder().embed_query(query)
    q_sparse = _sparse_embedder().embed_query(query)

    # 2) Single batched round-trip: dense and sparse requests together.
    results = client.search_batch(
        collection_name=collection_name,
        requests=[
            # Dense request against the named "text-dense" vector.
            models.SearchRequest(
                vector=models.NamedVector(
                    name="text-dense",
                    vector=q_dense,
                ),
                limit=limit,  # was hard-coded 10; now uses the parameter
                with_payload=True,
            ),
            # Sparse (BM25) request against the named "text-sparse" vector.
            models.SearchRequest(
                vector=models.NamedSparseVector(
                    name="text-sparse",
                    vector=models.SparseVector(
                        indices=q_sparse.indices,
                        values=q_sparse.values,
                    ),
                ),
                limit=limit,  # was hard-coded 10; now uses the parameter
                with_payload=True,
            ),
        ],
    )
    return results
|