from functools import lru_cache

from appStore.prep_utils import get_client
from langchain_qdrant import FastEmbedSparse
from torch import cuda
from qdrant_client.http import models
from langchain_huggingface import HuggingFaceEmbeddings

# Run the dense embedding model on the GPU when one is visible to torch.
device = 'cuda' if cuda.is_available() else 'cpu'


@lru_cache(maxsize=1)
def _dense_embedder():
    """Build the dense embedding model once and reuse it across queries."""
    return HuggingFaceEmbeddings(
        model_name='BAAI/bge-m3',
        model_kwargs={'device': device},
        encode_kwargs={'normalize_embeddings': True},
    )


@lru_cache(maxsize=1)
def _sparse_embedder():
    """Build the sparse (BM25) embedding model once and reuse it."""
    return FastEmbedSparse(model_name="Qdrant/bm25")


def hybrid_search(client, query: str, collection_name: str, limit: int = 500):
    """Run a dense and a sparse vector search for *query* in a single batch.

    The query is embedded with both the dense and the sparse model, and the
    two searches are sent to Qdrant together through ``client.search_batch``.
    No fusion or re-ranking is done here; merging the two hit lists is left
    to the caller.

    Args:
        client: Object exposing ``search_batch(collection_name=..., requests=...)``
            (presumably a ``qdrant_client.QdrantClient`` -- confirm at call sites).
        query: Text to search for.
        collection_name: Name of the collection to search. The requests
            address its ``"text-dense"`` and ``"text-sparse"`` named vectors.
        limit: Maximum number of hits requested from each of the two searches.

    Returns:
        Whatever ``client.search_batch`` returns, unmodified. The dense
        request is submitted first and the sparse request second.
    """
    # Loading the models is expensive; the cached loaders make it a
    # one-time cost per process instead of a per-query cost.
    dense_vector = _dense_embedder().embed_query(query)
    sparse_vector = _sparse_embedder().embed_query(query)

    dense_request = models.SearchRequest(
        vector=models.NamedVector(
            name="text-dense",
            vector=dense_vector,
        ),
        limit=limit,
        with_payload=True,
    )
    sparse_request = models.SearchRequest(
        vector=models.NamedSparseVector(
            name="text-sparse",
            vector=models.SparseVector(
                indices=sparse_vector.indices,
                values=sparse_vector.values,
            ),
        ),
        limit=limit,
        with_payload=True,
    )

    return client.search_batch(
        collection_name=collection_name,
        requests=[dense_request, sparse_request],
    )