Spaces:
Sleeping
Sleeping
File size: 2,352 Bytes
47b5f0c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
from qdrant_client import models
from qdrant_client.conversions import common_types as types
from qdrant_client.models import NamedVector, SparseVector
from app.infrastructure.models.my_models import HybridSearchResponse
from app.infrastructure.repository.query_search_repository import QuerySearchRepository
from app.modules.denseEmbeddings.denseEmbeddings import DenseEmbeddings
from app.qdrant import QdrantConnectionDb
class HybridSearcher:
    """Run hybrid (dense + sparse) text searches fused with RRF.

    The searcher obtains a sparse and a dense vector for the user query from
    ``dense_embeddings``, builds a nested Qdrant prefetch that fuses both
    candidate lists with ``models.Fusion.RRF``, and delegates the actual
    query to ``query_search_repository``.
    """

    # Names of the vectors targeted by the two prefetch branches.
    DENSE_VECTOR_NAME = "text-dense"
    SPARSE_VECTOR_NAME = "text-sparse"
    # Payload key read from every returned point and echoed in the response.
    CHUNK_TEXT_KEY = "chunk-text"
    # Number of candidates each prefetch branch retrieves before fusion.
    DEFAULT_PREFETCH_LIMIT = 10

    def __init__(
        self,
        dense_embeddings: DenseEmbeddings,
        query_search_repository: QuerySearchRepository,
    ):
        """Store the collaborators used by :meth:`hybrid_search`.

        :param dense_embeddings: provider exposing ``get_sparse_vector`` and
            ``get_dense_vector`` for a query string
        :param query_search_repository: repository exposing
            ``find_text_by_hybrid_search``
        """
        self.dense_embeddings = dense_embeddings
        self.query_search_repository = query_search_repository

    def sparse_dense_rrf_prefetch(
        self,
        sparse_vector: SparseVector,
        dense_vector: NamedVector,
        limit: int = DEFAULT_PREFETCH_LIMIT,
    ) -> models.Prefetch:
        """Build the prefetch that fuses dense and sparse candidates with RRF.

        :param sparse_vector: sparse query, searched against ``text-sparse``
        :param dense_vector: named dense query; only its ``vector`` attribute
            is used, searched against ``text-dense``
        :param limit: number of candidates requested from each branch
            (defaults to 10, the previously hard-coded value)
        :return: a ``models.Prefetch`` wrapping both branches and a
            ``models.FusionQuery`` using ``models.Fusion.RRF``
        """
        dense_branch = models.Prefetch(
            query=dense_vector.vector,
            using=self.DENSE_VECTOR_NAME,
            limit=limit,
        )
        sparse_branch = models.Prefetch(
            query=sparse_vector,
            using=self.SPARSE_VECTOR_NAME,
            limit=limit,
        )
        return models.Prefetch(
            prefetch=[dense_branch, sparse_branch],
            query=models.FusionQuery(fusion=models.Fusion.RRF),
        )

    def hybrid_search(
        self,
        user_query: str,
        *,
        prefetch_limit: int = DEFAULT_PREFETCH_LIMIT,
    ) -> HybridSearchResponse:
        """Search for text chunks matching ``user_query``.

        :param user_query: free-text query to embed and search for
        :param prefetch_limit: candidates requested from each prefetch branch
            (keyword-only, defaults to 10)
        :return: ``HybridSearchResponse`` with ``success=True`` and ``data``
            set to a list of ``{"chunk-text": ...}`` dicts, one per returned
            point; or ``success=False`` and a ``message`` describing the
            error if any step raised
        """
        # The whole pipeline stays inside one ``try`` on purpose: callers get
        # a response object instead of an exception for any failure
        # (embedding, query, or payload access), as in the original code.
        try:
            sparse_vector = self.dense_embeddings.get_sparse_vector(user_query)
            dense_vector = self.dense_embeddings.get_dense_vector(user_query)
            prefetch_context = self.sparse_dense_rrf_prefetch(
                sparse_vector, dense_vector, prefetch_limit
            )
            result = self.query_search_repository.find_text_by_hybrid_search(
                prefetch_context, dense_vector
            )
            response_data = [
                {self.CHUNK_TEXT_KEY: point.payload[self.CHUNK_TEXT_KEY]}
                for point in result.points
            ]
            return HybridSearchResponse(success=True, data=response_data)
        except Exception as e:
            return HybridSearchResponse(
                success=False, message=f"Database operation failed: {e!s}"
            )
|