File size: 2,352 Bytes
47b5f0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import logging

from qdrant_client import models
from qdrant_client.conversions import common_types as types
from qdrant_client.models import NamedVector, SparseVector

from app.infrastructure.models.my_models import HybridSearchResponse
from app.infrastructure.repository.query_search_repository import QuerySearchRepository
from app.modules.denseEmbeddings.denseEmbeddings import DenseEmbeddings
from app.qdrant import QdrantConnectionDb


class HybridSearcher:
    """Answer text queries by combining a dense and a sparse vector search.

    The embeddings provider turns the query into both vector kinds, the two
    searches are merged with the RRF fusion query, and the repository
    executes the resulting request.
    """

    def __init__(
        self,
        dense_embeddings: DenseEmbeddings,
        query_search_repository: QuerySearchRepository,
    ):
        """Store the collaborators used by :meth:`hybrid_search`.

        :param dense_embeddings: provider exposing ``get_sparse_vector`` and
            ``get_dense_vector`` for a query string
        :param query_search_repository: repository exposing
            ``find_text_by_hybrid_search``
        """
        self.dense_embeddings = dense_embeddings
        self.query_search_repository = query_search_repository

    def sparse_dense_rrf_prefetch(
        self,
        sparse_vector: SparseVector,
        dense_vector: NamedVector,
        prefetch_limit: int = 10,
    ) -> models.Prefetch:
        """Build a nested prefetch that fuses dense and sparse results via RRF.

        :param sparse_vector: sparse query vector, searched against the
            ``"text-sparse"`` named vector
        :param dense_vector: named dense query vector; only its ``vector``
            attribute is used, searched against ``"text-dense"``
        :param prefetch_limit: maximum number of candidates requested from
            each of the two inner searches (defaults to 10)
        :return: a ``models.Prefetch`` whose query is an RRF fusion of the
            two inner prefetches
        """
        return models.Prefetch(
            prefetch=[
                models.Prefetch(
                    query=dense_vector.vector,
                    using="text-dense",
                    limit=prefetch_limit,
                ),
                models.Prefetch(
                    query=sparse_vector,
                    using="text-sparse",
                    limit=prefetch_limit,
                ),
            ],
            query=models.FusionQuery(
                fusion=models.Fusion.RRF,
            ),
        )

    def hybrid_search(self, user_query: str) -> HybridSearchResponse:
        """Run a hybrid search for ``user_query`` and wrap the outcome.

        This method does not raise: any exception from embedding, querying or
        payload extraction is logged and converted into a failure response.

        :param user_query: free-text query to embed and search for
        :return: ``HybridSearchResponse`` with ``success=True`` and ``data``
            set to a list of ``{"chunk-text": ...}`` dicts (one per returned
            point), or ``success=False`` and a ``message`` describing the
            error
        """
        try:
            sparse_vector = self.dense_embeddings.get_sparse_vector(user_query)
            dense_vector = self.dense_embeddings.get_dense_vector(user_query)

            prefetch_context = self.sparse_dense_rrf_prefetch(
                sparse_vector, dense_vector
            )

            result = self.query_search_repository.find_text_by_hybrid_search(
                prefetch_context, dense_vector
            )

            # A point without a "chunk-text" payload raises here and is
            # reported through the failure branch below.
            response_data = [
                {"chunk-text": point.payload["chunk-text"]} for point in result.points
            ]
            return HybridSearchResponse(success=True, data=response_data)

        except Exception as e:
            # Service boundary: keep the "never raises" contract, but record
            # the traceback so the root cause is not reduced to a string.
            logging.getLogger(__name__).exception("Hybrid search failed")
            return HybridSearchResponse(
                success=False, message=f"Database operation failed: {str(e)}"
            )