File size: 1,376 Bytes
2b503b2
8e5c781
 
2b503b2
 
 
 
 
 
8e5c781
2b503b2
 
 
5eb4729
 
 
2b503b2
8e5c781
2b503b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import logging
from utils.ingestion import DocumentProcessor
from utils.llm import LLMProcessor


class QAEngine:
    """Answer questions by retrieving stored document chunks and asking an LLM.

    Attributes are created in ``__init__``:
        processor: ``DocumentProcessor`` instance whose ``client`` provides
            the vector-store collection.
        llm_processor: ``LLMProcessor`` instance used for embedding the
            question, formatting the context and generating the answer.
        collection: The ``"document_chunks"`` collection queried by
            :meth:`query`.
    """

    def __init__(self) -> None:
        """Create both processors and open the ``document_chunks`` collection."""
        self.processor = DocumentProcessor()
        self.llm_processor = LLMProcessor()
        # Obtained through the ingestion processor's own client; presumably
        # this is the same store the chunks were written to -- confirm in
        # utils.ingestion.
        self.collection = self.processor.client.get_or_create_collection(
            "document_chunks"
        )

    def query(self, question: str, k: int = 5) -> str:
        """Query the document using semantic search and generate an answer.

        The question is embedded, the ``k`` nearest chunks are requested from
        the collection, the formatted context is printed to stdout, and the
        context plus question are handed to the LLM processor.

        Args:
            question: The user's question.
            k: Number of results requested from the collection.

        Returns:
            Whatever ``llm_processor.generate_answer(context, question)``
            returns (annotated as ``str``).
        """
        # The question is embedded via the LLM processor's embed_model.
        query_embedding = self.llm_processor.embed_model.embed_query(question)

        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=k,
        )
        chunks = self._build_chunks(results)

        print(f"\nRelevant chunks for query: '{question}'")
        print("=" * 80)

        context = self.llm_processor.format_context(chunks)
        print(context)

        return self.llm_processor.generate_answer(context, question)

    @staticmethod
    def _build_chunks(results) -> list:
        """Turn the query result for the single submitted embedding into chunk dicts.

        Only index ``0`` of ``results["documents"]`` and
        ``results["metadatas"]`` is read because exactly one query embedding
        is submitted. The two rows are assumed to be the same length
        (one metadata entry per document) -- TODO confirm against the store.
        """
        documents = results["documents"][0]
        metadatas = results["metadatas"][0]

        chunks = []
        for text, metadata in zip(documents, metadatas):
            # A chunk stored without metadata can come back as None; treat it
            # as empty so the lookups below fall back to their defaults
            # instead of raising AttributeError.
            metadata = metadata or {}
            chunks.append({
                "text": text,
                "headings": metadata.get("headings", "[]"),
                "page": metadata.get("page"),
                "content_type": metadata.get("content_type"),
            })
        return chunks