import logging

from ingestion import DocumentProcessor
from llm import LLMProcessor


class QAEngine:
    def __init__(self):
        self.processor = DocumentProcessor()
        self.llm_processor = LLMProcessor()

    def query(self, question: str, k: int = 5) -> str:
        """Query the document using semantic search and generate an answer."""
        # Embed the question with the same model used at ingestion time.
        query_embedding = self.llm_processor.embed_model.encode(question)

        # ChromaDB expects a list of embeddings; convert the numpy vector
        # to a plain list for compatibility across client versions.
        results = self.processor.index.query(
            query_embeddings=[query_embedding.tolist()],
            n_results=k,
        )

        # Unpack the top-k hits. Chroma nests results per query, so the
        # first (and only) query's hits live at index 0.
        chunks = []
        for i in range(len(results["documents"][0])):
            chunks.append({
                "text": results["documents"][0][i],
                "headings": results["metadatas"][0][i].get("headings", "[]"),
                "page": results["metadatas"][0][i].get("page"),
                "content_type": results["metadatas"][0][i].get("content_type"),
            })

        print(f"\nRelevant chunks for query: '{question}'")
        print("=" * 80)

        context = self.llm_processor.format_context(chunks)
        print(context)

        return self.llm_processor.generate_answer(context, question)


def main():
    logging.basicConfig(level=logging.INFO)

    # Ingest the sample document, then run a single QA round against it.
    processor = DocumentProcessor()
    pdf_path = "sample/InternLM.pdf"
    processor.process_document(pdf_path)

    qa_engine = QAEngine()
    question = "What are the main features of InternLM-XComposer-2.5?"
    answer = qa_engine.query(question)

    print("\nAnswer:")
    print("=" * 80)
    print(answer)


if __name__ == "__main__":
    main()