import logging

from ingestion import DocumentProcessor
from llm import LLMProcessor


class QAEngine:
    def __init__(self):
        self.processor = DocumentProcessor()
        self.llm_processor = LLMProcessor()

    def query(self, question: str, k: int = 5) -> str:
        """Query the document using semantic search and generate an answer"""
        # Embed the question with the same model used at ingestion time
        query_embedding = self.llm_processor.embed_model.encode(question)

        # Corrected ChromaDB query syntax
        results = self.processor.index.query(
            query_embeddings=[query_embedding],
            n_results=k
        )

        # Extracting results properly: ChromaDB nests results per query,
        # so index [0] selects the hits for our single question
        chunks = []
        for i in range(len(results["documents"][0])):  # Iterate over top-k results
            chunks.append({
                "text": results["documents"][0][i],
                "headings": results["metadatas"][0][i].get("headings", "[]"),
                "page": results["metadatas"][0][i].get("page"),
                "content_type": results["metadatas"][0][i].get("content_type")
            })

        print(f"\nRelevant chunks for query: '{question}'")
        print("=" * 80)

        # Assemble the prompt context from the retrieved chunks, then generate the answer
        context = self.llm_processor.format_context(chunks)
        print(context)

        return self.llm_processor.generate_answer(context, question)

# def main():
#     logging.basicConfig(level=logging.INFO)
#
#     processor = DocumentProcessor()
#     pdf_path = "sample/InternLM.pdf"
#     processor.process_document(pdf_path)
#
#     qa_engine = QAEngine()
#     question = "What are the main features of InternLM-XComposer-2.5?"
#     answer = qa_engine.query(question)
#
#     print("\nAnswer:")
#     print("=" * 80)
#     print(answer)


# if __name__ == "__main__":
#     main()