NEXAS committed
Commit 8e5c781 · verified · 1 Parent(s): f65750f

Update utils/qa.py

Files changed (1)
  1. utils/qa.py +4 -26
utils/qa.py CHANGED
@@ -1,24 +1,23 @@
 import logging
-from ingestion import DocumentProcessor
-from llm import LLMProcessor
+from utils.ingestion import DocumentProcessor
+from utils.llm import LLMProcessor
 
 
 class QAEngine:
     def __init__(self):
         self.processor = DocumentProcessor()
         self.llm_processor = LLMProcessor()
+        self.collection = self.processor.client.get_or_create_collection("document_chunks")  # Fix
 
     def query(self, question: str, k: int = 5) -> str:
         """Query the document using semantic search and generate an answer"""
         query_embedding = self.llm_processor.embed_model.encode(question)
 
-        # Corrected ChromaDB query syntax
-        results = self.processor.index.query(
+        results = self.collection.query(
             query_embeddings=[query_embedding],
             n_results=k
         )
 
-        # Extracting results properly
         chunks = []
         for i in range(len(results["documents"][0])):  # Iterate over top-k results
             chunks.append({
@@ -35,24 +34,3 @@ class QAEngine:
         print(context)
 
         return self.llm_processor.generate_answer(context, question)
-
-
-# def main():
-#     logging.basicConfig(level=logging.INFO)
-
-#     processor = DocumentProcessor()
-
-#     pdf_path = "sample/InternLM.pdf"
-#     processor.process_document(pdf_path)
-
-#     qa_engine = QAEngine()
-#     question = "What are the main features of InternLM-XComposer-2.5?"
-#     answer = qa_engine.query(question)
-
-#     print("\nAnswer:")
-#     print("=" * 80)
-#     print(answer)
-
-
-# if __name__ == "__main__":
-#     main()
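For context, the commit moves from a non-existent self.processor.index to a ChromaDB collection obtained via get_or_create_collection. The sketch below shows the ChromaDB call pattern the updated query() relies on, using an in-memory client and made-up ids, documents, and toy embedding vectors; the collection name "document_chunks" matches the commit, everything else is illustrative rather than taken from the repository.

import chromadb

# In-memory client; the real code reuses DocumentProcessor's client.
client = chromadb.Client()
collection = client.get_or_create_collection("document_chunks")

# Toy vectors stand in for LLMProcessor.embed_model.encode(...).
collection.add(
    ids=["chunk-0", "chunk-1"],
    documents=["InternLM overview ...", "Model features ..."],
    embeddings=[[0.1, 0.2, 0.3], [0.2, 0.1, 0.0]],
)

results = collection.query(
    query_embeddings=[[0.1, 0.2, 0.25]],
    n_results=2,
)

# query() returns one result list per query embedding, so
# results["documents"][0] holds the top-k documents for the first
# (and only) query, which is what the loop in qa.py iterates over.
print(results["documents"][0])

In qa.py the collection is populated during ingestion by DocumentProcessor, and the query embedding comes from self.llm_processor.embed_model.encode(question) rather than a hard-coded vector.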