NEXAS committed
Commit 2b503b2 · verified · 1 Parent(s): 2fb446f

Update utils/qa.py

Files changed (1)
  1. utils/qa.py +58 -58
utils/qa.py CHANGED
@@ -1,58 +1,58 @@
-import logging
-from ingestion import DocumentProcessor
-from llm import LLMProcessor
-
-
-class QAEngine:
-    def __init__(self):
-        self.processor = DocumentProcessor()
-        self.llm_processor = LLMProcessor()
-
-    def query(self, question: str, k: int = 5) -> str:
-        """Query the document using semantic search and generate an answer"""
-        query_embedding = self.llm_processor.embed_model.encode(question)
-
-        # Corrected ChromaDB query syntax
-        results = self.processor.index.query(
-            query_embeddings=[query_embedding],
-            n_results=k
-        )
-
-        # Extracting results properly
-        chunks = []
-        for i in range(len(results["documents"][0])):  # Iterate over top-k results
-            chunks.append({
-                "text": results["documents"][0][i],
-                "headings": results["metadatas"][0][i].get("headings", "[]"),
-                "page": results["metadatas"][0][i].get("page"),
-                "content_type": results["metadatas"][0][i].get("content_type")
-            })
-
-        print(f"\nRelevant chunks for query: '{question}'")
-        print("=" * 80)
-
-        context = self.llm_processor.format_context(chunks)
-        print(context)
-
-        return self.llm_processor.generate_answer(context, question)
-
-
-# def main():
-#     logging.basicConfig(level=logging.INFO)
-
-#     processor = DocumentProcessor()
-
-#     pdf_path = "sample/InternLM.pdf"
-#     processor.process_document(pdf_path)
-
-#     qa_engine = QAEngine()
-#     question = "What are the main features of InternLM-XComposer-2.5?"
-#     answer = qa_engine.query(question)
-
-#     print("\nAnswer:")
-#     print("=" * 80)
-#     print(answer)
-
-
-# if __name__ == "__main__":
-#     main()
 
+import logging
+from ingestion import DocumentProcessor
+from llm import LLMProcessor
+
+
+class QAEngine:
+    def __init__(self):
+        self.processor = DocumentProcessor()
+        self.llm_processor = LLMProcessor()
+
+    def query(self, question: str, k: int = 5) -> str:
+        """Query the document using semantic search and generate an answer"""
+        query_embedding = self.llm_processor.embed_model.encode(question)
+
+        # Corrected ChromaDB query syntax
+        results = self.processor.index.query(
+            query_embeddings=[query_embedding],
+            n_results=k
+        )
+
+        # Extracting results properly
+        chunks = []
+        for i in range(len(results["documents"][0])):  # Iterate over top-k results
+            chunks.append({
+                "text": results["documents"][0][i],
+                "headings": results["metadatas"][0][i].get("headings", "[]"),
+                "page": results["metadatas"][0][i].get("page"),
+                "content_type": results["metadatas"][0][i].get("content_type")
+            })
+
+        print(f"\nRelevant chunks for query: '{question}'")
+        print("=" * 80)
+
+        context = self.llm_processor.format_context(chunks)
+        print(context)
+
+        return self.llm_processor.generate_answer(context, question)
+
+
+# def main():
+#     logging.basicConfig(level=logging.INFO)
+
+#     processor = DocumentProcessor()
+
+#     pdf_path = "sample/InternLM.pdf"
+#     processor.process_document(pdf_path)
+
+#     qa_engine = QAEngine()
+#     question = "What are the main features of InternLM-XComposer-2.5?"
+#     answer = qa_engine.query(question)
+
+#     print("\nAnswer:")
+#     print("=" * 80)
+#     print(answer)
+
+
+# if __name__ == "__main__":
+#     main()
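
Note: the comment in qa.py flags the ChromaDB query syntax as the point of the fix. As context, here is a minimal standalone sketch of that call pattern and of the nested result shape the loop in query() depends on. The collection name, documents, metadata fields, and embedding model below are illustrative assumptions, not taken from this repository; only the chromadb API calls themselves are standard.

import chromadb
from sentence_transformers import SentenceTransformer

# Ephemeral in-memory client; "docs" is a hypothetical collection name.
client = chromadb.Client()
collection = client.get_or_create_collection("docs")

embed_model = SentenceTransformer("all-MiniLM-L6-v2")
texts = ["InternLM overview...", "Benchmark results..."]
collection.add(
    ids=["c1", "c2"],
    documents=texts,
    # .tolist() converts the numpy vectors to plain lists for portability.
    embeddings=[embed_model.encode(t).tolist() for t in texts],
    metadatas=[{"page": 1, "content_type": "text"},
               {"page": 3, "content_type": "table"}],
)

results = collection.query(
    query_embeddings=[embed_model.encode("What is InternLM?").tolist()],
    n_results=2,
)

# Collection.query returns nested lists: results["documents"][0] and
# results["metadatas"][0] hold the top-k hits for the first (and here only)
# query embedding. This is why qa.py indexes [0] before iterating.
for doc, meta in zip(results["documents"][0], results["metadatas"][0]):
    print(meta.get("page"), doc[:40])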