Arjun Moorthy committed on
Commit
2105147
·
1 Parent(s): 2720b05

Enable PDF processing in RAG system

Browse files
Files changed (1) hide show
  1. Oncolife/app.py +16 -1
Oncolife/app.py CHANGED
@@ -129,7 +129,22 @@ class OncoLifeAssistant:
129
 
130
  documents_loaded = 0
131
 
132
- # Only process JSON files (lightweight)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  for json_file in docs_path.glob("*.json"):
134
  try:
135
  print(f"📄 Processing JSON: {json_file.name}")
 
129
 
130
  documents_loaded = 0
131
 
132
+ # Process PDF files (essential medical guidelines)
133
+ for pdf_file in docs_path.glob("*.pdf"):
134
+ try:
135
+ print(f"📄 Processing PDF: {pdf_file.name}")
136
+ text = self._extract_pdf_text(pdf_file)
137
+ if text:
138
+ chunks = text_splitter.split_text(text)
139
+ self._add_chunks_to_db(chunks, pdf_file.name)
140
+ documents_loaded += 1
141
+ print(f"✅ Added {len(chunks)} chunks from {pdf_file.name}")
142
+ else:
143
+ print(f"⚠️ No text extracted from {pdf_file.name}")
144
+ except Exception as e:
145
+ print(f"❌ Error processing {pdf_file.name}: {e}")
146
+
147
+ # Process JSON files (lightweight)
148
  for json_file in docs_path.glob("*.json"):
149
  try:
150
  print(f"📄 Processing JSON: {json_file.name}")