Spaces:
Runtime error
Runtime error
Arjun Moorthy
committed on
Commit
·
2105147
1
Parent(s):
2720b05
Enable PDF processing in RAG system
Browse files- Oncolife/app.py +16 -1
Oncolife/app.py
CHANGED
@@ -129,7 +129,22 @@ class OncoLifeAssistant:
|
|
129 |
|
130 |
documents_loaded = 0
|
131 |
|
132 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
for json_file in docs_path.glob("*.json"):
|
134 |
try:
|
135 |
print(f"📄 Processing JSON: {json_file.name}")
|
|
|
129 |
|
130 |
documents_loaded = 0
|
131 |
|
132 |
+
# Process PDF files (essential medical guidelines)
|
133 |
+
for pdf_file in docs_path.glob("*.pdf"):
|
134 |
+
try:
|
135 |
+
print(f"📄 Processing PDF: {pdf_file.name}")
|
136 |
+
text = self._extract_pdf_text(pdf_file)
|
137 |
+
if text:
|
138 |
+
chunks = text_splitter.split_text(text)
|
139 |
+
self._add_chunks_to_db(chunks, pdf_file.name)
|
140 |
+
documents_loaded += 1
|
141 |
+
print(f"✅ Added {len(chunks)} chunks from {pdf_file.name}")
|
142 |
+
else:
|
143 |
+
print(f"⚠️ No text extracted from {pdf_file.name}")
|
144 |
+
except Exception as e:
|
145 |
+
print(f"❌ Error processing {pdf_file.name}: {e}")
|
146 |
+
|
147 |
+
# Process JSON files (lightweight)
|
148 |
for json_file in docs_path.glob("*.json"):
|
149 |
try:
|
150 |
print(f"📄 Processing JSON: {json_file.name}")
|