XThomasBU
committed on
Commit
·
49140fa
1
Parent(s):
4265034
reverted simplistic check for informative changes
Browse files
code/modules/dataloader/data_loader.py
CHANGED
|
@@ -223,8 +223,8 @@ class ChunkProcessor:
|
|
| 223 |
file_metadata = {}
|
| 224 |
|
| 225 |
for doc in documents:
|
| 226 |
-
if len(doc.page_content) <= 400:
|
| 227 |
-
|
| 228 |
|
| 229 |
page_num = doc.metadata.get("page", 0)
|
| 230 |
file_data[page_num] = doc.page_content
|
|
|
|
| 223 |
file_metadata = {}
|
| 224 |
|
| 225 |
for doc in documents:
|
| 226 |
+
# if len(doc.page_content) <= 400:  # TODO: find a better approach to filter out non-informative documents
|
| 227 |
+
# continue
|
| 228 |
|
| 229 |
page_num = doc.metadata.get("page", 0)
|
| 230 |
file_data[page_num] = doc.page_content
|