Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update auditqa/doc_process.py
Browse files
- auditqa/doc_process.py +10 -2
auditqa/doc_process.py
CHANGED
@@ -37,7 +37,15 @@ def process_pdf():
|
|
37 |
all_documents = {}
|
38 |
categories = list(files.keys())
|
39 |
for category in categories:
|
40 |
-
all_documents[category] =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
-
|
43 |
|
|
|
37 |
all_documents = {}
|
38 |
categories = list(files.keys())
|
39 |
for category in categories:
|
40 |
+
all_documents[category] = []
|
41 |
+
subtypes = list(files[category].keys())
|
42 |
+
for subtype in subtypes:
|
43 |
+
for file in files[category][subtype]:
|
44 |
+
doc_processed = text_splitter.split_documents(docs[file])
|
45 |
+
for doc in doc_processed:
|
46 |
+
doc.metadata["source"] = category
|
47 |
+
doc.metadata["subtype"] = subtype
|
48 |
+
doc.metadata["year"] = file[-4:]
|
49 |
|
50 |
+
all_documents[category].append(doc_processed)
|
51 |
|