Spaces:
Runtime error
Runtime error
Update scripts/process.py
Browse files - scripts/process.py +2 -13
scripts/process.py
CHANGED
@@ -91,19 +91,8 @@ def load_document(
|
|
91 |
documents.append(Document(content=text,
|
92 |
meta={"name": file_name},
|
93 |
id_hash_keys=id_hash_keys))
|
94 |
-
|
95 |
-
|
96 |
-
This can happen with certain PDF types.'''
|
97 |
-
for i in documents:
|
98 |
-
if i.content == "":
|
99 |
-
st.write("using pdfplumber")
|
100 |
-
text = []
|
101 |
-
with pdfplumber.open(file_path) as pdf:
|
102 |
-
for page in pdf.pages:
|
103 |
-
text.append(page.extract_text())
|
104 |
-
i.content = ' '.join([page for page in text])
|
105 |
-
|
106 |
-
return documents
|
107 |
|
108 |
|
109 |
def preprocessing(document):
|
|
|
91 |
documents.append(Document(content=text,
|
92 |
meta={"name": file_name},
|
93 |
id_hash_keys=id_hash_keys))
|
94 |
+
|
95 |
+
return documents
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
|
98 |
def preprocessing(document):
|