peter2000 committed on
Commit
0171405
1 Parent(s): eef4a29

Update scripts/process.py

Browse files
Files changed (1) hide show
  1. scripts/process.py +2 -13
scripts/process.py CHANGED
@@ -91,19 +91,8 @@ def load_document(
91
  documents.append(Document(content=text,
92
  meta={"name": file_name},
93
  id_hash_keys=id_hash_keys))
94
-
95
- '''check if text is empty and apply different pdf processor. \
96
- This can happen whith certain pdf types.'''
97
- for i in documents:
98
- if i.content == "":
99
- st.write("using pdfplumber")
100
- text = []
101
- with pdfplumber.open(file_path) as pdf:
102
- for page in pdf.pages:
103
- text.append(page.extract_text())
104
- i.content = ' '.join([page for page in text])
105
-
106
- return documents
107
 
108
 
109
  def preprocessing(document):
 
91
  documents.append(Document(content=text,
92
  meta={"name": file_name},
93
  id_hash_keys=id_hash_keys))
94
+
95
+ return documents
 
 
 
 
 
 
 
 
 
 
 
96
 
97
 
98
  def preprocessing(document):