Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -139,9 +139,13 @@ def PDFChunkerWithSeparator(filepath, separator):
|
|
139 |
# print the number of pages in pdf file
|
140 |
print(len(reader.pages))
|
141 |
content = ""
|
142 |
-
|
143 |
-
|
144 |
-
|
|
|
|
|
|
|
|
|
145 |
splitted_content_list = content.split(separator)
|
146 |
|
147 |
doclist = []
|
@@ -175,8 +179,9 @@ def loadKB(fileprovided, urlProvided, uploads_dir, request):
|
|
175 |
separator = "</Q>"
|
176 |
documents.extend(PDFChunkerWithSeparator(os.path.join(uploads_dir, secure_filename(file.filename)),separator))
|
177 |
else:
|
178 |
-
loader = TextLoader('Jio.txt')
|
179 |
-
documents.extend(loader.load())
|
|
|
180 |
|
181 |
if urlProvided:
|
182 |
weburl = request.form.getlist('weburl')
|
|
|
139 |
# print the number of pages in pdf file
|
140 |
print(len(reader.pages))
|
141 |
content = ""
|
142 |
+
if filepath.endswith(".pdf"):
|
143 |
+
for page in reader.pages:
|
144 |
+
content += page.extract_text()
|
145 |
+
elif filepath.endswith(".txt"):
|
146 |
+
with open(filepath) as f:
|
147 |
+
content = f.readlines()
|
148 |
+
f.close()
|
149 |
splitted_content_list = content.split(separator)
|
150 |
|
151 |
doclist = []
|
|
|
179 |
separator = "</Q>"
|
180 |
documents.extend(PDFChunkerWithSeparator(os.path.join(uploads_dir, secure_filename(file.filename)),separator))
|
181 |
else:
|
182 |
+
#loader = TextLoader('Jio.txt')
|
183 |
+
#documents.extend(loader.load())
|
184 |
+
documents.extend(PDFChunkerWithSeparator('JTest.txt',separator))
|
185 |
|
186 |
if urlProvided:
|
187 |
weburl = request.form.getlist('weburl')
|