Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -16,7 +16,10 @@ def download_pdf(url, output_path):
|
|
16 |
|
17 |
def extract_zip(file):
|
18 |
with zipfile.ZipFile(file, 'r') as zip_ref:
|
19 |
-
zip_ref.
|
|
|
|
|
|
|
20 |
|
21 |
def preprocess(text):
|
22 |
text = text.replace('\n', ' ')
|
@@ -94,8 +97,9 @@ def load_recommender(paths, start_page=1):
|
|
94 |
global recommender
|
95 |
chunks = []
|
96 |
for path in paths:
|
97 |
-
|
98 |
-
|
|
|
99 |
recommender.fit(chunks)
|
100 |
return 'Corpus Loaded.'
|
101 |
|
|
|
16 |
|
17 |
def extract_zip(file):
|
18 |
with zipfile.ZipFile(file, 'r') as zip_ref:
|
19 |
+
for member in zip_ref.namelist():
|
20 |
+
filename = os.path.basename(member)
|
21 |
+
if filename.endswith('.pdf'):
|
22 |
+
zip_ref.extract(member, 'pdfs')
|
23 |
|
24 |
def preprocess(text):
|
25 |
text = text.replace('\n', ' ')
|
|
|
97 |
global recommender
|
98 |
chunks = []
|
99 |
for path in paths:
|
100 |
+
if path.endswith('.pdf'):
|
101 |
+
texts = pdf_to_text(path, start_page=start_page)
|
102 |
+
chunks += text_to_chunks(texts, start_page=start_page)
|
103 |
recommender.fit(chunks)
|
104 |
return 'Corpus Loaded.'
|
105 |
|