cogcorp committed on
Commit
15e8c54
·
1 Parent(s): 8e74737

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -16,7 +16,10 @@ def download_pdf(url, output_path):
16
 
17
  def extract_zip(file):
18
  with zipfile.ZipFile(file, 'r') as zip_ref:
19
- zip_ref.extractall('pdfs')
 
 
 
20
 
21
  def preprocess(text):
22
  text = text.replace('\n', ' ')
@@ -94,8 +97,9 @@ def load_recommender(paths, start_page=1):
94
  global recommender
95
  chunks = []
96
  for path in paths:
97
- texts = pdf_to_text(path, start_page=start_page)
98
- chunks += text_to_chunks(texts, start_page=start_page)
 
99
  recommender.fit(chunks)
100
  return 'Corpus Loaded.'
101
 
 
16
 
17
  def extract_zip(file):
18
  with zipfile.ZipFile(file, 'r') as zip_ref:
19
+ for member in zip_ref.namelist():
20
+ filename = os.path.basename(member)
21
+ if filename.endswith('.pdf'):
22
+ zip_ref.extract(member, 'pdfs')
23
 
24
  def preprocess(text):
25
  text = text.replace('\n', ' ')
 
97
  global recommender
98
  chunks = []
99
  for path in paths:
100
+ if path.endswith('.pdf'):
101
+ texts = pdf_to_text(path, start_page=start_page)
102
+ chunks += text_to_chunks(texts, start_page=start_page)
103
  recommender.fit(chunks)
104
  return 'Corpus Loaded.'
105