cogcorp committed on
Commit
195e8f3
·
1 Parent(s): 3aae288

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -9,7 +9,7 @@ import shutil
9
  import os
10
  from sklearn.neighbors import NearestNeighbors
11
  from tempfile import NamedTemporaryFile
12
- from PyPDF2 import PdfFileReader
13
 
14
  openAI_key = os.environ['OpenAPI']
15
 
@@ -49,10 +49,10 @@ class SemanticSearch:
49
  return embeddings
50
 
51
  def pdf_to_text(pdf_path, start_page=1):
52
- pdf = PdfFileReader(open(pdf_path, "rb"))
53
- text = ""
54
- for page_num in range(start_page, pdf.getNumPages()):
55
- text += pdf.getPage(page_num).extractText()
56
  return text
57
 
58
  def text_to_chunks(text, start_page=1, chunk_size=512):
 
9
  import os
10
  from sklearn.neighbors import NearestNeighbors
11
  from tempfile import NamedTemporaryFile
12
+ from PyPDF2 import PdfReader
13
 
14
  openAI_key = os.environ['OpenAPI']
15
 
 
49
  return embeddings
50
 
51
  def pdf_to_text(pdf_path, start_page=1):
52
+ pdf = PdfReader(pdf_path)
53
+ text = ''
54
+ for i in range(start_page, len(pdf.pages)):
55
+ text += pdf.pages[i].extract_text()
56
  return text
57
 
58
  def text_to_chunks(text, start_page=1, chunk_size=512):