Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,7 @@ import shutil
|
|
9 |
import os
|
10 |
from sklearn.neighbors import NearestNeighbors
|
11 |
from tempfile import NamedTemporaryFile
|
12 |
-
from PyPDF2 import
|
13 |
|
14 |
# Read the API key (presumably for OpenAI, judging by the variable name) from
# the environment variable literally named 'OpenAPI'. Subscript access raises
# KeyError at import time when the variable is not set.
openAI_key = os.environ['OpenAPI']
|
15 |
|
@@ -49,10 +49,10 @@ class SemanticSearch:
|
|
49 |
return embeddings
|
50 |
|
51 |
def pdf_to_text(pdf_path, start_page=1):
|
52 |
-
pdf =
|
53 |
-
text =
|
54 |
-
for
|
55 |
-
text += pdf.
|
56 |
return text
|
57 |
|
58 |
def text_to_chunks(text, start_page=1, chunk_size=512):
|
|
|
9 |
import os
|
10 |
from sklearn.neighbors import NearestNeighbors
|
11 |
from tempfile import NamedTemporaryFile
|
12 |
+
from PyPDF2 import PdfReader
|
13 |
|
14 |
# Read the API key (presumably for OpenAI, judging by the variable name) from
# the environment variable literally named 'OpenAPI'. Subscript access raises
# KeyError at import time when the variable is not set.
openAI_key = os.environ['OpenAPI']
|
15 |
|
|
|
49 |
return embeddings
|
50 |
|
51 |
def pdf_to_text(pdf_path, start_page=1):
    """Extract the text of a PDF from ``start_page`` through the last page.

    Args:
        pdf_path: Location of the PDF; passed straight to ``PdfReader``.
        start_page: 1-based number of the first page to include. Values
            below 1 are treated as 1.

    Returns:
        The extracted text of the selected pages concatenated in page
        order, or an empty string when ``start_page`` lies past the last
        page.
    """
    pdf = PdfReader(pdf_path)
    pages = pdf.pages

    # ``pages`` is 0-indexed while ``start_page`` is 1-based, so page N lives
    # at index N - 1. Using ``start_page`` directly as the index silently
    # dropped the first page for the default argument. Clamp at 0 so a
    # non-positive value cannot turn into a negative (wrap-around) index.
    first_index = max(start_page - 1, 0)

    # ``or ''`` guards against extract_text() returning None (or any falsy
    # value) for a page, which would otherwise break the concatenation.
    return ''.join(
        pages[i].extract_text() or ''
        for i in range(first_index, len(pages))
    )
|
57 |
|
58 |
def text_to_chunks(text, start_page=1, chunk_size=512):
|