Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import pickle
|
|
5 |
from PyPDF2 import PdfReader
|
6 |
from sentence_transformers import SentenceTransformer
|
7 |
from huggingface_hub import InferenceClient, HfApi
|
|
|
8 |
|
9 |
# Hugging Face Space persistence
|
10 |
HF_REPO_ID = "MoslemBot/kajibuku" # e.g., "username/your-space-name"
|
@@ -37,8 +38,14 @@ def save_pdf(file, title):
|
|
37 |
os.makedirs(folder, exist_ok=True)
|
38 |
|
39 |
# Extract text
|
40 |
-
reader = PdfReader(file.name)
|
41 |
-
full_text = "\n".join(p.extract_text() for p in reader.pages if p.extract_text())
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
print(full_text)
|
43 |
|
44 |
# Chunk text
|
|
|
5 |
from PyPDF2 import PdfReader
|
6 |
from sentence_transformers import SentenceTransformer
|
7 |
from huggingface_hub import InferenceClient, HfApi
|
8 |
+
import pdfplumber
|
9 |
|
10 |
# Hugging Face Space persistence
|
11 |
HF_REPO_ID = "MoslemBot/kajibuku" # e.g., "username/your-space-name"
|
|
|
38 |
os.makedirs(folder, exist_ok=True)
|
39 |
|
40 |
# Extract text
|
41 |
+
# reader = PdfReader(file.name)
|
42 |
+
# full_text = "\n".join(p.extract_text() for p in reader.pages if p.extract_text())
|
43 |
+
|
44 |
+
with pdfplumber.open(file.name) as pdf:
|
45 |
+
full_text = ""
|
46 |
+
for page in pdf.pages:
|
47 |
+
full_text += page.extract_text() + "\n"
|
48 |
+
|
49 |
print(full_text)
|
50 |
|
51 |
# Chunk text
|