Bofandra committed on
Commit
80f0ff1
·
verified ·
1 Parent(s): 2828f4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -5,6 +5,7 @@ import pickle
5
  from PyPDF2 import PdfReader
6
  from sentence_transformers import SentenceTransformer
7
  from huggingface_hub import InferenceClient, HfApi
 
8
 
9
  # Hugging Face Space persistence
10
  HF_REPO_ID = "MoslemBot/kajibuku" # e.g., "username/your-space-name"
@@ -37,8 +38,14 @@ def save_pdf(file, title):
37
  os.makedirs(folder, exist_ok=True)
38
 
39
  # Extract text
40
- reader = PdfReader(file.name)
41
- full_text = "\n".join(p.extract_text() for p in reader.pages if p.extract_text())
 
 
 
 
 
 
42
  print(full_text)
43
 
44
  # Chunk text
 
5
  from PyPDF2 import PdfReader
6
  from sentence_transformers import SentenceTransformer
7
  from huggingface_hub import InferenceClient, HfApi
8
+ import pdfplumber
9
 
10
  # Hugging Face Space persistence
11
  HF_REPO_ID = "MoslemBot/kajibuku" # e.g., "username/your-space-name"
 
38
  os.makedirs(folder, exist_ok=True)
39
 
40
  # Extract text
41
+ # reader = PdfReader(file.name)
42
+ # full_text = "\n".join(p.extract_text() for p in reader.pages if p.extract_text())
43
+
44
+ with pdfplumber.open(file.name) as pdf:
45
+ full_text = ""
46
+ for page in pdf.pages:
47
+ full_text += page.extract_text() + "\n"
48
+
49
  print(full_text)
50
 
51
  # Chunk text