Yoxas committed on
Commit
f8af002
·
verified ·
1 Parent(s): 791ccd1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -5
app.py CHANGED
@@ -23,11 +23,18 @@ def clean_text(text):
23
 
24
  # Function to extract text from PDF files
25
  def extract_text(pdf_file):
26
- pdf_reader = PyPDF2.PdfReader(pdf_file)
27
- text = ''
28
- for page_num in range(len(pdf_reader.pages)):
29
- text += pdf_reader.pages[page_num].extract_text()
30
- return text
 
 
 
 
 
 
 
31
 
32
  # Function to split text into chunks of a specified size
33
  def split_text(text, chunk_size=1024):
 
23
 
24
  # Function to extract text from PDF files
25
  def extract_text(pdf_file):
26
+ try:
27
+ pdf_reader = PdfReader(pdf_file)
28
+ if pdf_reader.is_encrypted:
29
+ print(f"Skipping encrypted file: {pdf_file}")
30
+ return None
31
+ text = ''
32
+ for page in pdf_reader.pages:
33
+ text += page.extract_text() or ''
34
+ return text
35
+ except Exception as e:
36
+ print(f"Error extracting text from {pdf_file}: {e}")
37
+ return None
38
 
39
  # Function to split text into chunks of a specified size
40
  def split_text(text, chunk_size=1024):