robertselvam committed on
Commit
5ac67c9
1 Parent(s): 3092adb

Update aggressive_content_finder.py

Browse files
Files changed (1) hide show
  1. aggressive_content_finder.py +14 -14
aggressive_content_finder.py CHANGED
@@ -70,25 +70,25 @@ class AggressiveContentFinder:
70
  aggressive terms are concatenated and returned. If any errors occur during PDF processing, they are logged for
71
  debugging.
72
  """
73
- try:
74
 
75
- # Open the multi-page PDF using PdfReader
76
- pdf = PdfReader(pdf_file_path.name)
77
 
78
- aggressive_terms = ""
79
 
80
- # Extract text from each page and pass it to the process_text function
81
- for page_number in range(len(pdf.pages)):
82
- # Extract text from the page
83
- page = pdf.pages[page_number]
84
- text = page.extract_text()
85
 
86
- # Pass the text to the process_text function for further processing
87
- aggressive_terms += self._extract_aggressive_content(text)
88
- return aggressive_terms
89
 
90
- except Exception as e:
91
- print(f"An error occurred while processing the PDF document: {str(e)}")
92
 
93
  def file_output_fnn(self,file_path):
94
  file_path = file_path.name
 
70
  aggressive terms are concatenated and returned. If any errors occur during PDF processing, they are logged for
71
  debugging.
72
  """
73
+ # try:
74
 
75
+ # Open the multi-page PDF using PdfReader
76
+ pdf = PdfReader(pdf_file_path.name)
77
 
78
+ aggressive_terms = ""
79
 
80
+ # Extract text from each page and pass it to the process_text function
81
+ for page_number in range(len(pdf.pages)):
82
+ # Extract text from the page
83
+ page = pdf.pages[page_number]
84
+ text = page.extract_text()
85
 
86
+ # Pass the text to the process_text function for further processing
87
+ aggressive_terms += self._extract_aggressive_content(text)
88
+ return aggressive_terms
89
 
90
+ # except Exception as e:
91
+ # print(f"An error occurred while processing the PDF document: {str(e)}")
92
 
93
  def file_output_fnn(self,file_path):
94
  file_path = file_path.name