Spaces:
Running
Running
robertselvam
committed on
Commit
•
5ac67c9
1
Parent(s):
3092adb
Update aggressive_content_finder.py
Browse files- aggressive_content_finder.py +14 -14
aggressive_content_finder.py
CHANGED
@@ -70,25 +70,25 @@ class AggressiveContentFinder:
|
|
70 |
aggressive terms are concatenated and returned. If any errors occur during PDF processing, they are logged for
|
71 |
debugging.
|
72 |
"""
|
73 |
-
try:
|
74 |
|
75 |
-
|
76 |
-
|
77 |
|
78 |
-
|
79 |
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
|
90 |
-
except Exception as e:
|
91 |
-
|
92 |
|
93 |
def file_output_fnn(self,file_path):
|
94 |
file_path = file_path.name
|
|
|
70 |
aggressive terms are concatenated and returned. If any errors occur during PDF processing, they are logged for
|
71 |
debugging.
|
72 |
"""
|
73 |
+
# try:
|
74 |
|
75 |
+
# Open the multi-page PDF using PdfReader
|
76 |
+
pdf = PdfReader(pdf_file_path.name)
|
77 |
|
78 |
+
aggressive_terms = ""
|
79 |
|
80 |
+
# Extract text from each page and pass it to the process_text function
|
81 |
+
for page_number in range(len(pdf.pages)):
|
82 |
+
# Extract text from the page
|
83 |
+
page = pdf.pages[page_number]
|
84 |
+
text = page.extract_text()
|
85 |
|
86 |
+
# Pass the text to the process_text function for further processing
|
87 |
+
aggressive_terms += self._extract_aggressive_content(text)
|
88 |
+
return aggressive_terms
|
89 |
|
90 |
+
# except Exception as e:
|
91 |
+
# print(f"An error occurred while processing the PDF document: {str(e)}")
|
92 |
|
93 |
def file_output_fnn(self,file_path):
|
94 |
file_path = file_path.name
|