Spaces:
Runtime error
Runtime error
Update aggressive_content_finder.py
Browse files- aggressive_content_finder.py +14 -14
aggressive_content_finder.py
CHANGED
|
@@ -70,25 +70,25 @@ class AggressiveContentFinder:
|
|
| 70 |
aggressive terms are concatenated and returned. If any errors occur during PDF processing, they are logged for
|
| 71 |
debugging.
|
| 72 |
"""
|
| 73 |
-
try:
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
|
| 78 |
-
|
| 79 |
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
|
| 90 |
-
except Exception as e:
|
| 91 |
-
|
| 92 |
|
| 93 |
def file_output_fnn(self,file_path):
|
| 94 |
file_path = file_path.name
|
|
|
|
| 70 |
aggressive terms are concatenated and returned. If any errors occur during PDF processing, they are logged for
|
| 71 |
debugging.
|
| 72 |
"""
|
| 73 |
+
# try:
|
| 74 |
|
| 75 |
+
# Open the multi-page PDF using PdfReader
|
| 76 |
+
pdf = PdfReader(pdf_file_path.name)
|
| 77 |
|
| 78 |
+
aggressive_terms = ""
|
| 79 |
|
| 80 |
+
# Extract text from each page and pass it to self._extract_aggressive_content
|
| 81 |
+
for page_number in range(len(pdf.pages)):
|
| 82 |
+
# Extract text from the page
|
| 83 |
+
page = pdf.pages[page_number]
|
| 84 |
+
text = page.extract_text()
|
| 85 |
|
| 86 |
+
# Pass the text to self._extract_aggressive_content for further processing
|
| 87 |
+
aggressive_terms += self._extract_aggressive_content(text)
|
| 88 |
+
return aggressive_terms
|
| 89 |
|
| 90 |
+
# except Exception as e:
|
| 91 |
+
# print(f"An error occurred while processing the PDF document: {str(e)}")
|
| 92 |
|
| 93 |
def file_output_fnn(self,file_path):
|
| 94 |
file_path = file_path.name
|