naveenvenkatesh committed on
Commit
1228921
1 Parent(s): f8095b1

Update contract_missing_clausses.py

Browse files
Files changed (1) hide show
  1. contract_missing_clausses.py +13 -8
contract_missing_clausses.py CHANGED
@@ -1,5 +1,5 @@
1
  import openai
2
- import pdfplumber
3
  import logging
4
 
5
  # Configure logging
@@ -79,16 +79,21 @@ class ContractMissingClauses:
79
  try:
80
 
81
  LOGGER.info("Analyzing contract and extracting pdf page...")
82
-
83
- # Initialize pdfplumber
84
- pdf = pdfplumber.open(pdf_file.name)
85
 
86
  result = ''
87
- # Iterate through each page and extract text
88
- for page in pdf.pages:
89
- contract = page.extract_text()
90
- result += self.get_missing_clauses(contract)
 
 
 
 
 
 
 
91
  return result
 
92
  except Exception as e:
93
  # If an error occurs during the key-value extraction process, log the error
94
  LOGGER.error(f"Error occurred while extracting pdf page: {str(e)}")
 
1
  import openai
2
+ from PyPDF2 import PdfReader
3
  import logging
4
 
5
  # Configure logging
 
79
  try:
80
 
81
  LOGGER.info("Analyzing contract and extracting pdf page...")
 
 
 
82
 
83
  result = ''
84
+
85
+ pdf = PdfReader(pdf_file_path.name)
86
+
87
+ # Extract text from each page and pass it to the process_text function
88
+ for page_number in range(len(pdf.pages)):
89
+
90
+ # Extract text from the page
91
+ page = pdf.pages[page_number]
92
+ text = page.extract_text()
93
+ result += self.get_missing_clauses(contract)
94
+
95
  return result
96
+
97
  except Exception as e:
98
  # If an error occurs during the key-value extraction process, log the error
99
  LOGGER.error(f"Error occurred while extracting pdf page: {str(e)}")