import os
from typing import Optional

from PyPDF2 import PdfReader
import openai
import fitz  # PyMuPDF
import gradio as gr


class AggressiveContentFinder:
    """
    Identify aggressive terms (warnings, penalties, ...) in a contract PDF.

    Text is extracted page by page with PyPDF2 and each page is sent to the
    OpenAI completion endpoint; the per-page answers are collected into one
    string. A small Gradio UI is available through ``gradio_interface``.
    """

    def __init__(self, openai_api_key: Optional[str] = None):
        """
        Initialize the AggressiveContentFinder.

        Args:
            openai_api_key (Optional[str]): API key to install on the
                ``openai`` module. When omitted, nothing is configured here
                and the key must already be set elsewhere (for example by
                assigning ``openai.api_key`` before calling this class).
        """
        if openai_api_key is not None:
            openai.api_key = openai_api_key

    def _extract_aggressive_content(self, contract_text: str) -> str:
        """
        Ask the OpenAI completion endpoint for aggressive terms in the text.

        Args:
            contract_text (str): Text extracted from the contract.

        Returns:
            str: Identified aggressive terms, stripped of surrounding
            whitespace. An empty string is returned when the input is empty
            or when the API call fails, so callers can always concatenate
            the result.
        """
        # Pages without extractable text (e.g. scanned images) would only
        # produce an empty prompt; skip the remote call entirely.
        if not contract_text or not contract_text.strip():
            return ""

        prompt = (
            "This is a contract document content. Your task is to identify "
            "aggressive terms like warning terms, penalties in the given "
            "contract: "
            '(Example: "The bank may take possession of the property.")\n'
            f"```contract: {contract_text}```\n"
        )

        try:
            response = openai.Completion.create(
                engine="text-davinci-003",
                prompt=prompt,
                max_tokens=70,
                temperature=0,
            )
            return response.choices[0].text.strip()
        except Exception as e:
            # Best-effort: report the failure and let the remaining pages
            # be processed instead of aborting the whole document.
            print(f"An error occurred during text analysis: {str(e)}")
            return ""

    def get_aggressive_content(self, pdf_file_path) -> str:
        """
        Extract text from a PDF document and identify aggressive terms.

        Args:
            pdf_file_path: Either a filesystem path (``str`` or
                ``os.PathLike``) or an upload object whose ``name``
                attribute holds the path of the PDF on disk.

        Returns:
            str: Aggressive terms found in the document, one entry per page
            that produced a result, separated by newlines. An empty string
            is returned when no file is given or when the PDF cannot be
            processed.

        The PDF is opened with PdfReader and every page is passed to
        ``_extract_aggressive_content``. Errors raised while reading the PDF
        are printed and do not propagate.
        """
        # The button can be clicked before any file has been uploaded.
        if not pdf_file_path:
            return ""

        try:
            # Plain paths are used as-is; anything else is expected to carry
            # the on-disk path in `.name`, which is what this method has
            # always read.
            if isinstance(pdf_file_path, (str, os.PathLike)):
                source = pdf_file_path
            else:
                source = pdf_file_path.name

            pdf = PdfReader(source)

            findings = []
            for page in pdf.pages:
                terms = self._extract_aggressive_content(page.extract_text())
                if terms:
                    findings.append(terms)

            # Keep the output of different pages on separate lines so terms
            # from adjacent pages do not run together.
            return "\n".join(findings)
        except Exception as e:
            print(f"An error occurred while processing the PDF document: {str(e)}")
            return ""

    def file_output_fnn(self, file_path):
        """
        Return the on-disk path of an uploaded file.

        Args:
            file_path: Upload object exposing the file location as ``name``.

        Returns:
            The value of ``file_path.name``.
        """
        file_path = file_path.name
        return file_path

    def gradio_interface(self):
        """
        Build the Gradio UI for uploading a contract and showing the result.

        The layout has a file input and a "Get Aggressive Content" button in
        the first row and the output textbox in the second. Clicking the
        button runs ``get_aggressive_content`` on the uploaded file.

        Returns:
            gr.Blocks: The assembled (not yet launched) application; call
            ``.launch()`` on it to serve the UI.
        """
        with gr.Blocks(css="style.css", theme='xiaobaiyuan/theme_brief') as demo:
            with gr.Row(elem_id="col-container", scale=0.80):
                pdf_input = gr.File(
                    label="File",
                    file_types=[".pdf"],
                    elem_classes="filenameshow",
                )
                aggressive_content = gr.Button(
                    "Get Aggressive Content", elem_classes="uploadbutton"
                )
            with gr.Row(elem_id="col-container", scale=0.60):
                headings = gr.Textbox(label="Aggressive Content")

            # The handler needs the uploaded file; wiring it with no inputs
            # would call it without its required argument.
            aggressive_content.click(
                self.get_aggressive_content, [pdf_input], headings
            )

        return demo