import os

import gradio as gr
from openai import AzureOpenAI
from PyPDF2 import PdfReader


class AggressiveContentFinder:
    """Identify aggressive terms in a contract PDF with an Azure OpenAI chat model.

    The PDF is read page by page; each page's text is sent to the chat
    deployment and the per-page answers are concatenated into one string.
    """

    def __init__(self):
        """Create the finder.

        No state is stored on the instance. The Azure OpenAI credentials are
        read from the ``AZURE_OPENAI_KEY`` and ``AZURE_OPENAI_ENDPOINT``
        environment variables each time a request is made.
        """

    def _extract_aggressive_content(self, contract_text: str) -> str:
        """Ask the chat model for aggressive terms in ``contract_text``.

        Args:
            contract_text: Text extracted from the contract.

        Returns:
            The model's answer, or an empty string when the request fails or
            the model returns no content. A string is always returned so that
            callers can concatenate results safely.
        """
        # The user prompt is assembled from adjacent literals; the resulting
        # text is a single line ending with the fenced contract content.
        prompt = (
            "This is a contract document content. "
            "Your task is to find aggressive terms, warning terms and penalties "
            "in the given contract. "
            f"```contract: {contract_text}```"
        )
        conversation = [
            {
                "role": "system",
                "content": "You are a helpful Aggressive Terms Finder in Given Contract.",
            },
            {"role": "user", "content": prompt},
        ]

        try:
            client = AzureOpenAI(
                api_key=os.getenv("AZURE_OPENAI_KEY"),
                api_version="2023-07-01-preview",
                azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
            )
            # "GPT-3" is passed as the model (deployment) name.
            chat_completion = client.chat.completions.create(
                model="GPT-3",
                messages=conversation,
                max_tokens=1000,
                temperature=0,
            )
            # message.content may be None; normalise it to an empty string.
            return chat_completion.choices[0].message.content or ""
        except Exception as e:
            # Best-effort: report the failure and let the remaining pages be
            # processed instead of aborting the whole document.
            print(f"An error occurred during text analysis: {str(e)}")
            return ""

    def get_aggressive_content(self, pdf_file_path: str):
        """Extract text from a PDF document and identify aggressive terms.

        Args:
            pdf_file_path: Either a filesystem path to the PDF or an
                upload object exposing the path through a ``name`` attribute.

        Returns:
            str: The per-page answers concatenated in page order. Pages
            without extractable text are skipped.

        Raises:
            ValueError: If no file was provided.

        Errors raised while opening or parsing the PDF are not caught here
        and propagate to the caller.
        """
        if pdf_file_path is None:
            raise ValueError("No PDF file was provided.")

        # Accept both a plain path and an object carrying the path in `.name`.
        source = getattr(pdf_file_path, "name", pdf_file_path)
        pdf = PdfReader(source)

        findings = []
        for page in pdf.pages:
            text = page.extract_text()
            # Skip pages with no extractable text: there is nothing for the
            # model to analyse, so the request would be wasted.
            if not text or not text.strip():
                continue
            findings.append(self._extract_aggressive_content(text))
        return "".join(findings)

    def file_output_fnn(self, file_path):
        """Return the filesystem path of an uploaded file.

        Args:
            file_path: Upload object exposing the path through ``name``; a
                plain path string is returned unchanged.
        """
        return getattr(file_path, "name", file_path)

    def gradio_interface(self):
        """Build the Gradio Blocks layout and wire up its events.

        The layout has a file display, an upload button, a button that runs
        the analysis on the uploaded file, and a textbox for the result.
        """
        with gr.Blocks(css="style.css", theme='xiaobaiyuan/theme_brief') as demo:
            with gr.Row(elem_id="col-container", scale=0.80):
                with gr.Column(elem_id="col-container", scale=0.80):
                    file1 = gr.File(label="File", elem_classes="filenameshow")
                with gr.Column(elem_id="col-container", scale=0.20):
                    # Only PDFs can be parsed by get_aggressive_content.
                    upload_button1 = gr.UploadButton(
                        "Browse File",
                        file_types=[".pdf"],
                        elem_classes="uploadbutton",
                    )
                aggressive_content = gr.Button(
                    "Get Aggressive Content", elem_classes="uploadbutton"
                )
            with gr.Row(elem_id="col-container", scale=0.60):
                headings = gr.Textbox(label="Aggressive Content")

            # Show the uploaded file, then feed it to the analysis on click.
            # The click handler needs the upload as its input; with an empty
            # input list it would be called without its required argument.
            upload_button1.upload(self.file_output_fnn, upload_button1, file1)
            aggressive_content.click(
                self.get_aggressive_content, upload_button1, headings
            )