Spaces:

ADOPLE
/

Contract_Management

Runtime error

App Files Files Community

robertselvam committed on Oct 18, 2023

Commit

588b16e

1 Parent(s): a3f1c86

Upload 3 files

Browse files

Files changed (3) hide show

aggressive_content_finder.py +99 -0
incompletesentencefinder.py +93 -0
incorrect_sentence_finder.py +81 -0

aggressive_content_finder.py ADDED Viewed

	@@ -0,0 +1,99 @@

+from PyPDF2 import PdfReader
+import openai
+import fitz  # PyMuPDF
+import gradio as gr
class AggressiveContentFinder:
    """
    Identify aggressive terms (warnings, penalties) in a contract PDF using an OpenAI completion model.

    The class does not configure credentials: ``openai.api_key`` must be set by the caller before
    the analysis methods are used.
    """

    def __init__(self):
        """
        Create the finder. No instance state is kept and the OpenAI API key is not set here.
        """
        pass

    @staticmethod
    def _resolve_path(file_obj):
        """
        Return a filesystem path for an uploaded file.

        Args:
            file_obj: A path (``str`` or path-like), or an upload object exposing its path as ``name``.
        Returns:
            ``file_obj`` itself when it already is a path, otherwise its ``name`` attribute
            (or ``file_obj`` unchanged when it has no ``name``).
        """
        # Path-like objects also have a ``name`` attribute (the last path component),
        # so they must be recognised before falling back to ``name``.
        if isinstance(file_obj, str) or hasattr(file_obj, "__fspath__"):
            return file_obj
        return getattr(file_obj, "name", file_obj)

    def _extract_aggressive_content(self, contract_text: str) -> str:
        """
        Ask the OpenAI completion endpoint to identify aggressive terms in the given contract text.

        Args:
            contract_text (str): Text extracted from the contract.
        Returns:
            str: Identified aggressive terms, or an empty string when the text is blank or the
            request fails (the error is printed).
        """
        # Nothing to analyse: skip the API round-trip for blank pages.
        if not contract_text or not contract_text.strip():
            return ""
        try:
            response = openai.Completion.create(
                engine="text-davinci-003",
                prompt=f"""This is a contract document content. Your task is to identify aggressive terms like warning terms, penalties in the given contract:
                (Example: "The bank may take possession of the property.")
                ```contract: {contract_text}```
                """,
                max_tokens=70,
                temperature=0
            )
            return response.choices[0].text.strip()
        except Exception as e:
            print(f"An error occurred during text analysis: {str(e)}")
            # Always return a string so callers can safely join page results.
            return ""

    def get_aggressive_content(self, pdf_file_path: str) -> str:
        """
        Extract text from a PDF document and identify aggressive terms page by page.

        Args:
            pdf_file_path (str): Path to the PDF document, or an upload object exposing the path
                as its ``name`` attribute (as passed by the Gradio click handler).
        Returns:
            str: Aggressive terms found on each page, one page result per line. An empty string is
            returned when no file was supplied or when the PDF could not be processed (the error
            is printed).
        """
        pdf_path = self._resolve_path(pdf_file_path)
        if not pdf_path:
            # Button clicked before any file was uploaded.
            return ""
        try:
            pdf = PdfReader(pdf_path)
            page_results = []
            for page in pdf.pages:
                # Guard against extractors that yield None for pages without text.
                page_text = page.extract_text() or ""
                terms = self._extract_aggressive_content(page_text)
                if terms:
                    page_results.append(terms)
            # Separate pages so the last term of one page is not fused with the next.
            return "\n".join(page_results)
        except Exception as e:
            print(f"An error occurred while processing the PDF document: {str(e)}")
            return ""

    def file_output_fnn(self, file_path):
        """
        Return the path of an uploaded file so it can be shown in the file component.

        Args:
            file_path: Upload object exposing ``name``, or a plain path.
        Returns:
            The file's path.
        """
        return self._resolve_path(file_path)

    def gradio_interface(self):
        """
        Build the Gradio Blocks UI for uploading a file and displaying the analysis result.

        Returns:
            The constructed ``gr.Blocks`` instance, so the caller can launch or embed it.
        """
        with gr.Blocks(css="style.css", theme='xiaobaiyuan/theme_brief') as demo:
            with gr.Row(elem_id="col-container", scale=0.80):
                with gr.Column(elem_id="col-container", scale=0.80):
                    file1 = gr.File(label="File", elem_classes="filenameshow")
                with gr.Column(elem_id="col-container", scale=0.20):
                    upload_button1 = gr.UploadButton(
                        "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx", ".json", ".csv"],
                        elem_classes="uploadbutton")
                    # NOTE(review): labels say "Headings" although this view shows aggressive
                    # terms - confirm the intended wording.
                    aggressive_content = gr.Button("Get Headings", elem_classes="uploadbutton")
            with gr.Row(elem_id="col-container", scale=0.60):
                headings = gr.Textbox(label="Headings")
            # Event listeners have to be registered while the Blocks context is still open.
            upload_button1.upload(self.file_output_fnn, upload_button1, file1)
            aggressive_content.click(self.get_aggressive_content, upload_button1, headings)
        return demo

incompletesentencefinder.py ADDED Viewed

	@@ -0,0 +1,93 @@

+import fitz  # PyMuPDF
+import openai
+import gradio as gr
class IncompleteSentenceFinder:
    """
    Find incomplete sentences in a PDF document using an OpenAI completion model.

    The class does not configure credentials: ``openai.api_key`` must be set by the caller before
    the analysis methods are used.
    """

    def __init__(self):
        """
        Create the finder. No instance state is kept and the OpenAI API key is not set here.
        """
        pass

    @staticmethod
    def _resolve_path(file_obj):
        """
        Return a filesystem path for an uploaded file.

        Args:
            file_obj: A path (``str`` or path-like), or an upload object exposing its path as ``name``.
        Returns:
            ``file_obj`` itself when it already is a path, otherwise its ``name`` attribute
            (or ``file_obj`` unchanged when it has no ``name``).
        """
        # Path-like objects also have a ``name`` attribute (the last path component),
        # so they must be recognised before falling back to ``name``.
        if isinstance(file_obj, str) or hasattr(file_obj, "__fspath__"):
            return file_obj
        return getattr(file_obj, "name", file_obj)

    def _check_incomplete_sentence(self, text: str) -> str:
        """
        Ask the OpenAI completion endpoint to list incomplete sentences in the given text.

        Args:
            text (str): Text to check for incomplete sentences.
        Returns:
            str: The model's stripped answer, or an empty string when ``text`` is blank.
        Raises:
            Any exception raised by the OpenAI client propagates to the caller.
        """
        # Nothing to analyse: skip the API round-trip for blank pages.
        if not text or not text.strip():
            return ""
        response = openai.Completion.create(
            engine="text-davinci-003",
            prompt=f"list out the incomplete sentences in the following text:\n{text}",
            max_tokens=1000,
        )
        incomplete_sentences = response.choices[0].text.strip()
        print("incomplete_sentences Extracted Successfully!")
        return incomplete_sentences

    def get_incomplete_sentence(self, pdf_file) -> str:
        """
        Extract text from the PDF document page by page and find incomplete sentences.

        Args:
            pdf_file: Path to the PDF document, or an upload object exposing the path as its
                ``name`` attribute (as passed by the Gradio click handler).
        Returns:
            str: Incomplete sentences reported for each page, one page result per line. An empty
            string is returned when no file was supplied or when processing fails (the error is
            printed).
        """
        pdf_path = self._resolve_path(pdf_file)
        if not pdf_path:
            # Button clicked before any file was uploaded.
            return ""
        try:
            page_results = []
            # The context manager closes the document even if a page fails.
            with fitz.open(pdf_path) as doc:
                for page in doc:
                    found = self._check_incomplete_sentence(page.get_text())
                    if found:
                        page_results.append(found)
            # Separate pages so one page's output is not fused with the next.
            return "\n".join(page_results)
        except Exception as e:
            print(f"An error occurred: {str(e)}")
            return ""

    def file_output_fnn(self, file_path):
        """
        Return the path of an uploaded file so it can be shown in the file component.

        Args:
            file_path: Upload object exposing ``name``, or a plain path.
        Returns:
            The file's path.
        """
        return self._resolve_path(file_path)

    def gradio_interface(self):
        """
        Build the Gradio Blocks UI for uploading a file and displaying the analysis result.

        Returns:
            The constructed ``gr.Blocks`` instance, so the caller can launch or embed it.
        """
        with gr.Blocks(css="style.css", theme='xiaobaiyuan/theme_brief') as demo:
            with gr.Row(elem_id="col-container", scale=0.80):
                with gr.Column(elem_id="col-container", scale=0.80):
                    file1 = gr.File(label="File", elem_classes="filenameshow")
                with gr.Column(elem_id="col-container", scale=0.20):
                    upload_button1 = gr.UploadButton(
                        "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx", ".json", ".csv"],
                        elem_classes="uploadbutton")
                    # NOTE(review): labels say "Headings" although this view shows incomplete
                    # sentences - confirm the intended wording.
                    incomplete_sentence_btn = gr.Button("Get Headings", elem_classes="uploadbutton")
            with gr.Row(elem_id="col-container", scale=0.60):
                headings = gr.Textbox(label="Headings")
            # Event listeners have to be registered while the Blocks context is still open.
            upload_button1.upload(self.file_output_fnn, upload_button1, file1)
            incomplete_sentence_btn.click(self.get_incomplete_sentence, upload_button1, headings)
        return demo

incorrect_sentence_finder.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import fitz  # PyMuPDF
+import openai
+import gradio as gr
class IncorrectSentenceFinder:
    """
    Find grammatically incorrect sentences in a PDF document using an OpenAI completion model.

    The class does not configure credentials: ``openai.api_key`` must be set by the caller before
    the analysis methods are used.
    """

    def __init__(self):
        """
        Create the finder. No instance state is kept and the OpenAI API key is not set here.
        """
        pass

    @staticmethod
    def _resolve_path(file_obj):
        """
        Return a filesystem path for an uploaded file.

        Args:
            file_obj: A path (``str`` or path-like), or an upload object exposing its path as ``name``.
        Returns:
            ``file_obj`` itself when it already is a path, otherwise its ``name`` attribute
            (or ``file_obj`` unchanged when it has no ``name``).
        """
        # Path-like objects also have a ``name`` attribute (the last path component),
        # so they must be recognised before falling back to ``name``.
        if isinstance(file_obj, str) or hasattr(file_obj, "__fspath__"):
            return file_obj
        return getattr(file_obj, "name", file_obj)

    def _find_incorrect_sentence(self, text: str) -> str:
        """
        Ask the OpenAI completion endpoint to list grammatically incorrect sentences in the text.

        Args:
            text (str): Text to check for grammatical errors.
        Returns:
            str: The model's stripped answer, or an empty string when ``text`` is blank.
        Raises:
            Any exception raised by the OpenAI client propagates to the caller.
        """
        # Nothing to analyse: skip the API round-trip for blank pages.
        if not text or not text.strip():
            return ""
        response = openai.Completion.create(
            engine="text-davinci-003",
            prompt=f"list out the grammatical error sentence in the given text:\n{text}",
            temperature=0,
            max_tokens=1000,
        )
        return response.choices[0].text.strip()

    def get_incorrect_sentence(self, pdf_file: str) -> str:
        """
        Extract text from the PDF document page by page and find grammatically incorrect sentences.

        Args:
            pdf_file (str): Path to the PDF document, or an upload object exposing the path as its
                ``name`` attribute (as passed by the Gradio click handler).
        Returns:
            str: Incorrect sentences reported for each page, one page result per line. An empty
            string is returned when no file was supplied or when processing fails (the error is
            printed).
        """
        pdf_path = self._resolve_path(pdf_file)
        if not pdf_path:
            # Button clicked before any file was uploaded.
            return ""
        try:
            page_results = []
            # The context manager closes the document even if a page fails.
            with fitz.open(pdf_path) as doc:
                for page in doc:
                    found = self._find_incorrect_sentence(page.get_text())
                    if found:
                        page_results.append(found)
            # Separate pages so one page's output is not fused with the next.
            return "\n".join(page_results)
        except Exception as e:
            print(f"An error occurred: {str(e)}")
            return ""

    def file_output_fnn(self, file_path):
        """
        Return the path of an uploaded file so it can be shown in the file component.

        Args:
            file_path: Upload object exposing ``name``, or a plain path.
        Returns:
            The file's path.
        """
        return self._resolve_path(file_path)

    def gradio_interface(self):
        """
        Build the Gradio Blocks UI for uploading a file and displaying the analysis result.

        Returns:
            The constructed ``gr.Blocks`` instance, so the caller can launch or embed it.
        """
        with gr.Blocks(css="style.css", theme='xiaobaiyuan/theme_brief') as demo:
            with gr.Row(elem_id="col-container", scale=0.80):
                with gr.Column(elem_id="col-container", scale=0.80):
                    file1 = gr.File(label="File", elem_classes="filenameshow")
                with gr.Column(elem_id="col-container", scale=0.20):
                    upload_button1 = gr.UploadButton(
                        "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx", ".json", ".csv"],
                        elem_classes="uploadbutton")
                    # NOTE(review): labels say "Headings" although this view shows incorrect
                    # sentences - confirm the intended wording.
                    incorrect_sentence = gr.Button("Get Headings", elem_classes="uploadbutton")
            with gr.Row(elem_id="col-container", scale=0.60):
                headings = gr.Textbox(label="Headings")
            # Event listeners have to be registered while the Blocks context is still open.
            upload_button1.upload(self.file_output_fnn, upload_button1, file1)
            incorrect_sentence.click(self.get_incorrect_sentence, upload_button1, headings)
        return demo