File size: 4,625 Bytes
588b16e
245e161
588b16e
2fbb473
588b16e
 
 
 
 
 
 
 
059c9ac
588b16e
 
 
f156b0d
 
 
 
 
588b16e
 
 
 
 
 
 
 
 
 
 
 
b895d40
 
 
 
 
f8d40ff
 
aade48a
f8d40ff
 
 
 
b895d40
 
 
 
 
 
f8d40ff
 
 
588b16e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ac67c9
9dbd6b3
5ac67c9
 
588b16e
5ac67c9
588b16e
5ac67c9
 
 
 
 
588b16e
5ac67c9
 
 
588b16e
5ac67c9
 
588b16e
 
 
 
 
 
 
9237f2d
d087090
 
588b16e
d087090
 
 
 
9237f2d
588b16e
 
007ac39
588b16e
d087090
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from PyPDF2 import PdfReader
from openai import AzureOpenAI
import gradio as gr
import os


class AggressiveContentFinder:
    """
    Find aggressive terms, warning terms and penalties in a contract PDF.

    The text of every page is sent to an Azure OpenAI chat deployment and the
    model's answers are concatenated. Credentials are read from the
    ``AZURE_OPENAI_KEY`` and ``AZURE_OPENAI_ENDPOINT`` environment variables
    each time a request is made.
    """

    # API version handed to the AzureOpenAI client.
    API_VERSION = "2023-07-01-preview"
    # Value passed as ``model`` in the chat request.
    # NOTE(review): presumably the Azure deployment name -- confirm against the Azure resource.
    DEPLOYMENT_NAME = "ChatGPT"
    # Upper bound on the number of tokens generated per page.
    MAX_TOKENS = 1000

    def __init__(self):
        """
        Create a finder.

        No state is stored on the instance; the Azure OpenAI client is built
        from environment variables when a page is analysed.
        """

    @staticmethod
    def _resolve_path(file_ref) -> str:
        """
        Turn an upload reference into a filesystem path.

        Args:
            file_ref: Either a path (``str`` / ``os.PathLike``) or an object
                exposing the path as ``.name`` (e.g. a temporary-file wrapper).

        Returns:
            str: The filesystem path.
        """
        # Check for real paths first: pathlib objects also have ``.name``,
        # but there it is only the final path component.
        if isinstance(file_ref, (str, os.PathLike)):
            return os.fspath(file_ref)
        return file_ref.name

    def _extract_aggressive_content(self, contract_text: str) -> str:
        """
        Ask the chat model to identify aggressive terms in the given contract text.

        Args:
            contract_text (str): Text extracted from the contract.

        Returns:
            str: The model's answer. An empty string is returned when the text
            is blank, when the model returns no content, or when the request
            fails (the error is printed), so callers can always concatenate
            the result.
        """
        # Nothing to analyse: skip the API round trip for blank pages.
        if not contract_text or not contract_text.strip():
            return ""

        try:
            client = AzureOpenAI(
                api_key=os.getenv("AZURE_OPENAI_KEY"),
                api_version=self.API_VERSION,
                azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
            )

            conversation = [
                {"role": "system", "content": "You are a helpful Aggressive Terms Finder in Given Contract."},
                {"role": "user", "content": f"""This is a contract document content. Your task is to find aggressive terms, warning terms and penalties in the given contract.
                              ```contract: {contract_text}```"""},
            ]

            chat_completion = client.chat.completions.create(
                model=self.DEPLOYMENT_NAME,
                messages=conversation,
                max_tokens=self.MAX_TOKENS,
                temperature=0,
            )
            # ``content`` may be None; normalise so the return type stays ``str``.
            return chat_completion.choices[0].message.content or ""

        except Exception as e:
            # Best effort: report the failure and let the remaining pages proceed.
            print(f"An error occurred during text analysis: {str(e)}")
            return ""

    def get_aggressive_content(self, pdf_file_path) -> str:
        """
        Extract text from a PDF document and identify aggressive terms.

        Args:
            pdf_file_path: Path to the PDF document, or an upload object that
                exposes the path as ``.name``. ``None`` (nothing uploaded)
                is accepted and produces an empty result.

        Returns:
            str: Identified aggressive terms in the contract document, i.e. the
            per-page answers of ``_extract_aggressive_content`` concatenated in
            page order.

        Errors raised while opening or parsing the PDF are not caught here and
        propagate to the caller.
        """
        if pdf_file_path is None:
            return ""

        pdf = PdfReader(self._resolve_path(pdf_file_path))

        findings = []
        for page in pdf.pages:
            page_text = page.extract_text()
            # ``or ""`` keeps the join safe even if a page yields no answer.
            findings.append(self._extract_aggressive_content(page_text) or "")
        return "".join(findings)

    def file_output_fnn(self, file_path):
        """
        Return the filesystem path of an uploaded file.

        Args:
            file_path: Upload object exposing ``.name``, or a plain path.

        Returns:
            str: The path of the uploaded file.
        """
        return self._resolve_path(file_path)

    def gradio_interface(self):
        """
        Build the Gradio UI for uploading a contract and showing the findings.

        Returns:
            gr.Blocks: The assembled (not yet launched) interface, so the
            caller can call ``.launch()`` on it.
        """
        with gr.Blocks(css="style.css", theme='xiaobaiyuan/theme_brief') as demo:
            with gr.Row(elem_id="col-container", scale=0.80):
                with gr.Column(elem_id="col-container", scale=0.80):
                    file1 = gr.File(label="File", elem_classes="filenameshow")

                with gr.Column(elem_id="col-container", scale=0.20):
                    # Only PDFs can be parsed by get_aggressive_content.
                    upload_button1 = gr.UploadButton(
                        "Browse File", file_types=[".pdf"],
                        elem_classes="uploadbutton")
                aggressive_content = gr.Button("Get Aggressive Content", elem_classes="uploadbutton")

            with gr.Row(elem_id="col-container", scale=0.60):
                headings = gr.Textbox(label="Aggressive Content")

            # Event listeners must be registered while the Blocks context is
            # still open, and the handler needs the uploaded file as input.
            upload_button1.upload(self.file_output_fnn, upload_button1, file1)
            aggressive_content.click(self.get_aggressive_content, upload_button1, headings)

        return demo