robertselvam committed on
Commit
588b16e
1 Parent(s): a3f1c86

Upload 3 files

aggressive_content_finder.py ADDED
@@ -0,0 +1,99 @@
+ from PyPDF2 import PdfReader
+ import openai
+ import gradio as gr
+
+
+ class AggressiveContentFinder:
+     """
+     Identifies and extracts aggressive terms (e.g. warnings and penalties) in a contract document using OpenAI's GPT-3.
+     """
+
+     def __init__(self):
+         """
+         Initialize the AggressiveContentFinder. The OpenAI API key is expected to be configured elsewhere.
+         """
+         # openai.api_key = openai_api_key
+         pass
+
+     def _extract_aggressive_content(self, contract_text: str) -> str:
+         """
+         Use OpenAI's GPT-3 to identify aggressive terms in the given contract text.
+
+         Args:
+             contract_text (str): Text extracted from the contract.
+
+         Returns:
+             str: Identified aggressive terms, or an empty string on failure.
+         """
+         try:
+             response = openai.Completion.create(
+                 engine="text-davinci-003",
+                 prompt=f"""This is the content of a contract document. Your task is to identify aggressive terms, such as warnings and penalties, in the given contract:
+                 (Example: "The bank may take possession of the property.")
+                 ```contract: {contract_text}```
+                 """,
+                 max_tokens=70,
+                 temperature=0
+             )
+             aggressive_terms = response.choices[0].text.strip()
+             return aggressive_terms
+         except Exception as e:
+             print(f"An error occurred during text analysis: {str(e)}")
+             # Return an empty string so the caller can safely concatenate results.
+             return ""
+
+     def get_aggressive_content(self, pdf_file_path: str) -> str:
+         """
+         Extract text from a PDF document and identify aggressive terms.
+
+         Args:
+             pdf_file_path (str): Path to the PDF document (or an uploaded file object).
+
+         Returns:
+             str: Identified aggressive terms in the contract document.
+
+         This method opens a multi-page PDF using PdfReader and iterates through each page. For each page it
+         extracts the text and passes it to _extract_aggressive_content for further processing. The identified
+         aggressive terms are concatenated and returned. Any errors during PDF processing are logged for debugging.
+         """
+         try:
+             # Open the multi-page PDF using PdfReader (accepts a path or a file-like object)
+             pdf = PdfReader(pdf_file_path)
+
+             aggressive_terms = ""
+
+             # Extract text from each page and pass it to _extract_aggressive_content
+             for page in pdf.pages:
+                 text = page.extract_text()
+                 aggressive_terms += self._extract_aggressive_content(text)
+             return aggressive_terms
+
+         except Exception as e:
+             print(f"An error occurred while processing the PDF document: {str(e)}")
+             return ""
+
+     def file_output_fnn(self, file_path):
+         # Return the temporary path of the uploaded file so it can be shown in the File component.
+         file_path = file_path.name
+         return file_path
+
+     def gradio_interface(self):
+         # Build the Gradio UI: upload a contract and display the identified aggressive terms.
+         with gr.Blocks(css="style.css", theme='xiaobaiyuan/theme_brief') as demo:
+             with gr.Row(elem_id="col-container", scale=0.80):
+                 with gr.Column(elem_id="col-container", scale=0.80):
+                     file1 = gr.File(label="File", elem_classes="filenameshow")
+
+                 with gr.Column(elem_id="col-container", scale=0.20):
+                     upload_button1 = gr.UploadButton(
+                         "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx", ".json", ".csv"],
+                         elem_classes="uploadbutton")
+                     aggressive_content = gr.Button("Get Aggressive Content", elem_classes="uploadbutton")
+
+             with gr.Row(elem_id="col-container", scale=0.60):
+                 aggressive_terms = gr.Textbox(label="Aggressive Terms")
+
+             upload_button1.upload(self.file_output_fnn, upload_button1, file1)
+             aggressive_content.click(self.get_aggressive_content, upload_button1, aggressive_terms)
+         # Return the Blocks app so a caller can launch or embed it.
+         return demo
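
None of the uploaded files shows how a component is actually served. Below is a minimal usage sketch, not part of this commit: it assumes gradio_interface() returns the gr.Blocks app (as in the cleaned-up code above) and that the OpenAI API key is set by the caller, since the assignment in __init__ is commented out.

# Hypothetical usage, not part of the commit: launch the AggressiveContentFinder demo.
import openai
from aggressive_content_finder import AggressiveContentFinder

openai.api_key = "sk-..."  # assumption: the caller supplies the API key

finder = AggressiveContentFinder()
demo = finder.gradio_interface()  # assumed to return the gr.Blocks app
demo.launch()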
incompletesentencefinder.py ADDED
@@ -0,0 +1,93 @@
+ import fitz  # PyMuPDF
+ import openai
+ import gradio as gr
+
+
+ class IncompleteSentenceFinder:
+     """
+     Finds and displays incomplete sentences in a PDF document using OpenAI's GPT-3.
+     """
+
+     def __init__(self):
+         """
+         Initialize the IncompleteSentenceFinder. The OpenAI API key is expected to be configured elsewhere.
+         """
+         # openai.api_key = openai_api_key
+         pass
+
+     def _check_incomplete_sentence(self, text: str) -> str:
+         """
+         Use OpenAI's GPT-3 to identify incomplete sentences in the given text.
+
+         Args:
+             text (str): Text to check for incomplete sentences.
+
+         Returns:
+             str: Incomplete sentences identified by GPT-3.
+         """
+         # Ask OpenAI's GPT-3 engine to identify incomplete sentences.
+         response = openai.Completion.create(
+             engine="text-davinci-003",
+             prompt=f"List the incomplete sentences in the following text:\n{text}",
+             max_tokens=1000,
+         )
+
+         # Extract and strip the identified incomplete sentences from the GPT-3 response.
+         incomplete_sentences = response.choices[0].text.strip()
+
+         print("Incomplete sentences extracted successfully!")
+
+         return incomplete_sentences
+
+     def get_incomplete_sentence(self, pdf_file) -> str:
+         """
+         Extract text from the PDF document and find incomplete sentences.
+
+         Returns:
+             str: Incomplete sentences identified by GPT-3.
+         """
+         try:
+             # Open the PDF with PyMuPDF; uploaded file objects expose their path via .name
+             doc = fitz.open(pdf_file.name if hasattr(pdf_file, "name") else pdf_file)
+             incomplete_text = ""
+
+             # Iterate through each page in the PDF document and extract the text
+             for page in doc:
+                 text = page.get_text()
+                 incomplete_text += self._check_incomplete_sentence(text)
+
+             return incomplete_text
+
+         except Exception as e:
+             print(f"An error occurred: {str(e)}")
+             return ""
+
+     def file_output_fnn(self, file_path):
+         # Return the temporary path of the uploaded file so it can be shown in the File component.
+         file_path = file_path.name
+         return file_path
+
+     def gradio_interface(self):
+         # Build the Gradio UI: upload a document and display the incomplete sentences.
+         with gr.Blocks(css="style.css", theme='xiaobaiyuan/theme_brief') as demo:
+             with gr.Row(elem_id="col-container", scale=0.80):
+                 with gr.Column(elem_id="col-container", scale=0.80):
+                     file1 = gr.File(label="File", elem_classes="filenameshow")
+
+                 with gr.Column(elem_id="col-container", scale=0.20):
+                     upload_button1 = gr.UploadButton(
+                         "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx", ".json", ".csv"],
+                         elem_classes="uploadbutton")
+                     incomplete_sentence_btn = gr.Button("Get Incomplete Sentences", elem_classes="uploadbutton")
+
+             with gr.Row(elem_id="col-container", scale=0.60):
+                 incomplete_sentences = gr.Textbox(label="Incomplete Sentences")
+
+             upload_button1.upload(self.file_output_fnn, upload_button1, file1)
+             incomplete_sentence_btn.click(self.get_incomplete_sentence, upload_button1, incomplete_sentences)
+         # Return the Blocks app so a caller can launch or embed it.
+         return demo
incorrect_sentence_finder.py ADDED
@@ -0,0 +1,81 @@
+ import fitz  # PyMuPDF
+ import openai
+ import gradio as gr
+
+
+ class IncorrectSentenceFinder:
+     """
+     Finds and displays grammatically incorrect sentences in a PDF document using OpenAI's GPT-3.
+     """
+
+     def __init__(self):
+         """
+         Initialize the IncorrectSentenceFinder. The OpenAI API key is expected to be configured elsewhere.
+         """
+         # openai.api_key = openai_api_key
+         pass
+
+     def _find_incorrect_sentence(self, text: str) -> str:
+         """
+         Use OpenAI's GPT-3 to identify grammatically incorrect sentences in the given text.
+
+         Args:
+             text (str): Text to check for grammatical errors.
+
+         Returns:
+             str: Grammatically incorrect sentences identified by GPT-3.
+         """
+         # Ask OpenAI's GPT-3 engine to identify grammatically incorrect sentences.
+         response = openai.Completion.create(
+             engine="text-davinci-003",
+             prompt=f"List the grammatically incorrect sentences in the given text:\n{text}",
+             temperature=0,
+             max_tokens=1000,
+         )
+
+         # Extract and strip the identified grammatically incorrect sentences from the GPT-3 response.
+         incorrect_sentences = response.choices[0].text.strip()
+         return incorrect_sentences
+
+     def get_incorrect_sentence(self, pdf_file: str) -> str:
+         """
+         Extract text from the PDF document and find grammatically incorrect sentences.
+
+         Returns:
+             str: Grammatically incorrect sentences identified by GPT-3.
+         """
+         try:
+             # Open the PDF with PyMuPDF; uploaded file objects expose their path via .name
+             doc = fitz.open(pdf_file.name if hasattr(pdf_file, "name") else pdf_file)
+             incorrect_sentences = ''
+
+             # Iterate through each page in the PDF document and extract the text
+             for page in doc:
+                 text = page.get_text()
+                 incorrect_sentences += self._find_incorrect_sentence(text)
+             return incorrect_sentences
+
+         except Exception as e:
+             print(f"An error occurred: {str(e)}")
+             return ""
+
+     def file_output_fnn(self, file_path):
+         # Return the temporary path of the uploaded file so it can be shown in the File component.
+         file_path = file_path.name
+         return file_path
+
+     def gradio_interface(self):
+         # Build the Gradio UI: upload a document and display the grammatically incorrect sentences.
+         with gr.Blocks(css="style.css", theme='xiaobaiyuan/theme_brief') as demo:
+             with gr.Row(elem_id="col-container", scale=0.80):
+                 with gr.Column(elem_id="col-container", scale=0.80):
+                     file1 = gr.File(label="File", elem_classes="filenameshow")
+
+                 with gr.Column(elem_id="col-container", scale=0.20):
+                     upload_button1 = gr.UploadButton(
+                         "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx", ".json", ".csv"],
+                         elem_classes="uploadbutton")
+                     incorrect_sentence = gr.Button("Get Incorrect Sentences", elem_classes="uploadbutton")
+
+             with gr.Row(elem_id="col-container", scale=0.60):
+                 incorrect_sentences = gr.Textbox(label="Incorrect Sentences")
+
+             upload_button1.upload(self.file_output_fnn, upload_button1, file1)
+             incorrect_sentence.click(self.get_incorrect_sentence, upload_button1, incorrect_sentences)
+         # Return the Blocks app so a caller can launch or embed it.
+         return demo
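
The commit also does not show how the three components fit together. One plausible composition, purely an assumption and not part of the upload, is a hypothetical app.py that stacks the three demos into a tabbed Gradio app; it relies on each gradio_interface() returning its gr.Blocks app, as in the cleaned-up code above.

# Hypothetical app.py, not part of the commit: combine the three demos into one tabbed app.
import gradio as gr
from aggressive_content_finder import AggressiveContentFinder
from incompletesentencefinder import IncompleteSentenceFinder
from incorrect_sentence_finder import IncorrectSentenceFinder

app = gr.TabbedInterface(
    [
        AggressiveContentFinder().gradio_interface(),
        IncompleteSentenceFinder().gradio_interface(),
        IncorrectSentenceFinder().gradio_interface(),
    ],
    ["Aggressive Content", "Incomplete Sentences", "Incorrect Sentences"],
)

app.launch()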