lekkalar committed on
Commit
928a91a
·
0 Parent(s):

Duplicate from lekkalar/chatgpt-for-pdf-using-langchain-gpt4-chromadb-prompttemplate-tabs-dataframe-ocrmypdf-sqlite-csv

Browse files
Files changed (5) hide show
  1. .gitattributes +34 -0
  2. README.md +16 -0
  3. app.py +598 -0
  4. packages.txt +2 -0
  5. requirements.txt +13 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: >-
3
+ ChatGPT For PDF - langchain,gpt4,chromadb,promptTemplate,ocrmypdf,sqlite,admin
4
+ page,dataframe,csv,tabs
5
+ emoji: 👁
6
+ colorFrom: indigo
7
+ colorTo: gray
8
+ sdk: gradio
9
+ sdk_version: 3.33.1
10
+ app_file: app.py
11
+ pinned: false
12
+ duplicated_from: >-
13
+ lekkalar/chatgpt-for-pdf-using-langchain-gpt4-chromadb-prompttemplate-tabs-dataframe-ocrmypdf-sqlite-csv
14
+ ---
15
+
16
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,598 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import time
4
+ import pandas as pd
5
+ import sqlite3
6
+ import ocrmypdf
7
+
8
+ from langchain.document_loaders import OnlinePDFLoader #for laoding the pdf
9
+ from langchain.embeddings import OpenAIEmbeddings # for creating embeddings
10
+ from langchain.text_splitter import CharacterTextSplitter
11
+ from langchain.vectorstores import Chroma # for the vectorization part
12
+ from langchain.chains import RetrievalQA # for conversing with chatGPT
13
+ from langchain.chat_models import ChatOpenAI # the LLM model we'll use (ChatGPT)
14
+ from langchain import PromptTemplate
15
+
16
+
17
def create_db_connection():
    """Open and return a SQLite connection to the question-set database.

    The database file is ``./questionset.db`` (relative to the working
    directory). ``check_same_thread=False`` is passed so the returned
    connection may be used from a thread other than the one that created it.

    Returns:
        An open ``sqlite3.Connection``; the caller is responsible for closing it.
    """
    database_path = "./questionset.db"
    return sqlite3.connect(database_path, check_same_thread=False)
21
+
22
def create_sqlite_table(connection):
    """Ensure the ``questions`` table exists in the given SQLite database.

    The table has four ``TEXT NOT NULL`` columns: ``document_type``,
    ``questionset_tag``, ``field`` and ``question``. Calling this function on
    a database that already contains the table leaves its rows untouched.

    Args:
        connection: An open ``sqlite3`` connection.
    """
    print("*****Entered the create_sqlite_table method*****")
    cursor = connection.cursor()
    # Look the table up in the schema catalogue instead of running
    # ``SELECT *`` and fetching every row just to see whether it fails.
    table_exists = cursor.execute(
        "SELECT 1 FROM sqlite_master WHERE type='table' AND name='questions'"
    ).fetchone() is not None
    if not table_exists:
        # IF NOT EXISTS keeps the statement idempotent even if another
        # connection creates the table between the check and this call.
        cursor.execute(
            '''
            CREATE TABLE IF NOT EXISTS questions (document_type TEXT NOT NULL, questionset_tag TEXT NOT NULL, field TEXT NOT NULL, question TEXT NOT NULL)
            ''')
        print("*****questions table has been created******")
    connection.commit()
38
+
39
def load_master_questionset_into_sqlite(connection):
    """Seed the ``questions`` table with the built-in "masterlist" question sets.

    The table is created first if needed. The master lists for the "DOT" and
    "Transmittal Summary" document types are inserted only when no
    ``DOT``/``masterlist`` rows exist yet, so repeated calls do not duplicate
    them. The total number of rows in the table is printed on every call.

    Args:
        connection: An open ``sqlite3`` connection.
    """
    create_sqlite_table(connection)
    cursor = connection.cursor()
    insert_sql = "INSERT INTO questions(document_type, questionset_tag, field, question) VALUES(?,?,?,?)"

    # Check to make sure the masterlist has not been loaded already.
    masterlist_for_DOT_count = cursor.execute(
        "Select COUNT(document_type) from questions where document_type=? and questionset_tag=?",
        ("DOT", "masterlist"),
    ).fetchone()[0]

    if masterlist_for_DOT_count == 0:
        print("DOT masterlist has not yet been loaded, proceeding to load.")
        # Each helper returns (field_list, question_list) in matching order.
        master_sets = (
            ("DOT", create_field_and_question_list_for_DOT()),
            ("Transmittal Summary", create_field_and_question_list_for_Transmittal_Summary()),
        )
        print("*****Entered the load master question set method*****")
        for document_type, (fields, questions) in master_sets:
            cursor.executemany(
                insert_sql,
                [(document_type, "masterlist", field, question)
                 for field, question in zip(fields, questions)],
            )
        connection.commit()

    total_questions = cursor.execute("Select COUNT(document_type) from questions").fetchone()[0]
    print("*******Total number of questions in the DB:", total_questions)
63
+
64
def create_field_and_question_list_for_DOT():
    """Build the master fields and questions for a Deed of Trust (DOT) document.

    Returns:
        A ``(fieldList, queryList)`` pair of equally long lists; the question
        at index ``i`` asks for the field at index ``i``.
    """
    # (field, question) pairs, kept side by side so they cannot drift apart.
    field_question_pairs = [
        ("Loan Number", "what is the Loan Number?"),
        ("Borrower", "Who is the Borrower?"),
        ("Case Number", "what is the Case Number?"),
        ("MIN Number", "what is the Mortgage Identification number?"),
        ("Signed Date", "DOT signed date?"),
        ("Lender", "Who is the Lender?"),
        ("VA/FHA Number", "what is the VA/FHA Number?"),
        ("Co-Borrower", "Who is the Co-Borrower?"),
        ("Property Type", "What is the property type - single family, multi family?"),
        ("Property Address", "what is the Property Address?"),
        ("Property County", "In what County is the property located?"),
        ("Electronic Recording Date", "what is the Electronically recorded date"),
    ]
    field_names = [field for field, _ in field_question_pairs]
    question_texts = [question for _, question in field_question_pairs]
    return field_names, question_texts
93
+
94
def create_field_and_question_list_for_Transmittal_Summary():
    """Build the master fields and questions for a Transmittal Summary document.

    Returns:
        A ``(fieldList, queryList)`` pair of equally long lists; the question
        at index ``i`` asks for the field at index ``i``.
    """
    # (field, question) pairs, kept side by side so they cannot drift apart.
    field_question_pairs = [
        ("Borrower", "Who is the Borrower?"),
        ("Property Address", "what is the Property Address?"),
        ("Loan Term", "what is the Loan Term?"),
        ("Base Income", "What is the Base Income?"),
        ("Borrower's SSN", "what is the Borrower's SSN?"),
        ("Co-Borrower", "Who is the Co-Borrower?"),
        ("Original Loan Amount", "What is the Original Loan Amount?"),
        ("Initial P&I payment", "What is the Initial P&I payment?"),
        # The field label uses a typographic apostrophe (U+2019); keep as is.
        ("Co-Borrower’s SSN", "What is the Co-Borrower's SSN?"),
        ("Units#", "Number of units?"),
        ("Seller", "Who is the Seller?"),
        ("Signed Date", "Document signed date?"),
    ]
    field_names = [field for field, _ in field_question_pairs]
    question_texts = [question for _, question in field_question_pairs]
    return field_names, question_texts
123
+
124
def retrieve_document_type_and_questionsettag_from_sqlite():
    """Return a dropdown update listing every stored question set.

    Seeds the master question sets if they are missing, then reads all
    ``(document_type, questionset_tag)`` rows and reduces them to unique
    ``"documentType:questionSetTag"`` strings in query order.

    Returns:
        The result of ``gr.Dropdown.update`` with the unique entries as
        choices and the first entry (or ``None`` when there are none) selected.
    """
    connection = create_db_connection()
    try:
        load_master_questionset_into_sqlite(connection)
        rows = connection.cursor().execute(
            "SELECT document_type, questionset_tag FROM questions order by document_type, upper(questionset_tag)"
        ).fetchall()
    finally:
        # Release the connection even if seeding or the query fails.
        connection.close()
    print("Number of rows retrieved from DB:", len(rows))

    # dict.fromkeys drops duplicates in linear time and keeps first-seen order.
    list_for_dropdown = list(dict.fromkeys(
        f"{document_type}:{questionset_tag}" for document_type, questionset_tag in rows
    ))
    print("Number of unique entries found in the DB:", len(list_for_dropdown))

    # Avoid an IndexError when the table is empty.
    default_choice = list_for_dropdown[0] if list_for_dropdown else None
    return gr.Dropdown.update(choices=list_for_dropdown, value=default_choice)
143
+
144
+
145
def retrieve_fields_and_questions(dropdownoption):
    """Return the fields and questions of the chosen question set.

    Args:
        dropdownoption: Selection in ``documentType:questionSetTag`` format.
            ``None`` or an empty string (nothing selected) yields an empty
            result instead of raising.

    Returns:
        A ``pandas.DataFrame`` with the columns ``documentType``, ``field``
        and ``question``.
    """
    print("dropdownoption is:", dropdownoption)
    result_columns = ["documentType", "field", "question"]
    if not dropdownoption:
        return pd.DataFrame(columns=result_columns)

    # Split on the first colon only so a tag that itself contains ':' is
    # kept whole rather than truncated.
    document_type, _, questionset_tag = dropdownoption.partition(":")

    connection = create_db_connection()
    try:
        fields_and_questions = connection.cursor().execute(
            "SELECT document_type,field, question FROM questions where document_type=? and questionset_tag=?",
            (document_type, questionset_tag),
        ).fetchall()
    finally:
        connection.close()
    return pd.DataFrame(fields_and_questions, columns=result_columns)
154
+
155
def add_questionset(data, document_type, tag_for_questionset):
    """Insert one question set into the ``questions`` table.

    Args:
        data: ``pandas.DataFrame`` with ``field`` and ``question`` columns;
            one row is inserted per DataFrame row.
        document_type: Value stored in the ``document_type`` column.
        tag_for_questionset: Value stored in the ``questionset_tag`` column.

    Raises:
        KeyError: If ``data`` lacks a ``field`` or ``question`` column.
    """
    connection = create_db_connection()
    try:
        create_sqlite_table(connection)
        rows_to_insert = [
            (document_type, tag_for_questionset, field, question)
            for field, question in zip(data['field'], data['question'])
        ]
        connection.cursor().executemany(
            "INSERT INTO questions(document_type, questionset_tag, field, question) VALUES(?,?,?,?)",
            rows_to_insert,
        )
        connection.commit()
    finally:
        # Always release the connection; uncommitted rows are discarded
        # if an insert failed part-way through.
        connection.close()
164
+
165
+
166
def _select_relevant_pages(pages, relevant_pages):
    """Return the pages named in a comma-separated 1-based list, or all pages if none match."""
    selected = []
    for token in (relevant_pages or "").split(","):
        token = token.strip()  # tolerate "1, 2, 3"
        # isdecimal() only accepts characters that int() can parse.
        if not token.isdecimal():
            continue
        page_index = int(token) - 1
        if 0 <= page_index < len(pages):
            selected.append(pages[page_index])
    # In the scenario where none of the page numbers supplied exist in the
    # PDF, revert to using the entire PDF.
    return selected if selected else list(pages)


def load_pdf_and_generate_embeddings(pdf_doc, relevant_pages):
    """OCR and index an uploaded PDF, then build the global question-answering chain.

    The PDF is run through ``ocr_converter``, loaded and split, optionally
    restricted to the requested pages, embedded into a Chroma vector store,
    and wrapped in a ``RetrievalQA`` chain stored in the module-level
    ``pdf_qa``.

    Args:
        pdf_doc: Uploaded file object (its ``name`` is read by ``ocr_converter``).
        relevant_pages: Optional comma-separated 1-based page numbers; blank
            or entirely invalid input means "use the whole PDF".

    Returns:
        ``"Ready"`` on success, otherwise a message describing what is missing.
    """
    global pdf_qa

    # SECURITY: the API key must be supplied through the environment (for
    # example a Space secret); it must never be hard-coded in source control.
    if not os.environ.get("OPENAI_API_KEY"):
        return "OPENAI_API_KEY is not set. Configure it as an environment variable or Space secret."
    if pdf_doc is None:
        return "Please upload a PDF before generating embeddings"

    # OCR conversion of the uploaded file
    pdf_doc = ocr_converter(pdf_doc)
    # Load the pdf file
    loader = OnlinePDFLoader(pdf_doc)
    pages = loader.load_and_split()
    print('pages loaded:', len(pages))

    # OpenAIEmbeddings generates the embeddings for the text
    embeddings = OpenAIEmbeddings()

    pages_to_be_loaded = _select_relevant_pages(pages, relevant_pages)

    # The vector store is built from the selected pages and the embeddings instance
    vectordb = Chroma.from_documents(pages_to_be_loaded, embedding=embeddings)

    prompt_template = (
        "Use the following pieces of context to answer the question at the end. "
        "If you do not know the answer, just return N/A. "
        "If you encounter a date, return it in mm/dd/yyyy format.\n"
        "\n"
        "{context}\n"
        "\n"
        "Question: {question}\n"
        "Return just the answer. Provide the answer in the JSON format and extract the key from the question :"
    )
    PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    chain_type_kwargs = {"prompt": PROMPT}
    # Finally, create the bot using the RetrievalQA class
    pdf_qa = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(temperature=0, model_name="gpt-4"),
        chain_type="stuff",
        retriever=vectordb.as_retriever(search_kwargs={"k": 5}),
        chain_type_kwargs=chain_type_kwargs,
        return_source_documents=False,
    )

    return "Ready"
211
+
212
def load_csv_and_store_questionset_into_sqlite(csv_file, document_type, tag_for_questionset):
    """Store the questions from an uploaded CSV as a new question set.

    Args:
        csv_file: Uploaded file object; ``csv_file.name`` is read with
            ``pandas.read_csv`` and must have ``field`` and ``question`` headers.
        document_type: Document type chosen by the user.
        tag_for_questionset: Name chosen for the question set.

    Returns:
        A status message: a completion summary on success, or a prompt naming
        the missing input.
    """
    print('document type is:',document_type)
    print('tag_for_questionset is:',tag_for_questionset)

    # Both values are required; report the problem whichever one is missing
    # instead of silently returning None for one of the two cases.
    if not (tag_for_questionset and document_type):
        return "Please select the Document Type and provide a name for the Question Set"
    if csv_file is None:
        return "Please upload a CSV file with the headers field, question"

    data = pd.read_csv(csv_file.name)
    add_questionset(data, document_type, tag_for_questionset)
    return "Task Complete. Uploaded {} fields and the corresponding questions into the Database for {}:{}".format(data.shape[0], document_type,tag_for_questionset)
224
+
225
+
226
+
227
def answer_predefined_questions(document_type_and_questionset):
    """Ask the loaded PDF every question in the chosen question set.

    Args:
        document_type_and_questionset: Selection in
            ``documentType:questionSetTag`` format.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Field``,
        ``Question to gpt-4`` and ``Response from gpt-4``. It is empty when
        no valid selection was supplied or the question set has no rows.
    """
    print('chosen document_type_and_questionset:',document_type_and_questionset)
    result_columns = ["Field", "Question to gpt-4", "Response from gpt-4"]

    # Split on the first colon only so tags containing ':' stay intact.
    document_type, _, question_set = (document_type_and_questionset or "").partition(":")
    if not document_type or not question_set:
        # Keep the return type a DataFrame: the result feeds a Dataframe output.
        print("Please choose your Document Type:QuestionSet")
        return pd.DataFrame(columns=result_columns)

    # Given the document_type and questionset_tag, retrieve the corresponding
    # fields and questions, and release the connection before the slow
    # question-answering calls start.
    connection = create_db_connection()
    try:
        rows = connection.cursor().execute(
            "SELECT field, question FROM questions where document_type=? and questionset_tag=?",
            (document_type, question_set),
        ).fetchall()
    finally:
        connection.close()

    fields = []
    questions = []
    responses = []
    for field, question in rows:
        fields.append(field)
        questions.append(question)
        responses.append(pdf_qa.run(question))

    return pd.DataFrame({"Field": fields, "Question to gpt-4": questions, "Response from gpt-4": responses})
250
+
251
+
252
def ocr_converter(input_file):
    """Run OCR on an uploaded PDF in place and return its path.

    ``ocrmypdf.ocr`` is called twice on the same path (input and output are
    the same file): first with ``skip_text=True``, then with ``redo_ocr=True``.

    Args:
        input_file: Uploaded file object; only its ``name`` (the path) is used.

    Returns:
        The path of the processed PDF.
    """
    pdf_path = input_file.name
    # NOTE(review): the second pass redoes OCR after the first skipped pages
    # that already had text - confirm both passes are intended.
    for ocr_mode in ({"skip_text": True}, {"redo_ocr": True}):
        ocrmypdf.ocr(pdf_path, pdf_path, language="eng", **ocr_mode)
    return pdf_path
257
+
258
+
259
def answer_query(query):
    """Return the answer of the global ``pdf_qa`` chain to a free-form question.

    Args:
        query: The question text typed by the user.
    """
    return pdf_qa.run(query)
262
+
263
+
264
# Custom CSS passed to gr.Blocks: centres the element with id "col-container"
# and caps its width at 700px.
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""

# HTML header rendered via gr.HTML at the top of the app: heading plus short
# usage instructions.
title = """
<div style="text-align: center;max-width: 700px;">
<h1>Chatbot for PDFs - GPT-4</h1>
<p style="text-align: center;">Upload a .PDF, click the "Upload PDF and generate embeddings" button, <br />
Wait for the Status to show Ready. You can chose to get answers to the pre-defined question set OR ask your own question <br />
The app is built on GPT-4 and leverages PromptTemplate</p>
</div>
"""
276
+
277
+ with gr.Blocks(css=css,theme=gr.themes.Monochrome()) as demo:
278
+ with gr.Column(elem_id="col-container"):
279
+ gr.HTML(title)
280
+
281
+ with gr.Tab("Chatbot"):
282
+ with gr.Column():
283
+ pdf_doc = gr.File(label="Load a pdf",file_types=['.pdf'],type='file')
284
+ relevant_pages = gr.Textbox(label="*Optional - List comma separated page numbers to load or leave this field blank to use the entire PDF")
285
+
286
+ with gr.Row():
287
+ status = gr.Textbox(label="Status", placeholder="", interactive=False)
288
+ load_pdf = gr.Button("Upload PDF and generate embeddings").style(full_width=False)
289
+
290
+ with gr.Row():
291
+ input = gr.Textbox(label="Type in your question")
292
+ output = gr.Textbox(label="Answer")
293
+ submit_query = gr.Button("Submit your own question to gpt-4").style(full_width=False)
294
+
295
+ with gr.Row():
296
+ questionsets = gr.Dropdown(label="Pre-defined Question Sets stored in the DB", choices=[])
297
+ load_questionsets = gr.Button("Retrieve Pre-defined Question Sets from DB").style(full_width=False)
298
+ fields_and_questions = gr.Dataframe(label="Fields and Questions in the chosen Question Set")
299
+ load_fields_and_questions = gr.Button("Retrieve Pre-defined Questions from the DB for the chosen QuestionSet").style(full_width=False)
300
+
301
+ with gr.Row():
302
+ answers = gr.Dataframe(label="Answers to Predefined Question set")
303
+ answers_for_predefined_question_set = gr.Button("Get gpt-4 answers to the chosen pre-defined question set").style(full_width=False)
304
+
305
+ with gr.Tab("OCR Converter"):
306
+ with gr.Column():
307
+ image_pdf = gr.File(label="Load the pdf to be converted",file_types=['.pdf'],type='file')
308
+
309
+ with gr.Row():
310
+ ocr_pdf = gr.File(label="OCR'd pdf", file_types=['.pdf'],type='file',file_count="single")
311
+ convert_into_ocr = gr.Button("Convert").style(full_width=False)
312
+
313
+
314
+ with gr.Tab("Upload Question Set"):
315
+ with gr.Column():
316
+ document_types =["Mortgage 1040 US Individual Tax Returns 8453 Elec Form",
317
+ "Mortgage 1098",
318
+ "Mortgage 1099",
319
+ "Mortgage Abstract",
320
+ "Mortgage ACH Authorization Form",
321
+ "Mortgage Advance Fee Agreement",
322
+ "Mortgage Affidavit",
323
+ "Mortgage Affidavit of Suspense Funds",
324
+ "Mortgage Agreement Documents",
325
+ "Mortgage Sales Contract",
326
+ "Mortgage Loan Estimate",
327
+ "Mortgage Alimony Or Child Support",
328
+ "Mortgage Amended Proof Of Claim",
329
+ "Mortgage Amortization Schedule",
330
+ "Mortgage Flood Insurance",
331
+ "Mortgage Appraisal Report",
332
+ "Mortgage Appraisal Disclosure",
333
+ "Mortgage ARM Letter",
334
+ "Mortgage Arms Length Affidavit",
335
+ "Mortgage Assignment-Recorded",
336
+ "Mortgage Assignment-Unrecorded",
337
+ "Mortgage Assignment of Rent or Lease",
338
+ "Mortgage Automated Value Model",
339
+ "Mortgage Award Letters",
340
+ "Mortgage Bailee Letter",
341
+ "Mortgage Balloon Disclosure",
342
+ "Mortgage Bank Statement",
343
+ "Mortgage Bankruptcy Documents",
344
+ "Mortgage Bill of Sale",
345
+ "Mortgage Billing Statement",
346
+ "Mortgage Birth-Marriage-Death Certificate",
347
+ "Mortgage Borrower Certification Authorization",
348
+ "Mortgage Borrower Response Package",
349
+ "Mortgage Brokers Price Opinion",
350
+ "Mortgage Business Plan",
351
+ "Mortgage Buydown Agreement",
352
+ "Mortgage Bylaws Covenants Conditions Restrictions",
353
+ "Mortgage Cash for Keys",
354
+ "Mortgage Certificate of Redemption",
355
+ "Mortgage Certificate of Sale",
356
+ "Mortgage Certificate of Title",
357
+ "Mortgage Certification of Amount Due Payoff Reinstatement",
358
+ "Mortgage Checks-Regular or Cashiers",
359
+ "Mortgage Closing Disclosure",
360
+ "Mortgage Closing Protection Letter",
361
+ "Mortgage Closing Other",
362
+ "Mortgage Code Violations",
363
+ "Mortgage Request for Release",
364
+ "Mortgage Certificate of Liability Insurance",
365
+ "Mortgage Commitment Letter",
366
+ "Mortgage Complaint",
367
+ "Mortgage Complaint Answer Counter Claim",
368
+ "Mortgage Conditional Approval Letter",
369
+ "Mortgage Conditional Commitment",
370
+ "Mortgage Consent Order",
371
+ "Mortgage Consolidated Mortgage CEMA",
372
+ "Mortgage Conveyance Claims",
373
+ "Mortgage Correction and Revision Agreement",
374
+ "Mortgage Correspondence",
375
+ "Mortgage Court Order Settlement Divorce Decree",
376
+ "Mortgage Credit Report",
377
+ "Mortgage Customer Signature Authorization",
378
+ "Mortgage Debt Validation",
379
+ "Mortgage Deed",
380
+ "Mortgage Default Notices",
381
+ "Mortgage Direct Debit Authorization Form",
382
+ "Mortgage Disclosure Documents",
383
+ "Mortgage Document Checklist",
384
+ "Mortgage Document Correction and Fee Due Agreement",
385
+ "Mortgage Dodd Frank Certification",
386
+ "Mortgage Drivers License",
387
+ "Mortgage Request for VOE",
388
+ "Mortgage Environmental Indemnity Agreement",
389
+ "Mortgage Equal Credit Opportunity Act Notice",
390
+ "Mortgage Escrow Agreement",
391
+ "Mortgage Escrow Analysis Trial Balance Worksheet",
392
+ "Mortgage Instructions to Escrow Agent",
393
+ "Mortgage Escrow Letters",
394
+ "Mortgage Executed Deeds",
395
+ "Mortgage Fair Lending Notice",
396
+ "Mortgage Foreclosure Complaint",
397
+ "Mortgage Foreclosure Judgement",
398
+ "Mortgage Foreclosure Sale",
399
+ "Mortgage FHA Neighborhood Watch",
400
+ "Mortgage Truth-In-Lending Disclosure Statement",
401
+ "Mortgage Financial Form",
402
+ "Mortgage Financing Agreement",
403
+ "Mortgage First Payment Letter",
404
+ "Mortgage Forced Place Insurance Documents",
405
+ "Mortgage Foreclosure Documents",
406
+ "Mortgage Good Faith Estimate",
407
+ "Mortgage Guaranty",
408
+ "Mortgage HAMP Certifications",
409
+ "Mortgage HOA-Condo Covenants and Dues",
410
+ "Mortgage Exemption Hold Harmless Letter",
411
+ "Mortgage Home Equity Signature Verification Card",
412
+ "Mortgage Home Inspection",
413
+ "Mortgage Property Liability Insurance",
414
+ "Mortgage Homeowners Insurance Notice",
415
+ "Mortgage HUD-1 Settlement Statement",
416
+ "Mortgage Income Other",
417
+ "Mortgage Indemnity Agreement",
418
+ "Mortgage Informed Consumer Choice Disclosure Notice",
419
+ "Mortgage Initial Escrow Account Disclosure Statement",
420
+ "Mortgage Invoices",
421
+ "Mortgage Land Lease or Land Trust",
422
+ "Mortgage Land Title Adjustment",
423
+ "Mortgage Last Will and Testament",
424
+ "Mortgage Legal Description",
425
+ "Mortgage Letters Of Administration",
426
+ "Mortgage Letters of Testamentary",
427
+ "Mortgage Listing Agreement",
428
+ "Mortgage Litigation Guarantee",
429
+ "Mortgage DIL Closing",
430
+ "Mortgage Hardship Letter",
431
+ "Mortgage Hardship Affidavit",
432
+ "Mortgage Home Affordable Modification Agreement",
433
+ "Mortgage Profit And Loss",
434
+ "Mortgage Earnest Money Promissory Note",
435
+ "Mortgage Rental Agreement",
436
+ "Mortgage Repayment Plan",
437
+ "Mortgage Short Sale Miscellaneous",
438
+ "Mortgage LM - Trial Offer Letter or Plan",
439
+ "Mortgage Errors and Omissions Agreement",
440
+ "Mortgage Custom Type 2",
441
+ "Mortgage Custom Type 1",
442
+ "Mortgage Loan Agreement",
443
+ "Mortgage Loan Closing Information Summary",
444
+ "Mortgage Loan Modification",
445
+ "Mortgage Loan Summary Report",
446
+ "Mortgage Lock Confirmation",
447
+ "Mortgage Loss Drafts",
448
+ "Mortgage Loss Mitigation",
449
+ "Mortgage Lost Assignment Affidavit",
450
+ "Mortgage Mech Lien",
451
+ "Mortgage Mediation",
452
+ "Mortgage MI Claim Explanation of Benefits",
453
+ "Mortgage MI Policy Cancellation Document",
454
+ "Mortgage MI Repurchase Document",
455
+ "Mortgage Miscellaneous Lien Release",
456
+ "Mortgage Mobile Home Documentation",
457
+ "Mortgage Monthly Activity Report",
458
+ "Mortgage Deed of Trust-Recorded",
459
+ "Mortgage PMI Disclosure",
460
+ "Mortgage Payments",
461
+ "Mortgage Deed of Trust-Unrecorded",
462
+ "Mortgage Motion For Relief",
463
+ "Mortgage Note",
464
+ "Mortgage Note Affidavit",
465
+ "Mortgage Note Endorsements",
466
+ "Mortgage Notice Of Appearance",
467
+ "Mortgage Notice of Default Filedrecorded",
468
+ "Mortgage Notice of Final Cure",
469
+ "Mortgage Notice of Levy",
470
+ "Mortgage Notice of Payment Change",
471
+ "Mortgage Notice of Right to Cancel",
472
+ "Mortgage Notice of Sale",
473
+ "Mortgage Notice of Second Lien",
474
+ "Mortgage Notice of Servicing Transfer-Transferee",
475
+ "Mortgage Notice of Servicing Transfer-Transferor",
476
+ "Mortgage Notice of Termination",
477
+ "Mortgage Notice to Quit",
478
+ "Mortgage Objection to Claim",
479
+ "Mortgage Processing and Underwriting Doc Set",
480
+ "Mortgage Objection to Motion for Relief",
481
+ "Mortgage Affidavit of Occupancy",
482
+ "Mortgage Occupancy Agreement",
483
+ "Mortgage Occupancy Termination Agreement",
484
+ "Mortgage Ombudsman Documents",
485
+ "Mortgage Owner Affidavit",
486
+ "Mortgage Ownership and Encumbrances Report",
487
+ "Mortgage Pay History External",
488
+ "Mortgage Paystub",
489
+ "Mortgage Payoff Demand Statement",
490
+ "Mortgage PMI Certificate",
491
+ "Mortgage Post Petition Fee Notices",
492
+ "Mortgage Post Sale Documents",
493
+ "Mortgage Power of Attorney-Recorded",
494
+ "Mortgage Power of Attorney-Unrecorded",
495
+ "Mortgage Closing Instructions",
496
+ "Mortgage Preliminary Modification",
497
+ "Mortgage Merged-Privacy Policy Notice-Title Policy - Privacy Policy-1098 Privacy Policy",
498
+ "Mortgage Probate Court Order",
499
+ "Mortgage Proof of Claim",
500
+ "Mortgage Property Legal and Vesting Report",
501
+ "Mortgage Property Management Agreement",
502
+ "Mortgage Property Notices",
503
+ "Mortgage Public Assistance",
504
+ "Mortgage Record Owner and Lien Certificate",
505
+ "Mortgage Recorded Satisfaction",
506
+ "Mortgage Regfore Affidavit Executed",
507
+ "Mortgage Release of Lis Pendens",
508
+ "Mortgage REO Bids",
509
+ "Mortgage REO Other",
510
+ "Mortgage Form 26-1820 Report and Certificate of Loan Disbursement",
511
+ "Mortgage Request for Verification of Rent or Mortgage",
512
+ "Mortgage Request for Waiver of R.E. Tax Escrow Requirements",
513
+ "Mortgage 1003",
514
+ "Mortgage RMA Package",
515
+ "Mortgage Sale Postponement",
516
+ "Mortgage Sale or Milestone Rescission",
517
+ "Mortgage Satisfaction of Judgement Tax Mortgage Liens",
518
+ "Mortgage Security Agreement",
519
+ "Mortgage Separation Agreement",
520
+ "Mortgage Servicing Acquisition",
521
+ "Mortgage Servicing Disclosure Statement",
522
+ "Mortgage Short Payoffs",
523
+ "Mortgage Signature-Name Affidavit",
524
+ "Mortgage Assumption of Mortgage",
525
+ "Mortgage SCRA Related Documents",
526
+ "Mortgage Social Security Card or Customer ID",
527
+ "Mortgage Soft Delete",
528
+ "Mortgage Flood Hazard Determination Form",
529
+ "Mortgage Stipulated Agreement",
530
+ "Mortgage Subordination Agreement",
531
+ "Mortgage Subordination Request Form",
532
+ "Mortgage Appointment of Substitute Trustee",
533
+ "Mortgage Merged-Real Estate Taxes-Tax Bill-Tax Certificate",
534
+ "Mortgage Tax Certificate",
535
+ "Mortgage Tax Record Information Sheet",
536
+ "Mortgage Tax Liens",
537
+ "Mortgage Tax Search",
538
+ "Mortgage Third Party Authorization",
539
+ "Mortgage Title Commitment-Equity or Property Report",
540
+ "Mortgage Title Policy",
541
+ "Mortgage Title Policy Endorsement",
542
+ "Mortgage Title Search",
543
+ "Mortgage Title Insurance Other",
544
+ "Mortgage Transfer of Claim",
545
+ "Mortgage Uniform Underwriting and Transmittal Summary",
546
+ "Mortgage Trustee Sale Guarantee",
547
+ "Mortgage UCC-1 Financing Statement",
548
+ "Mortgage Others",
549
+ "Mortgage Unknown",
550
+ "Mortgage Utility Bill",
551
+ "Mortgage Valuation Orders",
552
+ "Mortgage Verification Document Set",
553
+ "Mortgage Verification of Service for Military Home Buyers",
554
+ "Mortgage W2",
555
+ "Mortgage W9",
556
+ "Mortgage Wire Transfer Instructions",
557
+ "Mortgage Workmens Compensation",
558
+ "Mortgage Writ of Possession",
559
+ "Mortgage Cover Page",
560
+ "Mortgage Barcode Page",
561
+ "Mortgage Wisconsin Tax Escrow Option Notice",
562
+ "Mortgage Hazard Insurance Declaration",
563
+ "Mortgage Flood Insurance Declaration",
564
+ "Mortgage Quitclaim Deed",
565
+ "Mortgage Tax Deed",
566
+ "Mortgage Warranty Deed",
567
+ "Mortgage ALTA Settlement Statement",
568
+ "Mortgage Home Inspection Waiver",
569
+ "Mortgage Insurance Disclosure"]
570
+ document_type_for_questionset = gr.Dropdown(choices=document_types, label="Select the Document Type")
571
+ tag_for_questionset = gr.Textbox(label="Please provide a name for the question set. Ex: rwikd-dot-basic-questionset-20230707.")
572
+ csv_file = gr.File(label="Load a csv - 2 columns with the headers as field, question",file_types=['.csv'],type='file')
573
+
574
+
575
+ with gr.Row():
576
+ status_for_loading_csv = gr.Textbox(label="Status", placeholder="", interactive=False)
577
+ load_csv = gr.Button("Upload data into the database").style(full_width=False)
578
+
579
+
580
+ load_pdf.click(load_pdf_and_generate_embeddings, inputs=[pdf_doc, relevant_pages], outputs=status)
581
+ load_csv.click(load_csv_and_store_questionset_into_sqlite, inputs=[csv_file, document_type_for_questionset, tag_for_questionset], outputs=status_for_loading_csv)
582
+
583
+ load_questionsets.click(retrieve_document_type_and_questionsettag_from_sqlite,outputs=questionsets)
584
+ load_fields_and_questions.click(retrieve_fields_and_questions,questionsets,fields_and_questions)
585
+ answers_for_predefined_question_set.click(answer_predefined_questions, questionsets, answers)
586
+
587
+ convert_into_ocr.click(ocr_converter,image_pdf, ocr_pdf)
588
+ submit_query.click(answer_query,input,output)
589
+
590
+
591
+ #Use this flavor of demo.launch if you need the app to have an admin page. Read the login credentials from
592
+ #environment variables (e.g. Space secrets); never write the username or password into the source.
593
+ #demo.launch(auth=(os.environ["ADMIN_USERNAME"], os.environ["ADMIN_PASSWORD"]))
594
+ demo.launch(debug=True)
595
+
596
+
597
+
598
+
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ tesseract-ocr-eng
2
+ ghostscript
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ openai
2
+ tiktoken
3
+ chromadb
4
+ langchain
5
+ unstructured
6
+ unstructured[local-inference]
7
+ pytesseract
8
+ ocrmypdf
9
+
10
+
11
+
12
+
13
+