AbeerTrial committed on
Commit
4aee695
·
1 Parent(s): 26e6817

Upload 14 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ local_db/index.faiss filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import shutil
2
+ import os
3
+
4
+ # def copy_files(source_folder, destination_folder):
5
+ # # Create the destination folder if it doesn't exist
6
+ # if not os.path.exists(destination_folder):
7
+ # os.makedirs(destination_folder)
8
+
9
+ # # Get a list of files in the source folder
10
+
11
+ # files_to_copy = os.listdir(source_folder)
12
+ # for file_name in files_to_copy:
13
+ # source_file_path = os.path.join(source_folder, file_name)
14
+ # destination_file_path = os.path.join(destination_folder, file_name)
15
+
16
+ # # Copy the file to the destination folder
17
+ # shutil.copy(source_file_path, destination_file_path)
18
+
19
+ # print(f"Copied {file_name} to {destination_folder}")
20
+
21
+ # # Specify the source folder and destination folder paths
22
+ # source_folder = "/kaggle/input/fiver-app5210"
23
+ # destination_folder = "/local_db"
24
+
25
+ # copy_files(source_folder, destination_folder)
26
+
27
+ # def copy_files(source_folder, destination_folder):
28
+ # # Create the destination folder if it doesn't exist
29
+ # if not os.path.exists(destination_folder):
30
+ # os.makedirs(destination_folder)
31
+
32
+ # # Get a list of files in the source folder
33
+ # files_to_copy = os.listdir(source_folder)
34
+
35
+ # for file_name in files_to_copy:
36
+ # source_file_path = os.path.join(source_folder, file_name)
37
+ # destination_file_path = os.path.join(destination_folder, file_name)
38
+
39
+ # # Copy the file to the destination folder
40
+ # shutil.copy(source_file_path, destination_file_path)
41
+
42
+ # print(f"Copied {file_name} to {destination_folder}")
43
+
44
+ # # Specify the source folder and destination folder paths
45
+ # source_folder = "/kaggle/input/fiver-app-docs"
46
+ # destination_folder = "/docs"
47
+
48
+ # copy_files(source_folder, destination_folder)
49
+
50
+ import os
51
+ import openai
52
+
53
# Silence the HuggingFace tokenizers fork warning.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# A missing key must not abort start-up: the key can still be supplied
# through the UI (handled by api_key), so only report that it is absent.
if not os.environ.get("OPENAI_API_KEY"):
    print("OPENAI_API_KEY is not set; enter a key in the app to enable OpenAI features.")
55
+
56
+
57
def api_key(key):
    """Register the OpenAI API key for the running process.

    Args:
        key: API key text entered by the user.

    Returns:
        "Successful!" once the key has been stored, or a prompt asking for
        a key when the input is empty or only whitespace.
    """
    import os

    # Pasted keys often carry stray whitespace; an empty key must not be
    # reported as a success.
    cleaned = (key or "").strip()
    if not cleaned:
        return "Please enter a valid API key."

    import openai

    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    os.environ["OPENAI_API_KEY"] = cleaned
    openai.api_key = cleaned

    return "Successful!"
67
+
68
def save_file(input_file):
    """Copy the uploaded files into the app's upload folder.

    Args:
        input_file: Iterable of uploaded file objects exposing a ``name``
            attribute that holds the path of the temporary upload, or
            ``None``/empty when nothing was uploaded.

    Returns:
        A status message for the UI.
    """
    import os
    import shutil

    # Nothing selected: report it instead of failing while iterating None.
    if not input_file:
        return "No file(s) uploaded."

    destination_dir = "/home/user/app/file/"
    os.makedirs(destination_dir, exist_ok=True)

    for uploaded in input_file:
        shutil.copy(uploaded.name, destination_dir)

    return "File(s) saved successfully!"
81
+
82
def process_file():
    """Build and save a FAISS index from the uploaded PDF, TXT and DOCX files.

    Reads every matching file in the upload folder, splits the text into
    overlapping chunks, embeds them with OpenAI embeddings and stores the
    index under ``/home/user/app/file_db/``.

    Returns:
        A status message for the UI.
    """
    import os

    source_dir = '/home/user/app/file/'
    nothing_to_do = "No supported file(s) found to process. Upload file(s) first."

    # Without an upload folder there is nothing to index.
    if not os.path.isdir(source_dir):
        return nothing_to_do

    from langchain.document_loaders import PyPDFLoader
    from langchain.document_loaders import DirectoryLoader
    from langchain.document_loaders import TextLoader
    from langchain.document_loaders import Docx2txtLoader
    from langchain.vectorstores import FAISS
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.text_splitter import CharacterTextSplitter

    # One loader class per supported extension, loaded in this fixed order.
    loader_by_pattern = (
        ("./*.pdf", PyPDFLoader),
        ("./*.txt", TextLoader),
        ("./*.docx", Docx2txtLoader),
    )

    documents = []
    for pattern, loader_cls in loader_by_pattern:
        loader = DirectoryLoader(source_dir, glob=pattern, loader_cls=loader_cls)
        documents.extend(loader.load())

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    docs = text_splitter.split_documents(documents)

    # Building an index from zero chunks fails inside the vector store, so
    # stop here with a readable message instead.
    if not docs:
        return nothing_to_do

    embeddings = OpenAIEmbeddings()
    file_db = FAISS.from_documents(docs, embeddings)
    file_db.save_local("/home/user/app/file_db/")

    return "File(s) processed successfully!"
117
+
118
def formatted_response(docs, response):
    """Append a "Sources" list to an answer.

    Args:
        docs: Retrieved documents; each exposes a ``metadata`` mapping that
            may contain ``source`` (a file path) and ``page``.
        response: Answer text produced by the model.

    Returns:
        The answer followed by one line per document giving the file name
        and, when available, the page number.
    """
    lines = [response + "\n\nSources"]

    for doc in docs:
        source_info = doc.metadata.get('source', 'Unknown source')
        page_info = doc.metadata.get('page', None)

        # Show only the file name, not the directory it was loaded from.
        file_name = source_info.split('/')[-1].strip()

        if page_info is not None:
            # PDF loaders store 0-based page indices; show 1-based page
            # numbers so the first page is not reported as "page no 0".
            if isinstance(page_info, int):
                page_info += 1
            lines.append(f"{file_name}\tpage no {page_info}")
        else:
            lines.append(file_name)

    return "\n".join(lines)
134
+
135
def _answer_from_index(index_dir, question, missing_message):
    """Answer a question from the FAISS index stored in ``index_dir``.

    Args:
        index_dir: Folder holding a saved FAISS index.
        question: Question to answer.
        missing_message: Text returned when no index file is present.

    Returns:
        The model's answer followed by the list of sources, or
        ``missing_message`` when the index has not been created.
    """
    import os

    # Loading a missing index raises deep inside FAISS; report it instead.
    if not os.path.exists(os.path.join(index_dir, "index.faiss")):
        return missing_message

    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.vectorstores import FAISS
    from langchain.chains.question_answering import load_qa_chain
    from langchain.callbacks import get_openai_callback
    from langchain.chat_models import ChatOpenAI

    embeddings = OpenAIEmbeddings()
    file_db = FAISS.load_local(index_dir, embeddings)
    docs = file_db.similarity_search(question)

    llm = ChatOpenAI(model_name='gpt-3.5-turbo')
    chain = load_qa_chain(llm, chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=question)
        # Log token usage and cost for this request.
        print(cb)

    return formatted_response(docs, response)


def search_file(question):
    """Answer a question from the index built out of the uploaded files.

    Args:
        question: Question to answer.

    Returns:
        The answer with its sources, or a hint to process files first.
    """
    return _answer_from_index(
        "/home/user/app/file_db/",
        question,
        "No processed files found. Please upload and process your file(s) first.",
    )


def search_local(question):
    """Answer a question from the index bundled with the app.

    Args:
        question: Question to answer.

    Returns:
        The answer with its sources, or a notice that the index is missing.
    """
    return _answer_from_index(
        "/home/user/app/local_db/",
        question,
        "The local index could not be found.",
    )
176
+
177
def delete_file():
    """Remove the upload folder and the index built from it.

    Each folder is removed independently, so a missing upload folder does
    not prevent the index folder from being deleted.

    Returns:
        "Deleted Successfully" when at least one folder was removed,
        "Already Deleted" when neither existed, or an error message when a
        folder could not be removed.
    """
    import shutil

    removed_any = False
    for path in ("/home/user/app/file/", "/home/user/app/file_db/"):
        try:
            shutil.rmtree(path)
        except FileNotFoundError:
            # Already gone; keep going so the other folder is still handled.
            continue
        except OSError as err:
            return f"Could not delete {path}: {err}"
        removed_any = True

    return "Deleted Successfully" if removed_any else "Already Deleted"
191
+
192
+ import os
193
+
194
def list_files_in_directory(directory):
    """Return the names of all files found under ``directory``.

    Sub-folders are searched too; only the bare file names are returned,
    without their folder.
    """
    return [name for _, _, names in os.walk(directory) for name in names]
200
+
201
# Collect the report files once at start-up; the names feed the
# "Choose File" dropdown and are echoed to the log.
directory_path = '/home/user/app/docs'
file_list = list_files_in_directory(directory_path)

print("List of file names in the directory:")
if file_list:
    print("\n".join(file_list))
207
+
208
def soap_report(doc_name, question):
    """Answer a question about one of the stored SOAP report documents.

    Args:
        doc_name: File name of a .docx report inside ``/home/user/app/docs/``.
        question: Question to ask about the report.

    Returns:
        The model's answer, or a prompt to choose a file when none is given.
    """
    import os

    # No dropdown selection: report it instead of failing on the path build.
    if not doc_name:
        return "Please choose a file."

    from langchain.llms import OpenAI
    from langchain import PromptTemplate, LLMChain
    import docx

    # Keep only the file name so the lookup cannot leave the docs folder.
    docx_path = '/home/user/app/docs/' + os.path.basename(doc_name)

    doc = docx.Document(docx_path)
    body = ''.join(paragraph.text + '\n' for paragraph in doc.paragraphs)

    extracted_text = (
        'Extracted text:\n\n\n'
        + body
        + "\n\nUse the 'Extracted text' to answer the following question:\n"
        + (question or "")
    )
    print(extracted_text)

    template = "Question: {question}\n\nAnswer: Let's think step by step."

    prompt = PromptTemplate(template=template, input_variables=["question"])
    llm = OpenAI()
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    response = llm_chain.run(extracted_text)

    return response
240
+
241
def search_gpt(question):
    """Send the question straight to the OpenAI completion model.

    The question is wrapped in a "think step by step" prompt; no document
    context is added.

    Args:
        question: Question to ask.

    Returns:
        The model's answer text.
    """
    from langchain import LLMChain, PromptTemplate
    from langchain.llms import OpenAI

    step_by_step = PromptTemplate(
        template="Question: {question}\n\nAnswer: Let's think step by step.",
        input_variables=["question"],
    )
    chain = LLMChain(prompt=step_by_step, llm=OpenAI())

    return chain.run(question)
255
+
256
def local_gpt(question):
    """Send the question straight to the OpenAI completion model.

    Behaves exactly like ``search_gpt``; it exists as a separate name for
    the "Chat with your Local Files" tab and delegates instead of
    repeating the same prompt code.

    Args:
        question: Question to ask.

    Returns:
        The model's answer text.
    """
    return search_gpt(question)
270
+
271
# State shared by the audio tab's handlers: `output` holds the latest
# transcript and `response` the latest generated SOAP report. Both start
# as None so the names exist before any handler has run.
# NOTE: these are module globals, so every session of the app shares them.
output = None
response = None
273
+
274
def audio_text(filepath):
    """Transcribe a recorded audio file with the Whisper API.

    The transcript is also stored in the module-level ``output`` so that
    ``text_soap`` can use it afterwards.

    Args:
        filepath: Path of the recorded audio file, or ``None``/empty when
            nothing was recorded.

    Returns:
        The transcript text, or a prompt to record audio first.
    """
    global output

    # No recording: report it instead of failing on open(None).
    if not filepath:
        return "Please record audio first."

    import openai

    # The context manager closes the file even if the API call fails.
    with open(filepath, "rb") as audio:
        transcript = openai.Audio.transcribe("whisper-1", audio)
    output = transcript["text"]

    return output
283
+
284
def text_soap():
    """Generate a SOAP report from the most recent transcript.

    Reads the transcript from the module-level ``output`` and stores the
    generated report in the module-level ``response`` so that it can be
    saved to a .docx file afterwards.

    Returns:
        The generated report, or a prompt to create a transcript first.
    """
    global response

    # The transcript may be missing or empty when no audio has been
    # transcribed yet; look it up without raising NameError.
    transcript = globals().get("output")
    if not transcript:
        return "Please generate a transcript first."

    from langchain.llms import OpenAI
    from langchain import PromptTemplate, LLMChain

    question = "Use the following context given below to generate a detailed SOAP Report:\n\n"
    question += transcript
    print(question)

    template = "Question: {question}\n\nAnswer: Let's think step by step."

    prompt = PromptTemplate(template=template, input_variables=["question"])
    llm = OpenAI()
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    response = llm_chain.run(question)

    return response
305
+
306
def docx(name):
    """Save the most recent SOAP report as a .docx file in the docs folder.

    Args:
        name: File name (without extension) typed by the user.

    Returns:
        A status message for the UI.
    """
    import os

    # Keep only the final path component so a typed name such as
    # "../../x" cannot write outside the docs folder.
    file_stem = os.path.basename((name or "").strip())
    if not file_stem:
        return "Please enter a name for the .docx File"

    # The report may not exist yet; look it up without raising NameError.
    report = globals().get("response")
    if not report:
        return "Please generate a SOAP report first."

    # Imported under an alias because this function shares the package's name.
    import docx as python_docx

    path = f"/home/user/app/docs/{file_stem}.docx"

    doc = python_docx.Document()
    doc.add_paragraph(report)
    doc.save(path)

    return "Successfully saved .docx File"
317
+
318
+ import gradio as gr
319
+
320
# Centres a column and limits it to half of the page width.
css = """
.col{
    max-width: 50%;
    margin: 0 auto;
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
}
"""

# Layout: four top-level tabs, the first of which nests two sub-tabs.
with gr.Blocks(css=css) as demo:
    gr.Markdown("File Chatting App")

    with gr.Tab("Chat with your Files"):
        with gr.Column(elem_classes="col"):

            with gr.Tab("Upload and Process your Files"):
                with gr.Column():

                    api_key_input = gr.Textbox(label="Enter your API Key here")
                    api_key_button = gr.Button("Submit")
                    api_key_output = gr.Textbox(label="Output")

                    file_input = gr.Files(label="Upload your File(s) here")
                    upload_button = gr.Button("Upload")
                    file_output = gr.Textbox(label="Output")

                    process_button = gr.Button("Process")
                    process_output = gr.Textbox(label="Output")

            with gr.Tab("Ask Questions to your Files"):
                with gr.Column():

                    search_input = gr.Textbox(label="Enter your Question here")
                    search_button = gr.Button("Search")
                    search_output = gr.Textbox(label="Output")

                    search_gpt_button = gr.Button("Ask ChatGPT")
                    search_gpt_output = gr.Textbox(label="Output")

                    delete_button = gr.Button("Delete")
                    delete_output = gr.Textbox(label="Output")

    with gr.Tab("Chat with your Local Files"):
        with gr.Column(elem_classes="col"):

            local_search_input = gr.Textbox(label="Enter your Question here")
            local_search_button = gr.Button("Search")
            local_search_output = gr.Textbox(label="Output")

            local_gpt_button = gr.Button("Ask ChatGPT")
            local_gpt_output = gr.Textbox(label="Output")

    with gr.Tab("Ask Question to SOAP Report"):
        with gr.Column(elem_classes="col"):

            soap_input = gr.Dropdown(choices=file_list, label="Choose File")
            soap_question = gr.Textbox(label="Enter your Question here")
            soap_button = gr.Button("Submit")
            soap_output = gr.Textbox(label="Output")

    with gr.Tab("Convert Audio to SOAP Report"):
        with gr.Column(elem_classes="col"):

            audio_text_input = gr.Audio(source="microphone", type="filepath", label="Upload your Audio File here")
            audio_text_button = gr.Button("Generate Transcript")
            audio_text_output = gr.Textbox(label="Output")

            text_soap_button = gr.Button("Generate SOAP Report")
            text_soap_output = gr.Textbox(label="Output")

            docx_input = gr.Textbox(label="Enter the Name of .docx File")
            docx_button = gr.Button("Save .docx File")
            docx_output = gr.Textbox(label="Output")

    # Event wiring. Each button is bound exactly once: binding the same
    # handler twice makes a single click run it twice, which doubles the
    # OpenAI requests for transcription and report generation.
    api_key_button.click(api_key, inputs=api_key_input, outputs=api_key_output)

    upload_button.click(save_file, inputs=file_input, outputs=file_output)
    process_button.click(process_file, inputs=None, outputs=process_output)

    search_button.click(search_file, inputs=search_input, outputs=search_output)
    search_gpt_button.click(search_gpt, inputs=search_input, outputs=search_gpt_output)

    delete_button.click(delete_file, inputs=None, outputs=delete_output)

    local_search_button.click(search_local, inputs=local_search_input, outputs=local_search_output)
    local_gpt_button.click(local_gpt, inputs=local_search_input, outputs=local_gpt_output)

    soap_button.click(soap_report, inputs=[soap_input, soap_question], outputs=soap_output)

    audio_text_button.click(audio_text, inputs=audio_text_input, outputs=audio_text_output)
    text_soap_button.click(text_soap, inputs=None, outputs=text_soap_output)
    docx_button.click(docx, inputs=docx_input, outputs=docx_output)


demo.queue()
demo.launch()
421
+
422
+
423
+
424
+ # # Commented out IPython magic to ensure Python compatibility.
425
+ # #download file_db
426
+
427
+ # # %cd /kaggle/working/
428
+
429
+ # !zip -r "file_db.zip" "file_db"
430
+
431
+ # from IPython.display import FileLink
432
+ # FileLink("file_db.zip")
docs/Benjamin Martinez.docx ADDED
Binary file (27.5 kB). View file
 
docs/David Moore.docx ADDED
Binary file (27.9 kB). View file
 
docs/Isabella Brown.docx ADDED
Binary file (28.3 kB). View file
 
docs/Jackson Lee.docx ADDED
Binary file (27.1 kB). View file
 
docs/Jerry Tylor.docx ADDED
Binary file (27.8 kB). View file
 
docs/Mason Jones.docx ADDED
Binary file (28 kB). View file
 
docs/Olivia Thomas.docx ADDED
Binary file (27 kB). View file
 
docs/Samual Harris.docx ADDED
Binary file (27.5 kB). View file
 
docs/Sophia Johnson.docx ADDED
Binary file (27.9 kB). View file
 
docs/William Anderson.docx ADDED
Binary file (27.6 kB). View file
 
local_db/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd0ec24292a11baff18e2d7dabd979640a377e99ee0ccbd32ea7550c439039b9
3
+ size 2107437
local_db/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e1634917e29728132745ef3406e8d38c76879f209cd4a1d1caad70e5a308443
3
+ size 321281
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ numpy==1.22.0
2
+ langchain
3
+ PyPDF2
4
+ docx2txt
5
+ gradio
6
+ faiss-gpu
7
+ openai
8
+ tiktoken
9
+ python-docx
10
+ git+https://github.com/openai/whisper.git
11
+ sounddevice