Almaatla committed on
Commit
52f59e9
·
verified ·
1 Parent(s): 1665acb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -4,7 +4,7 @@ import langchain
4
  from langchain_huggingface import HuggingFaceEmbeddings
5
  from langchain_community.document_loaders import UnstructuredPDFLoader,UnstructuredWordDocumentLoader
6
  from langchain.indexes import VectorstoreIndexCreator
7
- from langchain_community.vectorstores import FAISS
8
  from zipfile import ZipFile
9
  import gradio as gr
10
  import openpyxl
@@ -136,7 +136,7 @@ def merge_txt_to_db(filename,db,progress,progress_step=0.1):
136
  progress(progress_step,'txt unpacked')
137
  return merge_split_docs_to_db(split_docs,db,progress,progress_step)
138
 
139
- def unpack_zip_file(filename:str,db:FAISS,progress):
140
  with ZipFile(filename, 'r') as zipObj:
141
  contents = zipObj.namelist()
142
  print(f"unpack zip: contents: {contents}")
@@ -144,7 +144,7 @@ def unpack_zip_file(filename:str,db:FAISS,progress):
144
  shutil.unpack_archive(filename, tmp_directory)
145
 
146
  if 'index.faiss' in [item.lower() for item in contents]:
147
- db2 = FAISS.load_local(tmp_directory, embeddings, allow_dangerous_deserialization=True)
148
  db.merge_from(db2)
149
  return db
150
 
@@ -179,11 +179,11 @@ def embed_files(files,ui_session_id,progress=gr.Progress(),progress_step=0.05):
179
  session_id = f"PDFAISS-{ui_session_id}"
180
 
181
  try:
182
- db = FAISS.load_local(session_id,embeddings, allow_dangerous_deserialization=True)
183
  except:
184
  print(f"SESSION: {session_id} database does not exist, create a FAISS db")
185
  #db = FAISS.from_documents([foo], embeddings)
186
- db = FAISS.from_texts(["foo is fou!"],embeddings,[{"source":"foo source"}])
187
  db.save_local(session_id)
188
  print(f"SESSION: {session_id} database created")
189
 
@@ -222,7 +222,7 @@ def embed_files(files,ui_session_id,progress=gr.Progress(),progress_step=0.05):
222
 
223
  ### load the updated db and zip it ###
224
  progress(progress_step, desc = 'loading db')
225
- db = FAISS.load_local(session_id,embeddings, allow_dangerous_deserialization=True)
226
  print("EMBEDDED, after embeddeding: ",session_id,len(db.index_to_docstore_id))
227
  progress(progress_step, desc = 'zipping db for download')
228
  add_files_to_zip(session_id)
@@ -316,7 +316,7 @@ def ask_gpt(query, ui_session_id, history):
316
  return "Please Login", "", ""
317
  session_id = f"PDFAISS-{ui_session_id}"
318
  try:
319
- db = FAISS.load_local(session_id,embeddings, allow_dangerous_deserialization=True)
320
  print("ASKGPT after loading",session_id,len(db.index_to_docstore_id))
321
  except:
322
  print(f"SESSION: {session_id} database does not exist")
 
4
  from langchain_huggingface import HuggingFaceEmbeddings
5
  from langchain_community.document_loaders import UnstructuredPDFLoader,UnstructuredWordDocumentLoader
6
  from langchain.indexes import VectorstoreIndexCreator
7
+ from langchain_community.vectorstores import LangChainFAISS
8
  from zipfile import ZipFile
9
  import gradio as gr
10
  import openpyxl
 
136
  progress(progress_step,'txt unpacked')
137
  return merge_split_docs_to_db(split_docs,db,progress,progress_step)
138
 
139
+ def unpack_zip_file(filename:str,db:LangChainFAISS,progress):
140
  with ZipFile(filename, 'r') as zipObj:
141
  contents = zipObj.namelist()
142
  print(f"unpack zip: contents: {contents}")
 
144
  shutil.unpack_archive(filename, tmp_directory)
145
 
146
  if 'index.faiss' in [item.lower() for item in contents]:
147
+ db2 = LangChainFAISS.load_local(tmp_directory, embeddings, allow_dangerous_deserialization=True)
148
  db.merge_from(db2)
149
  return db
150
 
 
179
  session_id = f"PDFAISS-{ui_session_id}"
180
 
181
  try:
182
+ db = LangChainFAISS.load_local(session_id,embeddings, allow_dangerous_deserialization=True)
183
  except:
184
  print(f"SESSION: {session_id} database does not exist, create a FAISS db")
185
  #db = FAISS.from_documents([foo], embeddings)
186
+ db = LangChainFAISS.from_texts(["foo is fou!"],embeddings,[{"source":"foo source"}])
187
  db.save_local(session_id)
188
  print(f"SESSION: {session_id} database created")
189
 
 
222
 
223
  ### load the updated db and zip it ###
224
  progress(progress_step, desc = 'loading db')
225
+ db = LangChainFAISS.load_local(session_id,embeddings, allow_dangerous_deserialization=True)
226
  print("EMBEDDED, after embeddeding: ",session_id,len(db.index_to_docstore_id))
227
  progress(progress_step, desc = 'zipping db for download')
228
  add_files_to_zip(session_id)
 
316
  return "Please Login", "", ""
317
  session_id = f"PDFAISS-{ui_session_id}"
318
  try:
319
+ db = LangChainFAISS.load_local(session_id,embeddings, allow_dangerous_deserialization=True)
320
  print("ASKGPT after loading",session_id,len(db.index_to_docstore_id))
321
  except:
322
  print(f"SESSION: {session_id} database does not exist")