Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,7 @@ import langchain
|
|
4 |
from langchain_huggingface import HuggingFaceEmbeddings
|
5 |
from langchain_community.document_loaders import UnstructuredPDFLoader,UnstructuredWordDocumentLoader
|
6 |
from langchain.indexes import VectorstoreIndexCreator
|
7 |
-
from langchain_community.vectorstores import FAISS
|
8 |
from zipfile import ZipFile
|
9 |
import gradio as gr
|
10 |
import openpyxl
|
@@ -136,7 +136,7 @@ def merge_txt_to_db(filename,db,progress,progress_step=0.1):
|
|
136 |
progress(progress_step,'txt unpacked')
|
137 |
return merge_split_docs_to_db(split_docs,db,progress,progress_step)
|
138 |
|
139 |
-
def unpack_zip_file(filename:str,db:FAISS,progress):
|
140 |
with ZipFile(filename, 'r') as zipObj:
|
141 |
contents = zipObj.namelist()
|
142 |
print(f"unpack zip: contents: {contents}")
|
@@ -144,7 +144,7 @@ def unpack_zip_file(filename:str,db:FAISS,progress):
|
|
144 |
shutil.unpack_archive(filename, tmp_directory)
|
145 |
|
146 |
if 'index.faiss' in [item.lower() for item in contents]:
|
147 |
-
db2 =
|
148 |
db.merge_from(db2)
|
149 |
return db
|
150 |
|
@@ -179,11 +179,11 @@ def embed_files(files,ui_session_id,progress=gr.Progress(),progress_step=0.05):
|
|
179 |
session_id = f"PDFAISS-{ui_session_id}"
|
180 |
|
181 |
try:
|
182 |
-
db =
|
183 |
except:
|
184 |
print(f"SESSION: {session_id} database does not exist, create a FAISS db")
|
185 |
#db = FAISS.from_documents([foo], embeddings)
|
186 |
-
db =
|
187 |
db.save_local(session_id)
|
188 |
print(f"SESSION: {session_id} database created")
|
189 |
|
@@ -222,7 +222,7 @@ def embed_files(files,ui_session_id,progress=gr.Progress(),progress_step=0.05):
|
|
222 |
|
223 |
### load the updated db and zip it ###
|
224 |
progress(progress_step, desc = 'loading db')
|
225 |
-
db =
|
226 |
print("EMBEDDED, after embeddeding: ",session_id,len(db.index_to_docstore_id))
|
227 |
progress(progress_step, desc = 'zipping db for download')
|
228 |
add_files_to_zip(session_id)
|
@@ -316,7 +316,7 @@ def ask_gpt(query, ui_session_id, history):
|
|
316 |
return "Please Login", "", ""
|
317 |
session_id = f"PDFAISS-{ui_session_id}"
|
318 |
try:
|
319 |
-
db =
|
320 |
print("ASKGPT after loading",session_id,len(db.index_to_docstore_id))
|
321 |
except:
|
322 |
print(f"SESSION: {session_id} database does not exist")
|
|
|
4 |
from langchain_huggingface import HuggingFaceEmbeddings
|
5 |
from langchain_community.document_loaders import UnstructuredPDFLoader,UnstructuredWordDocumentLoader
|
6 |
from langchain.indexes import VectorstoreIndexCreator
|
7 |
+
from langchain_community.vectorstores import LangChainFAISS
|
8 |
from zipfile import ZipFile
|
9 |
import gradio as gr
|
10 |
import openpyxl
|
|
|
136 |
progress(progress_step,'txt unpacked')
|
137 |
return merge_split_docs_to_db(split_docs,db,progress,progress_step)
|
138 |
|
139 |
+
def unpack_zip_file(filename:str,db:LangChainFAISS,progress):
|
140 |
with ZipFile(filename, 'r') as zipObj:
|
141 |
contents = zipObj.namelist()
|
142 |
print(f"unpack zip: contents: {contents}")
|
|
|
144 |
shutil.unpack_archive(filename, tmp_directory)
|
145 |
|
146 |
if 'index.faiss' in [item.lower() for item in contents]:
|
147 |
+
db2 = LangChainFAISS.load_local(tmp_directory, embeddings, allow_dangerous_deserialization=True)
|
148 |
db.merge_from(db2)
|
149 |
return db
|
150 |
|
|
|
179 |
session_id = f"PDFAISS-{ui_session_id}"
|
180 |
|
181 |
try:
|
182 |
+
db = LangChainFAISS.load_local(session_id,embeddings, allow_dangerous_deserialization=True)
|
183 |
except:
|
184 |
print(f"SESSION: {session_id} database does not exist, create a FAISS db")
|
185 |
#db = FAISS.from_documents([foo], embeddings)
|
186 |
+
db = LangChainFAISS.from_texts(["foo is fou!"],embeddings,[{"source":"foo source"}])
|
187 |
db.save_local(session_id)
|
188 |
print(f"SESSION: {session_id} database created")
|
189 |
|
|
|
222 |
|
223 |
### load the updated db and zip it ###
|
224 |
progress(progress_step, desc = 'loading db')
|
225 |
+
db = LangChainFAISS.load_local(session_id,embeddings, allow_dangerous_deserialization=True)
|
226 |
print("EMBEDDED, after embeddeding: ",session_id,len(db.index_to_docstore_id))
|
227 |
progress(progress_step, desc = 'zipping db for download')
|
228 |
add_files_to_zip(session_id)
|
|
|
316 |
return "Please Login", "", ""
|
317 |
session_id = f"PDFAISS-{ui_session_id}"
|
318 |
try:
|
319 |
+
db = LangChainFAISS.load_local(session_id,embeddings, allow_dangerous_deserialization=True)
|
320 |
print("ASKGPT after loading",session_id,len(db.index_to_docstore_id))
|
321 |
except:
|
322 |
print(f"SESSION: {session_id} database does not exist")
|