Update app.py: add @spaces.GPU(duration=120) decorators
Browse files
app.py
CHANGED
@@ -69,15 +69,18 @@ class CFG:
|
|
69 |
PDFs_path = './data'
|
70 |
Embeddings_path = './embeddings/input'
|
71 |
Output_folder = './ml-papers-vector'
|
72 |
-
|
|
|
73 |
loader = DirectoryLoader(CFG.PDFs_path, glob="./*.pdf", loader_cls=PyPDFLoader,use_multithreading=True)
|
74 |
|
|
|
75 |
documents = loader.load()
|
76 |
|
|
|
77 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size = CFG.split_chunk_size, chunk_overlap = CFG.split_overlap)
|
78 |
-
|
79 |
texts = text_splitter.split_documents(documents)
|
80 |
-
|
81 |
if not os.path.exists(CFG.Embeddings_path + '/index.faiss'):
|
82 |
embeddings = HuggingFaceInstructEmbeddings(model_name = CFG.embeddings_model_repo, model_kwargs={"device":"cuda"})
|
83 |
vectordb = FAISS.from_documents(documents=texts, embedding=embeddings)
|
|
|
69 |
PDFs_path = './data'
|
70 |
Embeddings_path = './embeddings/input'
|
71 |
Output_folder = './ml-papers-vector'
|
72 |
+
|
73 |
+
@spaces.GPU(duration=120)
|
74 |
loader = DirectoryLoader(CFG.PDFs_path, glob="./*.pdf", loader_cls=PyPDFLoader,use_multithreading=True)
|
75 |
|
76 |
+
@spaces.GPU(duration=120)
|
77 |
documents = loader.load()
|
78 |
|
79 |
+
@spaces.GPU(duration=120)
|
80 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size = CFG.split_chunk_size, chunk_overlap = CFG.split_overlap)
|
81 |
+
@spaces.GPU(duration=120)
|
82 |
texts = text_splitter.split_documents(documents)
|
83 |
+
@spaces.GPU(duration=120)
|
84 |
if not os.path.exists(CFG.Embeddings_path + '/index.faiss'):
|
85 |
embeddings = HuggingFaceInstructEmbeddings(model_name = CFG.embeddings_model_repo, model_kwargs={"device":"cuda"})
|
86 |
vectordb = FAISS.from_documents(documents=texts, embedding=embeddings)
|