|
from langchain_community.document_loaders import PyMuPDFLoader
|
|
from langchain_community.vectorstores import FAISS
|
|
from config import embeddings
|
|
|
|
def _clean_page_text(text):
    """Return *text* with the layout residue removed before embedding."""
    # NOTE(review): replace(" ", "") deletes every single space, which joins
    # all words together -- confirm this is intended and not a garbled
    # multi-space literal. Kept unchanged to preserve existing indexes.
    return (
        text.replace("\n \n", "")
        .replace(" ", "")
        .replace("----", "")
        .replace("====", "")
    )


def _vectorstore_path(file_path):
    """Map a source PDF path to the directory its FAISS index is saved in."""
    # Drop only a trailing ".pdf"; a blanket str.replace would also eat a
    # ".pdf" that appears in the middle of the name.
    suffix = ".pdf"
    stem = file_path[: -len(suffix)] if file_path.endswith(suffix) else file_path

    # Drop "data" only when it is a whole directory component; a blanket
    # replace of "data/" would also mangle names such as "metadata/".
    *parents, leaf = stem.split("/")
    kept = [part for part in parents if part != "data"]
    return "/".join(["vectors", *kept, leaf])


def create_vectoreDB(file_path: str):
    """Build a FAISS vector store from a PDF and save it under ``vectors/``.

    The PDF is loaded with ``PyMuPDFLoader``, each page's text is cleaned,
    the pages are embedded with the ``embeddings`` object imported from
    ``config``, and the resulting index is written with ``save_local``.

    The output directory is ``vectors/`` followed by *file_path* without its
    trailing ``.pdf`` and without any ``data`` directory components, e.g.
    ``data/report.pdf`` -> ``vectors/report``.

    Args:
        file_path: Path of the PDF file to index.

    Returns:
        ``{"status": "completed"}`` on success, or ``None`` if any step
        raised; the error message is printed in that case.
    """
    try:
        documents = PyMuPDFLoader(file_path=file_path).load()
        if not documents:
            # Fail early with a readable message instead of handing FAISS
            # an empty document list.
            raise ValueError(f"No pages could be loaded from {file_path}")

        for doc in documents:
            doc.page_content = _clean_page_text(doc.page_content)

        vectorstore = FAISS.from_documents(documents, embedding=embeddings)

        path = _vectorstore_path(file_path)
        vectorstore.save_local(path)

        print(f"VectoreStore has been created at: {path}")
        return {"status": "completed"}
    except Exception as e:
        # Best-effort boundary: callers receive None on any failure, so the
        # error is reported here rather than propagated.
        print(str(e))
        return None
|
|
|
|
|
|
|