File size: 1,174 Bytes
1534ef4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from config import embeddings
def _vector_store_path(file_path: str) -> str:
    """Return the directory under ``vectors/`` used to save the index for *file_path*."""
    relative = file_path
    # Strip only a leading "data/" and a trailing ".pdf". A global
    # str.replace would also rewrite those substrings in the middle of the
    # path (e.g. "metadata/x.pdf" would become "vectors/metax").
    if relative.startswith("data/"):
        relative = relative[len("data/"):]
    if relative.endswith(".pdf"):
        relative = relative[:-len(".pdf")]
    return f"vectors/{relative}"


def create_vectoreDB(file_path: str):
    """Build a FAISS vector store from a PDF and save it under ``vectors/``.

    The PDF is loaded with ``PyMuPDFLoader``, each document's text is cleaned
    of a few layout artefacts, the documents are embedded with the
    ``embeddings`` object imported from ``config``, and the resulting index is
    written with ``save_local``.

    Args:
        file_path: Path to the source PDF, e.g. ``"data/report.pdf"``. A
            leading ``data/`` and a trailing ``.pdf`` are dropped when
            deriving the output directory, so the example is saved to
            ``vectors/report``.

    Returns:
        ``{"status": "completed"}`` on success, or ``None`` if nothing could
        be loaded or any step raised (the error is printed, not re-raised).
    """
    try:
        loader = PyMuPDFLoader(file_path=file_path)
        documents = loader.load()
        if not documents:
            # Nothing to index; report it explicitly rather than letting a
            # later step fail with a less helpful message.
            print(f"No documents could be loaded from: {file_path}")
            return None

        # Remove layout artefacts from the extracted text, in this order.
        # NOTE(review): as written, the " " entry deletes every single space
        # from the text. Confirm the literal was not meant to be a longer
        # whitespace run before relying on the embeddings.
        for doc in documents:
            text = doc.page_content
            for artefact in ("\n \n", " ", "----", "===="):
                text = text.replace(artefact, "")
            doc.page_content = text

        vectorstore = FAISS.from_documents(
            documents,
            embedding=embeddings,
        )
        path = _vector_store_path(file_path)
        vectorstore.save_local(path)
        print(f"VectoreStore has been created at: {path}")
        return {"status": "completed"}
    except Exception as e:
        # Best-effort boundary: callers get None on any failure, so the error
        # is reported here instead of propagating.
        print(str(e))
        return None
# Build the index for the bundled knowledge-base PDF.
# NOTE(review): this call sits at module level, so it runs whenever this file
# is imported as well as when it is executed directly. Confirm that is
# intended before importing create_vectoreDB from elsewhere.
create_vectoreDB("data/Oldcastle-KnowldgeBase.pdf")