from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from config import embeddings

def get_vectorstore_path(file_path: str) -> str:
    """Return the directory under ``vectors/`` where the index for *file_path* is saved.

    The mapping mirrors the source layout: every directory component named
    exactly ``data`` is dropped and a trailing ``.pdf`` extension is removed,
    e.g. ``data/report.pdf`` -> ``vectors/report``.

    Only whole path components and the real file suffix are touched, so
    names that merely contain those substrings (``metadata/``, ``mydata/``,
    ``a.pdf.d/``) are left intact.

    Args:
        file_path: Forward-slash separated path of the source PDF.

    Returns:
        The forward-slash separated path of the vector store directory.
    """
    *directories, filename = file_path.split("/")

    # Compare whole components; a substring replace would also eat the tail
    # of directories such as "metadata/".
    kept_directories = [part for part in directories if part != "data"]

    # Strip the extension only when it is the actual suffix of the file name.
    if filename.endswith(".pdf"):
        filename = filename[: -len(".pdf")]

    return "/".join(["vectors", *kept_directories, filename])


def create_vectoreDB(file_path: str):
    """Build a FAISS vector store from a PDF and save it locally.

    The file is loaded with ``PyMuPDFLoader``, the text of each loaded
    document is cleaned of a few layout artefacts, the documents are embedded
    with the module-level ``embeddings`` object, and the resulting index is
    written to the directory given by ``get_vectorstore_path(file_path)``.

    Args:
        file_path: Path of the PDF to index (e.g. ``"data/report.pdf"``).

    Returns:
        ``{"status": "completed"}`` on success, or ``None`` if any step
        raised; in that case the exception message is printed.
    """
    try:
        documents = PyMuPDFLoader(file_path=file_path).load()

        # Remove layout noise from the extracted text: "\n \n" sequences,
        # runs of three spaces, and "----" / "====" rule characters.
        # The replacements are applied in this fixed order.
        for doc in documents:
            doc.page_content = (
                doc.page_content
                .replace("\n \n", "")
                .replace("   ", "")
                .replace("----", "")
                .replace("====", "")
            )

        vectorstore = FAISS.from_documents(documents, embedding=embeddings)

        path = get_vectorstore_path(file_path)
        vectorstore.save_local(path)

        print(f"VectoreStore has been created at: {path}")
        return {"status": "completed"}

    except Exception as e:
        # Best-effort contract: report the failure and signal it with None
        # rather than propagating, so existing callers keep working.
        print(str(e))
        return None
        
        
# Index the knowledge-base PDF. This statement sits at module top level, so it
# runs every time the file is executed or imported.
# NOTE(review): if rebuilding on import is not intended, consider moving this
# call under an `if __name__ == "__main__":` guard.
create_vectoreDB(file_path="data/Oldcastle-KnowldgeBase.pdf")