File size: 1,174 Bytes
1534ef4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from config import embeddings

def _clean_page_text(text):
    """Return *text* with PDF-extraction filler sequences removed.

    The sequences are removed one after another, in this order:
    newline-space-newline, three consecutive spaces, four dashes and
    four equals signs.
    """
    for filler in ("\n \n", "   ", "----", "===="):
        text = text.replace(filler, "")
    return text


def _vector_store_path(file_path):
    """Return the save location under ``vectors/`` for *file_path*.

    Only a leading ``data/`` directory and a trailing ``.pdf`` extension
    are dropped.  A global substring replace would also eat those
    sequences in the middle of the path (``data/metadata/x.pdf`` would
    lose the ``data/`` inside ``metadata/``).
    """
    relative = str(file_path)
    if relative.startswith("data/"):
        relative = relative[len("data/"):]
    if relative.endswith(".pdf"):
        relative = relative[:-len(".pdf")]
    return f"vectors/{relative}"


def create_vectoreDB(file_path: str):
    """Build a FAISS vector store from a PDF and save it under ``vectors/``.

    The PDF is loaded with ``PyMuPDFLoader``, the text of every loaded
    document is cleaned, the documents are embedded with the
    ``embeddings`` object imported from ``config``, and the resulting
    store is written with ``save_local``.

    Args:
        file_path: Path of the source PDF, e.g. ``"data/report.pdf"``.

    Returns:
        ``{"status": "completed"}`` on success, or ``None`` when any step
        raised; in that case the error message is printed.
    """
    try:
        documents = PyMuPDFLoader(file_path=file_path).load()

        for doc in documents:
            doc.page_content = _clean_page_text(doc.page_content)

        vectorstore = FAISS.from_documents(documents, embedding=embeddings)

        path = _vector_store_path(file_path)
        vectorstore.save_local(path)
    except Exception as e:
        # Best-effort contract: report the failure and signal it with
        # None instead of propagating the exception to the caller.
        print(str(e))
        return None
    else:
        print(f"VectoreStore has been created at: {path}")
        return {"status": "completed"}
        
        
# Build and save the vector store for the hard-coded PDF path below.
# NOTE(review): this call is at module top level, so it also runs whenever
# the module is imported — confirm that is intended, or move it behind an
# `if __name__ == "__main__":` guard.
create_vectoreDB("data/Oldcastle-KnowldgeBase.pdf")