Poonawala committed on
Commit
1534ef4
·
verified ·
1 Parent(s): 95613e0

Update embeddings.py

Browse files
Files changed (1) hide show
  1. embeddings.py +30 -30
embeddings.py CHANGED
@@ -1,30 +1,30 @@
1
- from langchain_community.document_loaders import PyMuPDFLoader
2
- from langchain_community.vectorstores import FAISS
3
- from config import embeddings
4
-
5
- def create_vectoreDB(file_path:str):
6
- try:
7
- loader = PyMuPDFLoader(file_path=file_path)
8
- documents = loader.load()
9
-
10
- # Process the text to remove "\n\n" and " "
11
- for doc in documents:
12
- doc.page_content = doc.page_content.replace("\n \n", "").replace(" ", "").replace("----", "").replace("====", "")
13
-
14
- vectorstore = FAISS.from_documents(
15
- documents,
16
- embedding=embeddings
17
- )
18
-
19
- path = f"vectors/{file_path}".replace(".pdf","").replace("data/","")
20
- vectorstore.save_local(path)
21
-
22
- print(f"VectoreStore has been created at: {path}")
23
- return {"status": "completed"}
24
-
25
- except Exception as e:
26
- print(str(e))
27
- return None
28
-
29
-
30
- # create_vectoreDB("data/Oldcastle-KnowldgeBase.pdf")
 
1
+ from langchain_community.document_loaders import PyMuPDFLoader
2
+ from langchain_community.vectorstores import FAISS
3
+ from config import embeddings
4
+
5
def create_vectoreDB(file_path: str):
    """Build a FAISS vector store from a PDF and save it under ``vectors/``.

    The PDF is loaded with ``PyMuPDFLoader``, a few layout artefacts are
    stripped from every page's text, the pages are embedded with the
    ``embeddings`` object imported from ``config``, and the resulting index
    is written to disk with ``save_local``.

    Args:
        file_path: Path of the PDF to index, e.g. ``"data/report.pdf"``.
            A leading ``data/`` and a trailing ``.pdf`` are dropped when
            deriving the output location, so the example is stored at
            ``vectors/report``.

    Returns:
        ``{"status": "completed"}`` on success, or ``None`` if any step
        raised. Errors are printed, not propagated.
    """
    try:
        loader = PyMuPDFLoader(file_path=file_path)
        documents = loader.load()

        # Strip layout artefacts (blank-line runs, spacing, "----"/"====" rules)
        # from each page before embedding.
        # NOTE(review): the literals are reproduced exactly as they appear in
        # the diff view, which may have collapsed whitespace. As written,
        # replace(" ", "") deletes every space -- confirm against the real file.
        for doc in documents:
            doc.page_content = (
                doc.page_content.replace("\n \n", "")
                .replace(" ", "")
                .replace("----", "")
                .replace("====", "")
            )

        vectorstore = FAISS.from_documents(
            documents,
            embedding=embeddings,
        )

        # Remove only the leading "data/" directory and the trailing ".pdf"
        # extension. A blanket str.replace would also eat those substrings in
        # the middle of the path (e.g. "data/metadata/x.pdf" -> "metax").
        relative = file_path
        if relative.startswith("data/"):
            relative = relative[len("data/"):]
        if relative.endswith(".pdf"):
            relative = relative[: -len(".pdf")]
        path = f"vectors/{relative}"
        vectorstore.save_local(path)

        print(f"VectoreStore has been created at: {path}")
        return {"status": "completed"}

    except Exception as e:
        # Best-effort by design: report the failure and signal it with None
        # instead of crashing the caller.
        print(str(e))
        return None


# NOTE(review): this is a top-level statement, so the knowledge-base index is
# rebuilt every time the module is imported or run. Confirm that is intended.
create_vectoreDB("data/Oldcastle-KnowldgeBase.pdf")