ajwthompson committed on
Commit
8fbd8e0
·
1 Parent(s): 621402a

Update ingest_data.py

Browse files
Files changed (1) hide show
  1. ingest_data.py +25 -14
ingest_data.py CHANGED
@@ -4,20 +4,31 @@ from langchain.vectorstores.faiss import FAISS
4
  from langchain.embeddings import OpenAIEmbeddings
5
  import pickle
6
 
7
- # Load Data
8
- loader = UnstructuredFileLoader("cleaned_text-11-02-2023.txt")
9
- raw_documents = loader.load()
10
 
11
- # Split text
12
- text_splitter = RecursiveCharacterTextSplitter()
13
- documents = text_splitter.split_documents(raw_documents)
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- # Load Data to vectorstore
17
- embeddings = OpenAIEmbeddings()
18
- vectorstore = FAISS.from_documents(documents, embeddings)
19
-
20
-
21
- # Save vectorstore
22
- with open("vectorstore.pkl", "wb") as f:
23
- pickle.dump(vectorstore, f)
 
4
  from langchain.embeddings import OpenAIEmbeddings
5
  import pickle
6
 
 
 
 
7
 
 
 
 
8
 
9
+ def ingest_data(vector_file_path):
10
+ # Load Data
11
+ loader = UnstructuredFileLoader("cleaned_text-11-02-2023.txt")
12
+ raw_documents = loader.load()
13
+
14
+ # Split text
15
+ text_splitter = RecursiveCharacterTextSplitter()
16
+ documents = text_splitter.split_documents(raw_documents)
17
+
18
+
19
+ # Load Data to vectorstore
20
+ embeddings = OpenAIEmbeddings()
21
+ vectorstore = FAISS.from_documents(documents, embeddings)
22
+
23
+
24
+ # Save vectorstore
25
+ with open(vector_file_path, "wb") as f:
26
+ pickle.dump(vectorstore, f)
27
+ return ingest
28
 
29
+ def get_vectorstore():
30
+ vector_file_path = "vectorstore.pkl"
31
+ if os.path.isfile(vector_file_path):
32
+ return vector_file_path
33
+ else:
34
+ ingest_data(vector_file_path)