import os
import pickle

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredFileLoader
from langchain.vectorstores.faiss import FAISS
from langchain.embeddings import OpenAIEmbeddings


def ingest_data(vector_file_path, source_path="cleaned_text-11-02-2023.txt"):
    """Build a FAISS vector store from a text file and pickle it to disk.

    The source file is loaded with ``UnstructuredFileLoader``, split with a
    default-configured ``RecursiveCharacterTextSplitter``, embedded with
    ``OpenAIEmbeddings`` and indexed with ``FAISS.from_documents``. The
    resulting vector store object is serialized with ``pickle``.

    Args:
        vector_file_path: Destination path of the pickled vector store.
        source_path: Path of the document to ingest. Defaults to the file
            name the original script used.

    Returns:
        ``vector_file_path``, unchanged, once the pickle has been written.
    """
    # Load data
    loader = UnstructuredFileLoader(source_path)
    raw_documents = loader.load()

    # Split text into chunks (splitter defaults are used as-is)
    text_splitter = RecursiveCharacterTextSplitter()
    documents = text_splitter.split_documents(raw_documents)

    # Embed the chunks and build the index
    # NOTE(review): OpenAIEmbeddings presumably needs OpenAI credentials to be
    # configured in the environment -- confirm before running unattended.
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_documents(documents, embeddings)

    # Save vectorstore. Whoever reads this file back must only unpickle it
    # from a trusted location: pickle executes code on load.
    with open(vector_file_path, "wb") as f:
        pickle.dump(vectorstore, f)

    return vector_file_path


def get_vectorstore(vector_file_path="vectorstore.pkl"):
    """Return the path of the pickled vector store, building it if missing.

    Args:
        vector_file_path: Location of the pickled vector store. Defaults to
            ``"vectorstore.pkl"`` in the current working directory.

    Returns:
        The path of the pickle file. The same value is returned whether the
        file already existed or had to be created by ``ingest_data``.
    """
    if os.path.isfile(vector_file_path):
        return vector_file_path

    # Previously the freshly built path was dropped here, so the first call
    # returned None while later calls returned the path.
    return ingest_data(vector_file_path)