markpeace committed on
Commit
94b154c
·
1 Parent(s): bbe9d13

moved to recursivechar

Browse files
Files changed (1) hide show
  1. train/faq.py +2 -2
train/faq.py CHANGED
@@ -1,7 +1,7 @@
1
 
2
  def train():
3
  from langchain_community.document_loaders.csv_loader import CSVLoader
4
- from langchain.text_splitter import CharacterTextSplitter
5
  from langchain_openai import OpenAIEmbeddings
6
  from langchain_community.vectorstores.faiss import FAISS
7
  from dotenv import load_dotenv
@@ -10,7 +10,7 @@ def train():
10
  documents = WebBaseLoader("https://rise.mmu.ac.uk/what-is-rise/").load()
11
 
12
  # Split document in chunks
13
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=30)
14
  docs = text_splitter.split_documents(documents=documents)
15
 
16
  embeddings = OpenAIEmbeddings()
 
1
 
2
  def train():
3
  from langchain_community.document_loaders.csv_loader import CSVLoader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain_openai import OpenAIEmbeddings
6
  from langchain_community.vectorstores.faiss import FAISS
7
  from dotenv import load_dotenv
 
10
  documents = WebBaseLoader("https://rise.mmu.ac.uk/what-is-rise/").load()
11
 
12
  # Split document in chunks
13
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
14
  docs = text_splitter.split_documents(documents=documents)
15
 
16
  embeddings = OpenAIEmbeddings()