hfwittmann committed on
Commit
0654708
·
1 Parent(s): 0a9785c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -9,6 +9,7 @@ from langchain.document_loaders import PyPDFLoader
9
  from langchain.embeddings import OpenAIEmbeddings
10
  from langchain.indexes import VectorstoreIndexCreator
11
  from langchain.text_splitter import CharacterTextSplitter
 
12
  from langchain.llms import OpenAI
13
  from langchain.vectorstores import DocArrayInMemorySearch
14
  from uuid import uuid4
@@ -92,7 +93,14 @@ class myClass:
92
  self.index = VectorstoreIndexCreator(
93
  vectorstore_cls=DocArrayInMemorySearch,
94
  embedding=self.embedding,
95
- text_splitter=CharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
 
 
 
 
 
 
 
96
  ).from_loaders(loaders=loaders)
97
 
98
  # del os.environ["OPENAI_API_KEY"]
 
9
  from langchain.embeddings import OpenAIEmbeddings
10
  from langchain.indexes import VectorstoreIndexCreator
11
  from langchain.text_splitter import CharacterTextSplitter
12
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
13
  from langchain.llms import OpenAI
14
  from langchain.vectorstores import DocArrayInMemorySearch
15
  from uuid import uuid4
 
93
  self.index = VectorstoreIndexCreator(
94
  vectorstore_cls=DocArrayInMemorySearch,
95
  embedding=self.embedding,
96
+ text_splitter=text_splitter = RecursiveCharacterTextSplitter(
97
+ # Set a really small chunk size, just to show.
98
+ chunk_size = 1000,
99
+ chunk_overlap = 20,
100
+ length_function = len,
101
+ separators="."
102
+ )
103
+ )
104
  ).from_loaders(loaders=loaders)
105
 
106
  # del os.environ["OPENAI_API_KEY"]