XicoC committed on
Commit
4c9dc31
·
verified ·
1 Parent(s): 9085b4d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -15,6 +15,7 @@ from aimakerspace.vectordatabase import VectorDatabase
15
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
16
  import chainlit as cl
17
  from langchain_community.document_loaders import PyPDFLoader
 
18
 
19
  system_template = """\
20
  Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
@@ -58,7 +59,10 @@ class RetrievalAugmentedQAPipeline:
58
 
59
 
60
  text_splitter = CharacterTextSplitter()
61
-
 
 
 
62
 
63
  def process_text_file(file: AskFileResponse):
64
  import tempfile
@@ -90,7 +94,7 @@ def process_pdf_file(file: AskFileResponse):
90
 
91
  pdf_loader = PyPDFLoader(temp_file_path)
92
  documents = pdf_loader.load()
93
- texts = text_splitter.split_texts(documents)
94
  return texts
95
 
96
 
 
15
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
16
  import chainlit as cl
17
  from langchain_community.document_loaders import PyPDFLoader
18
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
19
 
20
  system_template = """\
21
  Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
 
59
 
60
 
61
  text_splitter = CharacterTextSplitter()
62
+ pdf_text_splitter = RecursiveCharacterTextSplitter(
63
+ chunk_size=1000,
64
+ chunk_overlap=200,
65
+ )
66
 
67
  def process_text_file(file: AskFileResponse):
68
  import tempfile
 
94
 
95
  pdf_loader = PyPDFLoader(temp_file_path)
96
  documents = pdf_loader.load()
97
+ texts = pdf_text_splitter.split_texts(documents)
98
  return texts
99
 
100