XicoC committed on
Commit
f52683b
·
verified ·
1 Parent(s): 97ebef6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -7
app.py CHANGED
@@ -15,7 +15,6 @@ from aimakerspace.vectordatabase import VectorDatabase
15
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
16
  import chainlit as cl
17
  from langchain_community.document_loaders import PyPDFLoader
18
- from langchain_text_splitters import RecursiveCharacterTextSplitter
19
 
20
  system_template = """\
21
  Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
@@ -59,10 +58,7 @@ class RetrievalAugmentedQAPipeline:
59
 
60
 
61
  text_splitter = CharacterTextSplitter()
62
- pdf_text_splitter = RecursiveCharacterTextSplitter(
63
- chunk_size=1000,
64
- chunk_overlap=200,
65
- )
66
 
67
  def process_text_file(file: AskFileResponse):
68
  import tempfile
@@ -94,7 +90,7 @@ def process_pdf_file(file: AskFileResponse):
94
 
95
  pdf_loader = PyPDFLoader(temp_file_path)
96
  documents = pdf_loader.load()
97
- texts = pdf_text_splitter.split_documents(documents)
98
  return texts
99
 
100
 
@@ -124,6 +120,7 @@ async def on_chat_start():
124
  else:
125
  texts = process_text_file(file)
126
 
 
127
 
128
  # Create a dict vector store
129
  vector_db = VectorDatabase()
@@ -153,4 +150,4 @@ async def main(message):
153
  async for stream_resp in result["response"]:
154
  await msg.stream_token(stream_resp)
155
 
156
- await msg.send()
 
15
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
16
  import chainlit as cl
17
  from langchain_community.document_loaders import PyPDFLoader
 
18
 
19
  system_template = """\
20
  Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
 
58
 
59
 
60
  text_splitter = CharacterTextSplitter()
61
+
 
 
 
62
 
63
  def process_text_file(file: AskFileResponse):
64
  import tempfile
 
90
 
91
  pdf_loader = PyPDFLoader(temp_file_path)
92
  documents = pdf_loader.load()
93
+ texts = text_splitter.split_texts(documents)
94
  return texts
95
 
96
 
 
120
  else:
121
  texts = process_text_file(file)
122
 
123
+ print(f"Processing {len(texts)} text chunks")
124
 
125
  # Create a dict vector store
126
  vector_db = VectorDatabase()
 
150
  async for stream_resp in result["response"]:
151
  await msg.stream_token(stream_resp)
152
 
153
+ await msg.send()