XicoC committed on
Commit
f52683b
·
verified ·
1 Parent(s): 97ebef6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -7
app.py CHANGED
@@ -15,7 +15,6 @@ from aimakerspace.vectordatabase import VectorDatabase
15
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
16
  import chainlit as cl
17
  from langchain_community.document_loaders import PyPDFLoader
18
- from langchain_text_splitters import RecursiveCharacterTextSplitter
19
 
20
  system_template = """\
21
  Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
@@ -59,10 +58,7 @@ class RetrievalAugmentedQAPipeline:
59
 
60
 
61
  text_splitter = CharacterTextSplitter()
62
- pdf_text_splitter = RecursiveCharacterTextSplitter(
63
- chunk_size=1000,
64
- chunk_overlap=200,
65
- )
66
 
67
  def process_text_file(file: AskFileResponse):
68
  import tempfile
@@ -94,7 +90,7 @@ def process_pdf_file(file: AskFileResponse):
94
 
95
  pdf_loader = PyPDFLoader(temp_file_path)
96
  documents = pdf_loader.load()
97
- texts = pdf_text_splitter.split_documents(documents)
98
  return texts
99
 
100
 
@@ -124,6 +120,7 @@ async def on_chat_start():
124
  else:
125
  texts = process_text_file(file)
126
 
 
127
 
128
  # Create a dict vector store
129
  vector_db = VectorDatabase()
@@ -153,4 +150,4 @@ async def main(message):
153
  async for stream_resp in result["response"]:
154
  await msg.stream_token(stream_resp)
155
 
156
- await msg.send()
 
15
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
16
  import chainlit as cl
17
  from langchain_community.document_loaders import PyPDFLoader
 
18
 
19
  system_template = """\
20
  Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
 
58
 
59
 
60
  text_splitter = CharacterTextSplitter()
61
+
 
 
 
62
 
63
  def process_text_file(file: AskFileResponse):
64
  import tempfile
 
90
 
91
  pdf_loader = PyPDFLoader(temp_file_path)
92
  documents = pdf_loader.load()
93
+ texts = text_splitter.split_texts(documents)
94
  return texts
95
 
96
 
 
120
  else:
121
  texts = process_text_file(file)
122
 
123
+ print(f"Processing {len(texts)} text chunks")
124
 
125
  # Create a dict vector store
126
  vector_db = VectorDatabase()
 
150
  async for stream_resp in result["response"]:
151
  await msg.stream_token(stream_resp)
152
 
153
+ await msg.send()