Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -15,7 +15,6 @@ from aimakerspace.vectordatabase import VectorDatabase
|
|
15 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
16 |
import chainlit as cl
|
17 |
from langchain_community.document_loaders import PyPDFLoader
|
18 |
-
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
19 |
|
20 |
system_template = """\
|
21 |
Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
|
@@ -59,10 +58,7 @@ class RetrievalAugmentedQAPipeline:
|
|
59 |
|
60 |
|
61 |
text_splitter = CharacterTextSplitter()
|
62 |
-
|
63 |
-
chunk_size=1000,
|
64 |
-
chunk_overlap=200,
|
65 |
-
)
|
66 |
|
67 |
def process_text_file(file: AskFileResponse):
|
68 |
import tempfile
|
@@ -94,7 +90,7 @@ def process_pdf_file(file: AskFileResponse):
|
|
94 |
|
95 |
pdf_loader = PyPDFLoader(temp_file_path)
|
96 |
documents = pdf_loader.load()
|
97 |
-
texts =
|
98 |
return texts
|
99 |
|
100 |
|
@@ -124,6 +120,7 @@ async def on_chat_start():
|
|
124 |
else:
|
125 |
texts = process_text_file(file)
|
126 |
|
|
|
127 |
|
128 |
# Create a dict vector store
|
129 |
vector_db = VectorDatabase()
|
@@ -153,4 +150,4 @@ async def main(message):
|
|
153 |
async for stream_resp in result["response"]:
|
154 |
await msg.stream_token(stream_resp)
|
155 |
|
156 |
-
await msg.send()
|
|
|
15 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
16 |
import chainlit as cl
|
17 |
from langchain_community.document_loaders import PyPDFLoader
|
|
|
18 |
|
19 |
system_template = """\
|
20 |
Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
|
|
|
58 |
|
59 |
|
60 |
text_splitter = CharacterTextSplitter()
|
61 |
+
|
|
|
|
|
|
|
62 |
|
63 |
def process_text_file(file: AskFileResponse):
|
64 |
import tempfile
|
|
|
90 |
|
91 |
pdf_loader = PyPDFLoader(temp_file_path)
|
92 |
documents = pdf_loader.load()
|
93 |
+
texts = text_splitter.split_texts(documents)
|
94 |
return texts
|
95 |
|
96 |
|
|
|
120 |
else:
|
121 |
texts = process_text_file(file)
|
122 |
|
123 |
+
print(f"Processing {len(texts)} text chunks")
|
124 |
|
125 |
# Create a dict vector store
|
126 |
vector_db = VectorDatabase()
|
|
|
150 |
async for stream_resp in result["response"]:
|
151 |
await msg.stream_token(stream_resp)
|
152 |
|
153 |
+
await msg.send()
|