Spaces: Running on Zero
new code
Browse files — src/pdfchatbot.py (+9 −3)
src/pdfchatbot.py
CHANGED
|
@@ -11,7 +11,7 @@ from langchain.document_loaders import PyPDFLoader
|
|
| 11 |
from langchain.prompts import PromptTemplate
|
| 12 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 13 |
import spaces
|
| 14 |
-
from langchain_text_splitters import CharacterTextSplitter
|
| 15 |
|
| 16 |
|
| 17 |
class PDFChatBot:
|
|
@@ -96,7 +96,12 @@ class PDFChatBot:
|
|
| 96 |
"""
|
| 97 |
Load the vector database from the documents and embeddings.
|
| 98 |
"""
|
| 99 |
-
text_splitter =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
docs = text_splitter.split_documents(self.documents)
|
| 101 |
self.vectordb = Chroma.from_documents(docs, self.embeddings)
|
| 102 |
|
|
@@ -132,7 +137,8 @@ class PDFChatBot:
|
|
| 132 |
def create_organic_pipeline(self):
|
| 133 |
self.pipeline = pipeline(
|
| 134 |
"text-generation",
|
| 135 |
-
model=self.
|
|
|
|
| 136 |
model_kwargs={"torch_dtype": torch.bfloat16},
|
| 137 |
device="cuda",
|
| 138 |
)
|
|
|
|
| 11 |
from langchain.prompts import PromptTemplate
|
| 12 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 13 |
import spaces
|
| 14 |
+
from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter
|
| 15 |
|
| 16 |
|
| 17 |
class PDFChatBot:
|
|
|
|
| 96 |
"""
|
| 97 |
Load the vector database from the documents and embeddings.
|
| 98 |
"""
|
| 99 |
+
text_splitter = RecursiveCharacterTextSplitter(
|
| 100 |
+
chunk_size=256,
|
| 101 |
+
chunk_overlap=100,
|
| 102 |
+
length_function=len,
|
| 103 |
+
add_start_index=True,
|
| 104 |
+
)
|
| 105 |
docs = text_splitter.split_documents(self.documents)
|
| 106 |
self.vectordb = Chroma.from_documents(docs, self.embeddings)
|
| 107 |
|
|
|
|
| 137 |
def create_organic_pipeline(self):
|
| 138 |
self.pipeline = pipeline(
|
| 139 |
"text-generation",
|
| 140 |
+
model=self.model,
|
| 141 |
+
tokenizer=self.tokenizer,
|
| 142 |
model_kwargs={"torch_dtype": torch.bfloat16},
|
| 143 |
device="cuda",
|
| 144 |
)
|