Spaces: Running on Zero
new code
- src/app.py +1 -1
- src/pdfchatbot.py +49 -5
src/app.py
CHANGED
@@ -16,7 +16,7 @@ with demo:
 
     # Event handler for submitting text and generating response
     submit_button.click(pdf_chatbot.add_text, inputs=[chat_history, txt], outputs=[chat_history], queue=False).\
-        success(pdf_chatbot.generate_response, inputs=[chat_history, txt, uploaded_pdf], outputs=[chat_history,
+        success(pdf_chatbot.generate_response, inputs=[chat_history, txt, uploaded_pdf], outputs=[chat_history,txt]).\
         success(pdf_chatbot.render_file, inputs=[uploaded_pdf], outputs=[show_img])
 
 if __name__ == "__main__":
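For reference, the .click().success() chain above is standard Gradio event wiring: each .success() step runs only if the previous handler finished without raising, and listing txt among the outputs of generate_response lets it clear the textbox. A minimal, self-contained sketch of the same wiring (handler bodies here are illustrative, not the app's actual implementations):

import gradio as gr

def add_text(history, text):
    # Append the user turn with an empty bot slot.
    return history + [(text, "")]

def generate_response(history, text, pdf):
    # Fill in the bot reply; returning "" as the second output clears the textbox.
    history[-1] = (history[-1][0], f"You asked: {text}")
    return history, ""

def render_file(pdf):
    # Placeholder for rendering the uploaded PDF page as an image.
    return None

with gr.Blocks() as demo:
    chat_history = gr.Chatbot()
    show_img = gr.Image()
    uploaded_pdf = gr.File()
    txt = gr.Textbox()
    submit_button = gr.Button("Submit")

    # Each .success() step runs only if the previous step completed without error.
    submit_button.click(add_text, inputs=[chat_history, txt], outputs=[chat_history], queue=False).\
        success(generate_response, inputs=[chat_history, txt, uploaded_pdf], outputs=[chat_history, txt]).\
        success(render_file, inputs=[uploaded_pdf], outputs=[show_img])

if __name__ == "__main__":
    demo.launch()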
src/pdfchatbot.py
CHANGED
@@ -14,7 +14,6 @@ import spaces
 from langchain_text_splitters import CharacterTextSplitter
 
 
-
 class PDFChatBot:
     def __init__(self, config_path="config.yaml"):
         """

@@ -37,6 +36,8 @@ class PDFChatBot:
         self.pipeline = None
         self.chain = None
         self.chunk_size = None
+        self.current_context = None
+        self.format_seperator="""\n\n--\n\n"""
         #self.chunk_size_slider = chunk_size_slider
 
     def load_config(self, file_path):
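The two new attributes back the retrieval step introduced further down: format_seperator is the string used to stitch retrieved chunks into one context block, and current_context caches the joined result. A rough sketch of that join, assuming a populated LangChain vector store such as Chroma (names here are illustrative):

format_seperator = """\n\n--\n\n"""

def build_context(vectordb, query, k=3):
    # similarity_search_with_relevance_scores returns (Document, score) pairs.
    documents = vectordb.similarity_search_with_relevance_scores(query, k=k)
    # Keep only the chunk text and separate the chunks visibly in the prompt.
    return format_seperator.join(doc.page_content for doc, _score in documents)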
@@ -128,6 +129,46 @@ class PDFChatBot:
         )
         self.pipeline = HuggingFacePipeline(pipeline=pipe)
 
+    def create_organic_pipeline(self):
+        self.pipeline = pipeline(
+            "text-generation",
+            model=self.config.get("autoModelForCausalLM"),
+            model_kwargs={"torch_dtype": torch.bfloat16},
+            device="cuda",
+        )
+
+    def get_organic_context(self, query):
+        documents = self.vectordb.similarity_search_with_relevance_scores(query, k=self.k)
+        context = self.format_seperator.join([doc.page_content for doc, score in documents])
+        self.current_context = context
+        print(self.current_context)
+
+    def create_organic_response(self, history, query):
+        self.get_organic_context(query)
+        messages = [
+            {"role": "system", "content": "From the the contained given below, answer the question of user \n " + self.current_context},
+            {"role": "user", "content": query},
+        ]
+        prompt = self.pipeline.tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+        terminators = [
+            self.pipeline.tokenizer.eos_token_id,
+            self.pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
+        ]
+        temp = 0.1
+        outputs = pipeline(
+            prompt,
+            max_new_tokens=1024,
+            eos_token_id=terminators,
+            do_sample=True,
+            temperature=temp,
+            top_p=0.9,
+        )
+        return outputs[0]["generated_text"][len(prompt):]
+
     def create_chain(self):
         """
         Create a Conversational Retrieval Chain
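The new "organic" path skips the ConversationalRetrievalChain: it renders a system-plus-user message pair with the tokenizer's chat template and samples from a plain transformers text-generation pipeline. A minimal sketch of that flow, assuming a Llama-3-style instruct checkpoint (the real model name comes from the autoModelForCausalLM config key) and calling the instantiated pipeline object for generation:

import torch
from transformers import pipeline

# Assumption: a Llama-3-style instruct model; any chat-templated model works similarly.
generator = pipeline(
    "text-generation",
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cuda",
)

def answer(context, query):
    messages = [
        {"role": "system", "content": "Answer the user's question from the context below.\n" + context},
        {"role": "user", "content": query},
    ]
    # Render the chat template into a plain prompt string.
    prompt = generator.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    terminators = [
        generator.tokenizer.eos_token_id,
        generator.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]
    outputs = generator(  # call the pipeline instance, not the pipeline() factory
        prompt,
        max_new_tokens=1024,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.1,
        top_p=0.9,
    )
    # Strip the echoed prompt so only the newly generated answer remains.
    return outputs[0]["generated_text"][len(prompt):]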
@@ -153,8 +194,8 @@ class PDFChatBot:
         self.load_vectordb()
         self.load_tokenizer()
         self.load_model()
-        self.
-        self.create_chain()
+        self.create_organic_pipeline()
+        #self.create_chain()
     @spaces.GPU
     def generate_response(self, history, query, file):
         """
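process_file now finishes by building the plain text-generation pipeline instead of the LangChain chain, while generate_response keeps the @spaces.GPU decorator; on a ZeroGPU Space ("Running on Zero" above) that decorator is what attaches a GPU for the duration of the decorated call. A minimal usage sketch (the function body is illustrative):

import spaces
import torch

@spaces.GPU  # ZeroGPU allocates a GPU only while this function is running
def describe_gpu():
    # CUDA is only guaranteed to be available inside the decorated call.
    return torch.cuda.get_device_name(0)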
@@ -176,11 +217,14 @@
         self.process_file(file)
         self.processed = True
 
-        result = self.chain({"question": query, 'chat_history': self.chat_history}, return_only_outputs=True)
+        """result = self.chain({"question": query, 'chat_history': self.chat_history}, return_only_outputs=True)
         self.chat_history.append((query, result["answer"]))
         for char in result['answer']:
             history[-1][-1] += char
-        return history, " "
+        return history, " """""
+
+        result = self.create_organic_response(history="",query=query)
+        return result,""
 
     def render_file(self, file,chunk_size):
         """
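With the chain-based block commented out, generate_response now returns the raw answer string plus an empty string (to clear the textbox) rather than an updated chat history. If the Chatbot output still expects a history structure, one way to adapt the returned answer, sketched here under that assumption and mirroring the character-by-character append the old path used:

def append_answer(history, query, answer):
    # Start a new turn, then fill the bot slot character by character,
    # as the commented-out chain-based path did.
    history = history + [[query, ""]]
    for char in answer:
        history[-1][-1] += char
    return history, ""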