captain-awesome committed
Commit d944073 · 1 Parent(s): caa47ed

Update app.py

Files changed (1):
1. app.py (+48 -29)
app.py CHANGED
@@ -1,5 +1,6 @@
 from langchain.chains import ConversationalRetrievalChain
 from langchain.chains.question_answering import load_qa_chain
+from langchain.chains import RetrievalQA
 from langchain.memory import ConversationBufferMemory
 from langchain.memory import ConversationTokenBufferMemory
 from langchain.llms import HuggingFacePipeline
@@ -158,12 +159,13 @@ def create_vector_database(loaded_documents):
     """
 
     # Split loaded documents into chunks
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=40, length_function=len)
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=30, length_function=len)
     chunked_documents = text_splitter.split_documents(loaded_documents)
 
     # Initialize HuggingFace embeddings
     # embeddings = HuggingFaceEmbeddings(
-    #     model_name="sentence-transformers/all-MiniLM-L6-v2"
+    #     # model_name="sentence-transformers/all-MiniLM-L6-v2"
+    #     model_name="sentence-transformers/all-mpnet-base-v2"
     # )
     embeddings = HuggingFaceBgeEmbeddings(
         model_name="BAAI/bge-large-en"
@@ -196,31 +198,39 @@ def set_custom_prompt():
     """
     Prompt template for retrieval for each vectorstore
     """
-    prompt_template = """<Instructions>
-    Important:
-    Answer with the facts listed in the list of sources below. If there isn't enough information below, say you don't know.
-    If asking a clarifying question to the user would help, ask the question.
-    ALWAYS return a "SOURCES" part in your answer, except for small-talk conversations.
-
-    Question: {question}
-
-    {context}
-
-    Question: {question}
-    Helpful Answer:
-
-    ---------------------------
-    ---------------------------
-    Sources:
-    """
+    # prompt_template = """<Instructions>
+    # Important:
+    # Answer with the facts listed in the list of sources below. If there isn't enough information below, say you don't know.
+    # If asking a clarifying question to the user would help, ask the question.
+    # ALWAYS return a "SOURCES" part in your answer, except for small-talk conversations.
+
+    # Question: {question}
+
+    # {context}
+
+    # Question: {question}
+    # Helpful Answer:
+
+    # ---------------------------
+    # ---------------------------
+    # Sources:
+    # """
+    prompt_template = """Use the following pieces of information to answer the user's question.
+    If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+    Context: {context}
+    Question: {question}
+
+    Only return the helpful answer below and nothing else.
+    Helpful answer:
+    """
 
     prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
     return prompt
 
-def create_chain(llm, prompt, CONDENSE_QUESTION_PROMPT, db):
+# def create_chain(llm, prompt, CONDENSE_QUESTION_PROMPT, db):
+def create_chain(llm, prompt, db):
     """
     Creates a Retrieval Question-Answering (QA) chain using a given language model, prompt, and database.
 
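The rewritten template feeds into PromptTemplate unchanged; a quick formatting check of the new prompt, where both input strings are invented for illustration:

    # Formatting check for the new prompt; the context/question values are made up.
    from langchain import PromptTemplate

    prompt_template = """Use the following pieces of information to answer the user's question.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.

    Context: {context}
    Question: {question}

    Only return the helpful answer below and nothing else.
    Helpful answer:
    """
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    print(prompt.format(context="BGE is an embedding model family from BAAI.",
                        question="Who publishes the BGE models?"))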
@@ -237,16 +247,22 @@ def create_chain(llm, prompt, CONDENSE_QUESTION_PROMPT, db):
         ConversationalRetrievalChain: The initialized conversational chain.
     """
     memory = ConversationTokenBufferMemory(llm=llm, memory_key="chat_history", return_messages=True, input_key='question', output_key='answer')
-    chain = ConversationalRetrievalChain.from_llm(
-        llm=llm,
-        chain_type="stuff",
-        retriever=db.as_retriever(search_kwargs={"k": 3}),
-        return_source_documents=True,
-        max_tokens_limit=256,
-        combine_docs_chain_kwargs={"prompt": prompt},
-        condense_question_prompt=CONDENSE_QUESTION_PROMPT,
-        memory=memory,
-    )
+    # chain = ConversationalRetrievalChain.from_llm(
+    #     llm=llm,
+    #     chain_type="stuff",
+    #     retriever=db.as_retriever(search_kwargs={"k": 3}),
+    #     return_source_documents=True,
+    #     max_tokens_limit=256,
+    #     combine_docs_chain_kwargs={"prompt": prompt},
+    #     condense_question_prompt=CONDENSE_QUESTION_PROMPT,
+    #     memory=memory,
+    # )
+    chain = RetrievalQA.from_chain_type(llm=llm,
+                                        chain_type='stuff',
+                                        retriever=db.as_retriever(search_kwargs={'k': 3}),
+                                        return_source_documents=True,
+                                        chain_type_kwargs={'prompt': prompt}
+                                        )
     return chain
 
 def create_retrieval_qa_bot(loaded_documents):
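The call-side contract changes with this swap; a sketch, assuming a chain built by the new create_chain (note the memory object created above is no longer wired into the chain):

    # ConversationalRetrievalChain (old) vs RetrievalQA (new) at call time.
    # The old chain took "question" and returned "answer"; the new one takes "query".
    result = chain({"query": "What embedding model does the app use?"})
    answer = result["result"]
    sources = result["source_documents"]  # populated because return_source_documents=True
    # The ConversationTokenBufferMemory created above is left unused by RetrievalQA,
    # so chat history no longer influences retrieval after this change.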
@@ -263,10 +279,10 @@ def create_retrieval_qa_bot(loaded_documents):
     except Exception as e:
         raise Exception(f"Failed to get prompt: {str(e)}")
 
-    try:
-        CONDENSE_QUESTION_PROMPT = set_custom_prompt_condense()  # Assuming this function exists and works as expected
-    except Exception as e:
-        raise Exception(f"Failed to get condense prompt: {str(e)}")
+    # try:
+    #     CONDENSE_QUESTION_PROMPT = set_custom_prompt_condense()  # Assuming this function exists and works as expected
+    # except Exception as e:
+    #     raise Exception(f"Failed to get condense prompt: {str(e)}")
 
     try:
         db = create_vector_database(loaded_documents)  # Assuming this function exists and works as expected
@@ -274,8 +290,11 @@ def create_retrieval_qa_bot(loaded_documents):
         raise Exception(f"Failed to get database: {str(e)}")
 
     try:
+        # qa = create_chain(
+        #     llm=llm, prompt=prompt, CONDENSE_QUESTION_PROMPT=CONDENSE_QUESTION_PROMPT, db=db
+        # )  # Assuming this function exists and works as expected
         qa = create_chain(
-            llm=llm, prompt=prompt, CONDENSE_QUESTION_PROMPT=CONDENSE_QUESTION_PROMPT, db=db
+            llm=llm, prompt=prompt, db=db
         )  # Assuming this function exists and works as expected
     except Exception as e:
         raise Exception(f"Failed to create retrieval QA chain: {str(e)}")