Commit d944073 · Update app.py
Parent(s): caa47ed
app.py
CHANGED
@@ -1,5 +1,6 @@
 from langchain.chains import ConversationalRetrievalChain
 from langchain.chains.question_answering import load_qa_chain
+from langchain.chains import RetrievalQA
 from langchain.memory import ConversationBufferMemory
 from langchain.memory import ConversationTokenBufferMemory
 from langchain.llms import HuggingFacePipeline
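The added import brings in RetrievalQA, the stateless counterpart of ConversationalRetrievalChain: it answers one query at a time instead of threading chat history through each call. A rough sketch of the interface difference (the chain names here are placeholders, not objects from app.py):

# Placeholder chains built as in the hunks below.
# RetrievalQA takes a single "query" and returns "result" plus,
# when return_source_documents=True, "source_documents":
result = qa_chain({"query": "What is covered in the documents?"})
print(result["result"])

# ConversationalRetrievalChain instead expects "question" plus a
# chat_history (or a memory object that supplies it):
# result = conv_chain({"question": "What is covered?", "chat_history": []})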
@@ -158,12 +159,13 @@ def create_vector_database(loaded_documents):
     """

     # Split loaded documents into chunks
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=30, length_function = len)
     chunked_documents = text_splitter.split_documents(loaded_documents)

     # Initialize HuggingFace embeddings
     # embeddings = HuggingFaceEmbeddings(
-    #     model_name="sentence-transformers/all-MiniLM-L6-v2"
+    #     # model_name="sentence-transformers/all-MiniLM-L6-v2"
+    #     model_name = "sentence-transformers/all-mpnet-base-v2"
     # )
     embeddings = HuggingFaceBgeEmbeddings(
         model_name = "BAAI/bge-large-en"
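With a 500-character chunk size, the new chunk_overlap=30 repeats roughly the last 30 characters of each chunk at the head of the next, so a sentence cut at a boundary still appears whole in at least one chunk; length_function=len measures size in raw characters. A minimal standalone sketch of the same splitter and BGE embedder, assuming the legacy langchain package the Space pins (sample text and variable names are illustrative only):

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.schema import Document

# Same parameters as the committed line above.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=30, length_function=len)
chunks = splitter.split_documents([Document(page_content="Some long document text. " * 50)])

# BAAI/bge-large-en produces one dense vector per chunk.
embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-large-en")
vector = embeddings.embed_query(chunks[0].page_content)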
@@ -196,31 +198,39 @@ def set_custom_prompt():
     """
     Prompt template for retrieval for each vectorstore
     """
+    # prompt_template = """<Instructions>
+    # Important:
+    # Answer with the facts listed in the list of sources below. If there isn't enough information below, say you don't know.
+    # If asking a clarifying question to the user would help, ask the question.
+    # ALWAYS return a "SOURCES" part in your answer, except for small-talk conversations.

+    # Question: {question}

-    prompt_template = """<Instructions>
-    Important:
-    Answer with the facts listed in the list of sources below. If there isn't enough information below, say you don't know.
-    If asking a clarifying question to the user would help, ask the question.
-    ALWAYS return a "SOURCES" part in your answer, except for small-talk conversations.
+    # {context}

-    Question: {question}

-    {context}
+    # Question: {question}
+    # Helpful Answer:

+    # ---------------------------
+    # ---------------------------
+    # Sources:
+    # """
+    prompt_template = """Use the following pieces of information to answer the user's question.
+    If you don't know the answer, just say that you don't know, don't try to make up an answer.

+    Context: {context}
     Question: {question}
-    Helpful Answer:

-    ---------------------------
-    ---------------------------
-    Sources:
+    Only return the helpful answer below and nothing else.
+    Helpful answer:
     """

     prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
     return prompt

-def create_chain(llm, prompt, CONDENSE_QUESTION_PROMPT, db):
+# def create_chain(llm, prompt, CONDENSE_QUESTION_PROMPT, db):
+def create_chain(llm, prompt, db):
     """
     Creates a Retrieval Question-Answering (QA) chain using a given language model, prompt, and database.

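The replacement template drops the old prompt's SOURCES protocol and keeps only the two variables the stuff chain fills in: {context} receives the retrieved chunks concatenated together, {question} the raw user query. A quick sketch of how the finished PromptTemplate renders (the example strings are made up):

from langchain.prompts import PromptTemplate

template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""
prompt = PromptTemplate(template=template, input_variables=["context", "question"])

# The "stuff" chain substitutes the concatenated chunks for {context}:
print(prompt.format(context="Chunk one. Chunk two.", question="What does chunk one say?"))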
@@ -237,16 +247,22 @@ def create_chain(llm, prompt, CONDENSE_QUESTION_PROMPT, db):
     ConversationalRetrievalChain: The initialized conversational chain.
     """
     memory = ConversationTokenBufferMemory(llm=llm, memory_key="chat_history", return_messages=True, input_key='question', output_key='answer')
-    chain = ConversationalRetrievalChain.from_llm(
-        llm=llm,
-        chain_type="stuff",
-        retriever=db.as_retriever(search_kwargs={"k": 3}),
-        return_source_documents=True,
-        max_tokens_limit=256,
-        combine_docs_chain_kwargs={"prompt": prompt},
-        condense_question_prompt=CONDENSE_QUESTION_PROMPT,
-        memory=memory,
-    )
+    # chain = ConversationalRetrievalChain.from_llm(
+    #     llm=llm,
+    #     chain_type="stuff",
+    #     retriever=db.as_retriever(search_kwargs={"k": 3}),
+    #     return_source_documents=True,
+    #     max_tokens_limit=256,
+    #     combine_docs_chain_kwargs={"prompt": prompt},
+    #     condense_question_prompt=CONDENSE_QUESTION_PROMPT,
+    #     memory=memory,
+    # )
+    chain = RetrievalQA.from_chain_type(llm=llm,
+                                        chain_type='stuff',
+                                        retriever=db.as_retriever(search_kwargs={'k': 3}),
+                                        return_source_documents=True,
+                                        chain_type_kwargs={'prompt': prompt}
+                                        )
     return chain

 def create_retrieval_qa_bot(loaded_documents):
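Two consequences of this swap are worth noting: the memory object built just above is no longer wired into anything, since RetrievalQA keeps no chat history, and the chain's input key changes from "question" to "query". A usage sketch of the chain create_chain now returns (llm, prompt, and db stand in for the objects built elsewhere in app.py):

# Assumed objects from the rest of app.py: llm, prompt, db.
chain = create_chain(llm=llm, prompt=prompt, db=db)
result = chain({"query": "Summarise the uploaded documents."})
answer = result["result"]
sources = result["source_documents"]  # filled because return_source_documents=True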
@@ -263,10 +279,10 @@ def create_retrieval_qa_bot(loaded_documents):
     except Exception as e:
         raise Exception(f"Failed to get prompt: {str(e)}")

-    try:
-        CONDENSE_QUESTION_PROMPT = set_custom_prompt_condense() # Assuming this function exists and works as expected
-    except Exception as e:
-        raise Exception(f"Failed to get condense prompt: {str(e)}")
+    # try:
+    #     CONDENSE_QUESTION_PROMPT = set_custom_prompt_condense() # Assuming this function exists and works as expected
+    # except Exception as e:
+    #     raise Exception(f"Failed to get condense prompt: {str(e)}")

     try:
         db = create_vector_database(loaded_documents) # Assuming this function exists and works as expected
@@ -274,8 +290,11 @@ def create_retrieval_qa_bot(loaded_documents):
         raise Exception(f"Failed to get database: {str(e)}")

     try:
+        # qa = create_chain(
+        #     llm=llm, prompt=prompt,CONDENSE_QUESTION_PROMPT=CONDENSE_QUESTION_PROMPT, db=db
+        # ) # Assuming this function exists and works as expected
         qa = create_chain(
-            llm=llm, prompt=prompt, CONDENSE_QUESTION_PROMPT=CONDENSE_QUESTION_PROMPT, db=db
+            llm=llm, prompt=prompt, db=db
         ) # Assuming this function exists and works as expected
     except Exception as e:
         raise Exception(f"Failed to create retrieval QA chain: {str(e)}")
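Taken together, the commit turns the conversational bot into a single-shot QA pipeline. Assuming create_retrieval_qa_bot returns the qa chain it builds, and with a hypothetical loader standing in for the document-loading code outside this diff, the end-to-end flow looks roughly like:

loaded_documents = load_documents("docs/")  # hypothetical loader; not part of this diff
qa = create_retrieval_qa_bot(loaded_documents)
response = qa({"query": "What topics do the documents cover?"})
print(response["result"])
for doc in response["source_documents"]:
    print(doc.metadata.get("source"))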