Spaces:

edithram23
/

Chatbot

Runtime error

App Files Community

edithram23 committed on Oct 31, 2024

Commit

e900f80

1 Parent(s): ac12f77

initial commit

Browse files

Files changed (3) hide show

app.py +4 -4
retriever.py +18 -12
setup.py +17 -12

app.py CHANGED Viewed

@@ -32,17 +32,17 @@ def process(audio, input_text, pdfs, chat_history: list[ChatMessage]):
         pdf_uploaded = True
         pdf_path = pdfs.name
         output_id = vector.upload_pdfs_user(pdf_path)
-        print(output_id)
     if pdfs is None:
         pdf_uploaded = False
         output_id = None
-        print(output_id)
     if audio is not None:
         transcript = transcriptor.get_transcript(audio)
         chat_history.append({"role": "user", "content": transcript})
     elif input_text:
-        print(input_text)
         chat_history.append({"role": "user", "content": input_text})
     else:
@@ -84,4 +84,4 @@ with gr.Blocks() as demo:
     )
 if __name__ == "__main__":
-    demo.launch(server_port=9000,quiet=True,show_api=False,app_kwargs={"docs_url":'/docs'})

         pdf_uploaded = True
         pdf_path = pdfs.name
         output_id = vector.upload_pdfs_user(pdf_path)
+        # print(output_id)
     if pdfs is None:
         pdf_uploaded = False
         output_id = None
+        # print(output_id)
     if audio is not None:
         transcript = transcriptor.get_transcript(audio)
         chat_history.append({"role": "user", "content": transcript})
     elif input_text:
+        # print(input_text)
         chat_history.append({"role": "user", "content": input_text})
     else:
     )
 if __name__ == "__main__":
+    demo.launch(server_port=9000)

retriever.py CHANGED Viewed

@@ -73,28 +73,34 @@ class Retriever():
         return stream
     def multiple_contexts(self,user_prompt):
-        questions = self.multi_questions(user_prompt).split("|")
         contexts = []
-        num = 3
-        for i in questions[:num]:
-            if(i!='' and i!=' ' and i.strip()!=''):
-                contexts+=self.filter(i)
-            else:
-                num+=1
         return contexts
-    def filter(self,query,k1=7,k2=17):
         retriever1 = self.vector_store.as_retriever(
                                             search_type="similarity_score_threshold",
                                             search_kwargs={"k": k1,
-                                                           'score_threshold':0.7,
                                                             'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
                                                             },
                                         )
         retriever2 = self.vector_store.as_retriever(
-                                            search_type="similarity_score_threshold",
                                             search_kwargs={"k": k2,
-                                                           'score_threshold':0.7,
                                                             'filter':models.Filter(must_not=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
                                                            },
                                         )
@@ -113,7 +119,7 @@ class Retriever():
         return ret
     def data_retrieve(self, query=''):
-        retrieved_docs = self.vector_store.similarity_search_with_score(query, k=5)
         return [doc for doc, _ in retrieved_docs]
 # ret = Retriever()

         return stream
     def multiple_contexts(self,user_prompt):
+        questions = self.filters
         contexts = []
+        for i in questions:
+            contexts+=self.filter_multiple(user_prompt,i,18)
+            print(len(contexts))
         return contexts
+    def filter_multiple(self,query,mapper,k1=10):
         retriever1 = self.vector_store.as_retriever(
                                             search_type="similarity_score_threshold",
                                             search_kwargs={"k": k1,
+                                                           'score_threshold':0.75,
+                                                            'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=mapper),)])
+                                                            },
+                                        )
+        ret = retriever1.invoke(query)
+        return ret
+    def filter(self,query,k1=10,k2=17):
+        retriever1 = self.vector_store.as_retriever(
+                                            search_type="mmr",
+                                            search_kwargs={"k": k1,
                                                             'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
                                                             },
                                         )
         retriever2 = self.vector_store.as_retriever(
+                                            search_type="mmr",
                                             search_kwargs={"k": k2,
                                                             'filter':models.Filter(must_not=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
                                                            },
                                         )
         return ret
     def data_retrieve(self, query=''):
+        retrieved_docs = self.vector_store.similarity_search_with_score(query, k=10)
         return [doc for doc, _ in retrieved_docs]
 # ret = Retriever()

setup.py CHANGED Viewed

@@ -25,8 +25,9 @@ load_dotenv('.env')
 class Script():
     def __init__(self):
         self.retriever = Retriever()
-        self.openai_client = ChatOpenAI(model="gpt-4o")
         self.groq = ChatGroq(model='llama3-70b-8192')
     def format_docs(self,format_results,id=False):
@@ -49,15 +50,14 @@ class Script():
     def gpt_loaders(self,query:str,history:str):
         template= f"""
-                    # You are an excellent Question & Answering BOT. Given a question and the context, you will answer the question only based on the given context.
-                    # You will be given a user_query (or) User_question (or) User_scenario.
                     ===============================
                     #USER_QUERY :  {{question}}
                     ===============================
                     #METADATA_OF_CONTEXT :
                     -> The context given is related to INDIAN-TAXATION.
-                    -> It may contain how to calculate tax for GOODS/SERVICES/INDIVIDUAL/CARS/TRAINS/etc. — anything related to INDIAN TAXES.
-                    -> Consider providing information about tax types like GST, RTO tax, and additional charges where relevant.
                     #CONTEXT : {{context}}
                     ===============================
                     You are also given previous ChatHistories (User question and corresponding AI answer) to you as extra data.
@@ -65,25 +65,30 @@ class Script():
                     --# If it is not relevant to the current question, do not take it.
                     #Chat History : {{history}}
                     ===============================
-                    -> You are allowed to provide the answer only from the given context.
                     -> Don't provide your own answer that is not in the given context.
-                    -> If you are not able to answer the given question from the context => PROVIDE "Sorry! Unable to find an answer for your question. Try Again."
-                    -> Try to be precise and provide a proper output for the question. Don't explain any questions too lengthy max[100 words].
-                    -> Provide an answer only to the question that is asked.
                     ===============================
                     # OUTPUT FORMAT:
                     -> Your output may be given to a voice model for a speech output. Try to be precise with your words. At the same time, fill the user with your answer.
                     -> Don't provide any further explanation apart from the answer output.
-                    -> Provide a brief breakdown of the different types of applicable taxes if relevant.
-                    -> Make sure to state the specific taxes for scenarios like vehicle purchases, real estate, etc., from the CONTEXT.
                 """
         rag_prompt = PromptTemplate.from_template(template)
         rag_chain = (
                     rag_prompt
                     | self.openai_client
                     | StrOutputParser()
                     )
-        question ={"context": self.format_docs(self.retriever.filter(query)), "question": query, "history": history}
         return rag_chain,question
     def gpt_loaders_id(self,query:str,history:str,id:str):

 class Script():
     def __init__(self):
         self.retriever = Retriever()
+        self.openai_client = ChatOpenAI(model="gpt-4o-mini",temperature=0.1)
         self.groq = ChatGroq(model='llama3-70b-8192')
+        self.groq1 = ChatGroq(model='llama3-8b-8192')
     def format_docs(self,format_results,id=False):
     def gpt_loaders(self,query:str,history:str):
         template= f"""
+                    # You are an excellent Question & Answering BOT based on Context.
+                    # TASK : Given a question and the context, you are required to answer the question..
+                    # User questions may be given as a user_query (or) User_question (or) User_scenario.
                     ===============================
                     #USER_QUERY :  {{question}}
                     ===============================
                     #METADATA_OF_CONTEXT :
                     -> The context given is related to INDIAN-TAXATION.
                     #CONTEXT : {{context}}
                     ===============================
                     You are also given previous ChatHistories (User question and corresponding AI answer) to you as extra data.
                     --# If it is not relevant to the current question, do not take it.
                     #Chat History : {{history}}
                     ===============================
                     -> Don't provide your own answer that is not in the given context.
+                    -> If you can provide a similar answer from the context that may be relevant but not exactly correct for the question, you can provide that answer.
+                    -> Try to provide a proper output for the question. Don't explain any questions too lengthy max[100 words].
                     ===============================
                     # OUTPUT FORMAT:
                     -> Your output may be given to a voice model for a speech output. Try to be precise with your words. At the same time, fill the user with your answer.
                     -> Don't provide any further explanation apart from the answer output.
+                    # STEP 1 : Generate a output for the query from the context:
+                    # STEP 2 : -> Based on the current output check if it is relevant to the question again.
+                               -> If you are not 100% able to answer the given question from the context => PROVIDE "Sorry! Unable to find an answer for your question. Try Again."
                 """
+        # template = f"""ANSWER THE USER QUESTION BASED ON THE GIVEN CONTEXT ALONE.
+        #     UESR QUESTION : {{question}}
+        #             CONTEXT : {{context}}
+        #             {{history}}
+        # """
         rag_prompt = PromptTemplate.from_template(template)
         rag_chain = (
                     rag_prompt
                     | self.openai_client
                     | StrOutputParser()
                     )
+        question ={"context": self.format_docs(self.retriever.multiple_contexts(query)), "question": query, "history": history}
         return rag_chain,question
     def gpt_loaders_id(self,query:str,history:str,id:str):