edithram23 commited on
Commit
6c077f9
·
1 Parent(s): 7661630

initial commit

Browse files
Files changed (4) hide show
  1. app.py +2 -2
  2. requirements.txt +0 -0
  3. retriever.py +55 -5
  4. setup.py +15 -14
app.py CHANGED
@@ -65,7 +65,7 @@ with gr.Blocks() as demo:
65
  with gr.Row():
66
  with gr.Column(scale=1, min_width=300):
67
  input_pdf = gr.File(label="Upload PDF", file_types=[".pdf"], file_count='single')
68
- gr.Markdown("_Use a PDF to enhance the chatbot's knowledge!_", visible=not pdf_uploaded)
69
 
70
  with gr.Row():
71
  chatbot = gr.Chatbot(label="Chatbot Conversation", type="messages", bubble_full_width=True, show_copy_button=True, autoscroll=True)
@@ -84,4 +84,4 @@ with gr.Blocks() as demo:
84
  )
85
 
86
  if __name__ == "__main__":
87
- demo.launch()
 
65
  with gr.Row():
66
  with gr.Column(scale=1, min_width=300):
67
  input_pdf = gr.File(label="Upload PDF", file_types=[".pdf"], file_count='single')
68
+ gr.Markdown("_Upload a PDF to chat with it!_", visible=not pdf_uploaded)
69
 
70
  with gr.Row():
71
  chatbot = gr.Chatbot(label="Chatbot Conversation", type="messages", bubble_full_width=True, show_copy_button=True, autoscroll=True)
 
84
  )
85
 
86
  if __name__ == "__main__":
87
+ demo.launch(server_port=9000,quiet=True,show_api=False,app_kwargs={"docs_url":'/docs'})
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
retriever.py CHANGED
@@ -3,6 +3,9 @@ from langchain_openai import OpenAIEmbeddings
3
  from qdrant_client import QdrantClient
4
  from langchain_qdrant import QdrantVectorStore
5
  from qdrant_client.http import models
 
 
 
6
 
7
  from dotenv import load_dotenv
8
 
@@ -33,18 +36,64 @@ class Retriever():
33
  'Indirect Tax Laws',
34
  'INDIAN Income Tax ACTS',
35
  'ONLINESITES']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- def filter(self,query):
 
 
 
 
 
 
 
 
 
 
 
38
  retriever1 = self.vector_store.as_retriever(
39
  search_type="similarity_score_threshold",
40
- search_kwargs={"k": 7,
41
  'score_threshold':0.7,
42
  'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
43
  },
44
  )
45
  retriever2 = self.vector_store.as_retriever(
46
  search_type="similarity_score_threshold",
47
- search_kwargs={"k": 17,
48
  'score_threshold':0.7,
49
  'filter':models.Filter(must_not=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
50
  },
@@ -64,7 +113,8 @@ class Retriever():
64
  return ret
65
 
66
  def data_retrieve(self, query=''):
67
- retrieved_docs = self.vector_store.similarity_search_with_score(query, k=20)
68
  return [doc for doc, _ in retrieved_docs]
69
 
70
-
 
 
3
  from qdrant_client import QdrantClient
4
  from langchain_qdrant import QdrantVectorStore
5
  from qdrant_client.http import models
6
+ from langchain_groq import ChatGroq
7
+ from langchain_core.output_parsers import StrOutputParser
8
+ from langchain_core.prompts import PromptTemplate
9
 
10
  from dotenv import load_dotenv
11
 
 
36
  'Indirect Tax Laws',
37
  'INDIAN Income Tax ACTS',
38
  'ONLINESITES']
39
+ self.groq = ChatGroq(model='llama3-70b-8192')
40
+
41
+
42
+
43
def multi_questions(self, user_prompt):
    """Decompose a user query into an ordered sequence of sub-queries.

    Asks the Groq LLM to rewrite *user_prompt* as a pipe-separated
    hierarchy of queries (basic requirement -> specific detail) so each
    sub-query can be retrieved against the vector store independently.

    Parameters
    ----------
    user_prompt : str
        The raw user question to decompose.

    Returns
    -------
    str
        The model's pipe-separated sub-queries, e.g. ``|q1|q2|q3|``.
    """
    llm = self.groq
    # Plain string: {user} is substituted by PromptTemplate, so no
    # f-string (and its doubled {{braces}}) is needed here.
    # Fix: example 1's USER_QUERY previously duplicated example 2's
    # income-style query, contradicting its car-purchase expected
    # output; it now actually mentions the car purchase.
    prompt = '''
# You are an excellent Query Decomposer for database retrieval optimization.
# You are given a user_query.
===============================
# TASK:
-> Your task is to provide a structured and hierarchical breakdown of the user query.
-> This breakdown should be in the form of an ordered sequence that helps in extracting the right context from the database.
-> Build the user query from the bottom level (basic requirements) to the top level (more specific details), ensuring the retrieval context improves at each level.
===============================
# USER_QUERY: {user}
===============================
# EXAMPLE:
1. #USER_QUERY: "I'm purchasing a car for 5 lakh, what type of taxes should I pay and how much?"
-> #EXPECTED OUTPUT: | I'm purchasing a car for 5 lakh. | What type of taxes should I pay on the purchase of automobiles? | What type of taxes should I pay on the purchase of a car for 5 lakh? |

2. #USER_QUERY: "For 5 lakh, what type of taxes should I pay and how much?"
-> #EXPECTED OUTPUT: | NEW TAX REGIME and Income tax. | My income is 5 lakh. What type of taxes should I pay and how much should I pay? |

===============================
# OUTPUT FORMAT:
-> Provide the formatted output separated with the pipe '|' enclosed as: |...|...|
-> Stick to the given format without any additional explanation. Your only response must be the formatted sequence of queries.
-> Do not answer the user question directly. Your job is to provide the decomposed queries in the format shown in the examples.
'''
    rag_prompt = PromptTemplate.from_template(prompt)
    chain = rag_prompt | llm | StrOutputParser()
    # Synchronous single-shot call; returns the raw pipe-delimited string.
    return chain.invoke({"user": user_prompt})
74
 
75
def multiple_contexts(self, user_prompt, max_questions=3):
    """Retrieve and concatenate contexts for each decomposed sub-query.

    Splits the pipe-separated output of ``multi_questions`` and runs
    ``self.filter`` on up to *max_questions* non-blank sub-queries,
    concatenating all retrieved documents.

    Bug fixed: the original iterated ``questions[:num]`` — a slice taken
    once — and bumped ``num`` when it met a blank fragment, which could
    never extend an already-materialized slice. The empty fragments
    produced by the leading/trailing '|' therefore silently consumed the
    question budget. We now skip blanks and count only real queries.

    Parameters
    ----------
    user_prompt : str
        The raw user question.
    max_questions : int
        Maximum number of sub-queries to retrieve for (default 3,
        matching the original intent).

    Returns
    -------
    list
        Concatenated documents from ``self.filter`` for each sub-query.
    """
    questions = self.multi_questions(user_prompt).split("|")
    contexts = []
    taken = 0
    for question in questions:
        if taken >= max_questions:
            break
        if question.strip():  # ignore empty fragments between '|' delimiters
            contexts += self.filter(question)
            taken += 1
    return contexts
85
+
86
+ def filter(self,query,k1=7,k2=17):
87
  retriever1 = self.vector_store.as_retriever(
88
  search_type="similarity_score_threshold",
89
+ search_kwargs={"k": k1,
90
  'score_threshold':0.7,
91
  'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
92
  },
93
  )
94
  retriever2 = self.vector_store.as_retriever(
95
  search_type="similarity_score_threshold",
96
+ search_kwargs={"k": k2,
97
  'score_threshold':0.7,
98
  'filter':models.Filter(must_not=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
99
  },
 
113
  return ret
114
 
115
def data_retrieve(self, query='', k=5):
    """Plain similarity search against the vector store, no metadata filter.

    Parameters
    ----------
    query : str
        Search text; embedded and matched by the vector store.
    k : int
        Number of documents to return. Defaults to 5, the value that
        was previously hard-coded, so existing callers are unaffected.

    Returns
    -------
    list
        The matched documents, with their similarity scores stripped.
    """
    scored = self.vector_store.similarity_search_with_score(query, k=k)
    # Callers only want the documents; drop the (doc, score) pairing.
    return [doc for doc, _score in scored]
118
 
119
+ # ret = Retriever()
120
+ # print(ret.multiple_contexts("i'm purchasing a car for 5Lack, what type of taxes should I pay and how much?"))
setup.py CHANGED
@@ -49,32 +49,33 @@ class Script():
49
 
50
  def gpt_loaders(self,query:str,history:str):
51
  template= f"""
52
- # You are an excellent Question & Answering BOT. Given a question and the context you will answer the question only based on the given context.
53
  # You will be given a user_query (or) User_question (or) User_scenario.
54
- # TASK: Your task is to provide an Answer to the USER_QUERY with the given CONTEXT_DATA.
55
  ===============================
56
  #USER_QUERY : {{question}}
57
  ===============================
58
- #METADATA_OF_CONTEXT : -> The context given is related to INDIAN-TAXATIONS.
59
- -> It may contain how to calculate tax for GOODS/SERVICES/INDIVIDUAL/CARS/TRAINS/etc anything related to INDIAN TAXES.
60
- -> Based on the user_query use the context accordingly.
61
- -> You can also provide a rough calculation for an example if asked for tax calculations related from the CONTEXT (if it is available in the CONTEXT).
62
  #CONTEXT : {{context}}
63
  ===============================
64
- You are also given previous ChatHistories (User question and corressponding AI answer) to you as an extra data.
65
- --# When to take the history as CONTEXT : Only if the history is relevant to the current question you are permitted to take the chat history as a context.
66
- --# If it is not relevant to the current question do not take it.
67
  #Chat History : {{history}}
68
  ===============================
69
  -> You are allowed to provide the answer only from the given context.
70
  -> Don't provide your own answer that is not in the given context.
71
  -> If you are not able to answer the given question from the context => PROVIDE "Sorry! Unable to find an answer for your question. Try Again."
72
- -> Try to be a precise and provide a proper output for the question. Don't explain any questions too lengthy max[100 words].
73
- -> Provide answer only to the question that is asked.
74
  ===============================
75
  # OUTPUT FORMAT:
76
- -> Your output may be given to a voice model for a speech output. Try to be precise with your words. At the same time, fill the user with your answer
77
- -> Don't provide any etc explanation apart from the answer output.
 
 
78
  """
79
  rag_prompt = PromptTemplate.from_template(template)
80
  rag_chain = (
@@ -82,7 +83,7 @@ class Script():
82
  | self.openai_client
83
  | StrOutputParser()
84
  )
85
- question ={"context": self.format_docs(self.retriever.data_retrieve(query)), "question": query, "history": history}
86
  return rag_chain,question
87
 
88
  def gpt_loaders_id(self,query:str,history:str,id:str):
 
49
 
50
  def gpt_loaders(self,query:str,history:str):
51
  template= f"""
52
+ # You are an excellent Question & Answering BOT. Given a question and the context, you will answer the question only based on the given context.
53
  # You will be given a user_query (or) User_question (or) User_scenario.
 
54
  ===============================
55
  #USER_QUERY : {{question}}
56
  ===============================
57
+ #METADATA_OF_CONTEXT :
58
+ -> The context given is related to INDIAN-TAXATION.
59
+ -> It may contain how to calculate tax for GOODS/SERVICES/INDIVIDUAL/CARS/TRAINS/etc. — anything related to INDIAN TAXES.
60
+ -> Consider providing information about tax types like GST, RTO tax, and additional charges where relevant.
61
  #CONTEXT : {{context}}
62
  ===============================
63
+ You are also given previous ChatHistories (User question and corresponding AI answer) to you as extra data.
64
+ --# When to take the history as CONTEXT: Only if the history is relevant to the current question, you are permitted to take the chat history as a context.
65
+ --# If it is not relevant to the current question, do not take it.
66
  #Chat History : {{history}}
67
  ===============================
68
  -> You are allowed to provide the answer only from the given context.
69
  -> Don't provide your own answer that is not in the given context.
70
  -> If you are not able to answer the given question from the context => PROVIDE "Sorry! Unable to find an answer for your question. Try Again."
71
+ -> Try to be precise and provide a proper output for the question. Don't explain any questions too lengthy max[100 words].
72
+ -> Provide an answer only to the question that is asked.
73
  ===============================
74
  # OUTPUT FORMAT:
75
+ -> Your output may be given to a voice model for a speech output. Try to be precise with your words. At the same time, fill the user with your answer.
76
+ -> Don't provide any further explanation apart from the answer output.
77
+ -> Provide a brief breakdown of the different types of applicable taxes if relevant.
78
+ -> Make sure to state the specific taxes for scenarios like vehicle purchases, real estate, etc., from the CONTEXT.
79
  """
80
  rag_prompt = PromptTemplate.from_template(template)
81
  rag_chain = (
 
83
  | self.openai_client
84
  | StrOutputParser()
85
  )
86
+ question ={"context": self.format_docs(self.retriever.filter(query)), "question": query, "history": history}
87
  return rag_chain,question
88
 
89
  def gpt_loaders_id(self,query:str,history:str,id:str):