edithram23 commited on
Commit
e900f80
·
1 Parent(s): ac12f77

initial commit

Browse files
Files changed (3) hide show
  1. app.py +4 -4
  2. retriever.py +18 -12
  3. setup.py +17 -12
app.py CHANGED
@@ -32,17 +32,17 @@ def process(audio, input_text, pdfs, chat_history: list[ChatMessage]):
32
  pdf_uploaded = True
33
  pdf_path = pdfs.name
34
  output_id = vector.upload_pdfs_user(pdf_path)
35
- print(output_id)
36
  if pdfs is None:
37
  pdf_uploaded = False
38
  output_id = None
39
- print(output_id)
40
  if audio is not None:
41
  transcript = transcriptor.get_transcript(audio)
42
  chat_history.append({"role": "user", "content": transcript})
43
 
44
  elif input_text:
45
- print(input_text)
46
  chat_history.append({"role": "user", "content": input_text})
47
 
48
  else:
@@ -84,4 +84,4 @@ with gr.Blocks() as demo:
84
  )
85
 
86
  if __name__ == "__main__":
87
- demo.launch(server_port=9000,quiet=True,show_api=False,app_kwargs={"docs_url":'/docs'})
 
32
  pdf_uploaded = True
33
  pdf_path = pdfs.name
34
  output_id = vector.upload_pdfs_user(pdf_path)
35
+ # print(output_id)
36
  if pdfs is None:
37
  pdf_uploaded = False
38
  output_id = None
39
+ # print(output_id)
40
  if audio is not None:
41
  transcript = transcriptor.get_transcript(audio)
42
  chat_history.append({"role": "user", "content": transcript})
43
 
44
  elif input_text:
45
+ # print(input_text)
46
  chat_history.append({"role": "user", "content": input_text})
47
 
48
  else:
 
84
  )
85
 
86
if __name__ == "__main__":
    # Local entry point: serve the Gradio UI on port 9000 (blocking call).
    demo.launch(server_port=9000)
retriever.py CHANGED
@@ -73,28 +73,34 @@ class Retriever():
73
  return stream
74
 
75
  def multiple_contexts(self,user_prompt):
76
- questions = self.multi_questions(user_prompt).split("|")
77
  contexts = []
78
- num = 3
79
- for i in questions[:num]:
80
- if(i!='' and i!=' ' and i.strip()!=''):
81
- contexts+=self.filter(i)
82
- else:
83
- num+=1
84
  return contexts
85
 
86
- def filter(self,query,k1=7,k2=17):
87
  retriever1 = self.vector_store.as_retriever(
88
  search_type="similarity_score_threshold",
89
  search_kwargs={"k": k1,
90
- 'score_threshold':0.7,
 
 
 
 
 
 
 
 
 
 
91
  'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
92
  },
93
  )
94
  retriever2 = self.vector_store.as_retriever(
95
- search_type="similarity_score_threshold",
96
  search_kwargs={"k": k2,
97
- 'score_threshold':0.7,
98
  'filter':models.Filter(must_not=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
99
  },
100
  )
@@ -113,7 +119,7 @@ class Retriever():
113
  return ret
114
 
115
  def data_retrieve(self, query=''):
116
- retrieved_docs = self.vector_store.similarity_search_with_score(query, k=5)
117
  return [doc for doc, _ in retrieved_docs]
118
 
119
  # ret = Retriever()
 
73
  return stream
74
 
75
  def multiple_contexts(self,user_prompt):
76
+ questions = self.filters
77
  contexts = []
78
+ for i in questions:
79
+ contexts+=self.filter_multiple(user_prompt,i,18)
80
+ print(len(contexts))
 
 
 
81
  return contexts
82
 
83
    def filter_multiple(self,query,mapper,k1=10):
        """Similarity search restricted to a single document category.

        Args:
            query: user query text to embed and search with.
            mapper: value matched against the payload key
                ``metadata.DOCUMENT_IS_ABOUT`` (Qdrant must-match filter).
            k1: maximum number of documents to return (default 10).

        Returns:
            Documents whose similarity score passes the 0.75 threshold
            and whose metadata category equals *mapper*.
        """
        retriever1 = self.vector_store.as_retriever(
            search_type="similarity_score_threshold",
            # Keep only hits scoring >= 0.75 AND tagged with this category.
            search_kwargs={"k": k1,
                           'score_threshold':0.75,
                           'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=mapper),)])
                           },
        )
        ret = retriever1.invoke(query)
        return ret
93
+
94
+ def filter(self,query,k1=10,k2=17):
95
+ retriever1 = self.vector_store.as_retriever(
96
+ search_type="mmr",
97
+ search_kwargs={"k": k1,
98
  'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
99
  },
100
  )
101
  retriever2 = self.vector_store.as_retriever(
102
+ search_type="mmr",
103
  search_kwargs={"k": k2,
 
104
  'filter':models.Filter(must_not=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
105
  },
106
  )
 
119
  return ret
120
 
121
  def data_retrieve(self, query=''):
122
+ retrieved_docs = self.vector_store.similarity_search_with_score(query, k=10)
123
  return [doc for doc, _ in retrieved_docs]
124
 
125
  # ret = Retriever()
setup.py CHANGED
@@ -25,8 +25,9 @@ load_dotenv('.env')
25
  class Script():
26
  def __init__(self):
27
  self.retriever = Retriever()
28
- self.openai_client = ChatOpenAI(model="gpt-4o")
29
  self.groq = ChatGroq(model='llama3-70b-8192')
 
30
 
31
 
32
  def format_docs(self,format_results,id=False):
@@ -49,15 +50,14 @@ class Script():
49
 
50
  def gpt_loaders(self,query:str,history:str):
51
  template= f"""
52
- # You are an excellent Question & Answering BOT. Given a question and the context, you will answer the question only based on the given context.
53
- # You will be given a user_query (or) User_question (or) User_scenario.
 
54
  ===============================
55
  #USER_QUERY : {{question}}
56
  ===============================
57
  #METADATA_OF_CONTEXT :
58
  -> The context given is related to INDIAN-TAXATION.
59
- -> It may contain how to calculate tax for GOODS/SERVICES/INDIVIDUAL/CARS/TRAINS/etc. — anything related to INDIAN TAXES.
60
- -> Consider providing information about tax types like GST, RTO tax, and additional charges where relevant.
61
  #CONTEXT : {{context}}
62
  ===============================
63
  You are also given previous ChatHistories (User question and corresponding AI answer) to you as extra data.
@@ -65,25 +65,30 @@ class Script():
65
  --# If it is not relevant to the current question, do not take it.
66
  #Chat History : {{history}}
67
  ===============================
68
- -> You are allowed to provide the answer only from the given context.
69
  -> Don't provide your own answer that is not in the given context.
70
- -> If you are not able to answer the given question from the context => PROVIDE "Sorry! Unable to find an answer for your question. Try Again."
71
- -> Try to be precise and provide a proper output for the question. Don't explain any questions too lengthy max[100 words].
72
- -> Provide an answer only to the question that is asked.
73
  ===============================
74
  # OUTPUT FORMAT:
75
  -> Your output may be given to a voice model for a speech output. Try to be precise with your words. At the same time, fill the user with your answer.
76
  -> Don't provide any further explanation apart from the answer output.
77
- -> Provide a brief breakdown of the different types of applicable taxes if relevant.
78
- -> Make sure to state the specific taxes for scenarios like vehicle purchases, real estate, etc., from the CONTEXT.
 
 
79
  """
 
 
 
 
 
80
  rag_prompt = PromptTemplate.from_template(template)
81
  rag_chain = (
82
  rag_prompt
83
  | self.openai_client
84
  | StrOutputParser()
85
  )
86
- question ={"context": self.format_docs(self.retriever.filter(query)), "question": query, "history": history}
87
  return rag_chain,question
88
 
89
  def gpt_loaders_id(self,query:str,history:str,id:str):
 
25
  class Script():
26
    def __init__(self):
        # Retrieval layer (vector store + category filters) used to build
        # the prompt context.
        self.retriever = Retriever()
        # Primary answer-generation LLM; low temperature keeps output
        # close to the retrieved context.
        self.openai_client = ChatOpenAI(model="gpt-4o-mini",temperature=0.1)
        # Groq-hosted Llama-3 clients: 70B and a lighter 8B variant.
        self.groq = ChatGroq(model='llama3-70b-8192')
        self.groq1 = ChatGroq(model='llama3-8b-8192')
31
 
32
 
33
  def format_docs(self,format_results,id=False):
 
50
 
51
  def gpt_loaders(self,query:str,history:str):
52
  template= f"""
53
+ # You are an excellent Question & Answering BOT based on Context.
54
+ # TASK : Given a question and the context, you are required to answer the question..
55
+ # User questions may be given as a user_query (or) User_question (or) User_scenario.
56
  ===============================
57
  #USER_QUERY : {{question}}
58
  ===============================
59
  #METADATA_OF_CONTEXT :
60
  -> The context given is related to INDIAN-TAXATION.
 
 
61
  #CONTEXT : {{context}}
62
  ===============================
63
  You are also given previous ChatHistories (User question and corresponding AI answer) to you as extra data.
 
65
  --# If it is not relevant to the current question, do not take it.
66
  #Chat History : {{history}}
67
  ===============================
 
68
  -> Don't provide your own answer that is not in the given context.
69
+ -> If you can provide a similar answer from the context that may be relevant but not exactly correct for the question, you can provide that answer.
70
+ -> Try to provide a proper output for the question. Don't explain any questions too lengthy max[100 words].
 
71
  ===============================
72
  # OUTPUT FORMAT:
73
  -> Your output may be given to a voice model for a speech output. Try to be precise with your words. At the same time, fill the user with your answer.
74
  -> Don't provide any further explanation apart from the answer output.
75
+ # STEP 1 : Generate a output for the query from the context:
76
+ # STEP 2 : -> Based on the current output check if it is relevant to the question again.
77
+ -> If you are not 100% able to answer the given question from the context => PROVIDE "Sorry! Unable to find an answer for your question. Try Again."
78
+
79
  """
80
+ # template = f"""ANSWER THE USER QUESTION BASED ON THE GIVEN CONTEXT ALONE.
81
+ # USER QUESTION : {{question}}
82
+ # CONTEXT : {{context}}
83
+ # {{history}}
84
+ # """
85
  rag_prompt = PromptTemplate.from_template(template)
86
  rag_chain = (
87
  rag_prompt
88
  | self.openai_client
89
  | StrOutputParser()
90
  )
91
+ question ={"context": self.format_docs(self.retriever.multiple_contexts(query)), "question": query, "history": history}
92
  return rag_chain,question
93
 
94
  def gpt_loaders_id(self,query:str,history:str,id:str):