Commit 6c077f9
Parent(s): 7661630
initial commit

Files changed:
- app.py +2 -2
- requirements.txt +0 -0
- retriever.py +55 -5
- setup.py +15 -14
app.py
CHANGED
@@ -65,7 +65,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column(scale=1, min_width=300):
             input_pdf = gr.File(label="Upload PDF", file_types=[".pdf"], file_count='single')
-            gr.Markdown("…
+            gr.Markdown("_Upload a PDF to chat with it!_", visible=not pdf_uploaded)
 
     with gr.Row():
         chatbot = gr.Chatbot(label="Chatbot Conversation", type="messages", bubble_full_width=True, show_copy_button=True, autoscroll=True)
@@ -84,4 +84,4 @@ with gr.Blocks() as demo:
     )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(server_port=9000,quiet=True,show_api=False,app_kwargs={"docs_url":'/docs'})
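A note on the `visible=not pdf_uploaded` change: it assumes a `pdf_uploaded` boolean already in scope in app.py, and since `visible=` is evaluated only once when the layout is built, the hint is typically re-hidden through an upload event. A minimal sketch under that assumption (the flag, handler, and wiring here are illustrative, not part of this commit):

import gradio as gr

# Minimal sketch, assuming `pdf_uploaded` is a module-level flag set
# elsewhere in app.py; it controls only the *initial* visibility, because
# `visible=` is evaluated once when the Blocks layout is constructed.
pdf_uploaded = False

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1, min_width=300):
            input_pdf = gr.File(label="Upload PDF", file_types=[".pdf"], file_count='single')
            hint = gr.Markdown("_Upload a PDF to chat with it!_", visible=not pdf_uploaded)

    # Hypothetical handler (not in the commit): hide the hint once a file arrives.
    def on_upload(file):
        return gr.update(visible=False)

    input_pdf.upload(on_upload, inputs=input_pdf, outputs=hint)

if __name__ == "__main__":
    # Mirrors the new launch call: fixed port, quieter logs, hidden API page,
    # and the underlying FastAPI docs exposed at /docs via app_kwargs.
    demo.launch(server_port=9000, quiet=True, show_api=False,
                app_kwargs={"docs_url": "/docs"})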
requirements.txt
CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
retriever.py
CHANGED
@@ -3,6 +3,9 @@ from langchain_openai import OpenAIEmbeddings
 from qdrant_client import QdrantClient
 from langchain_qdrant import QdrantVectorStore
 from qdrant_client.http import models
+from langchain_groq import ChatGroq
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import PromptTemplate
 
 from dotenv import load_dotenv
 
@@ -33,18 +36,64 @@ class Retriever():
                         'Indirect Tax Laws',
                         'INDIAN Income Tax ACTS',
                         'ONLINESITES']
+        self.groq = ChatGroq(model='llama3-70b-8192')
+
+
+
+    def multi_questions(self,user_prompt):
+        llm = self.groq
+        prompt = f'''
+        # You are an excellent Query Decomposer for database retrieval optimization.
+        # You are given a user_query.
+        ===============================
+        # TASK:
+        -> Your task is to provide a structured and hierarchical breakdown of the user query.
+        -> This breakdown should be in the form of an ordered sequence that helps in extracting the right context from the database.
+        -> Build the user query from the bottom level (basic requirements) to the top level (more specific details), ensuring the retrieval context improves at each level.
+        ===============================
+        # USER_QUERY: {{user}}
+        ===============================
+        # EXAMPLE:
+        1. #USER_QUERY: "For 5 lakh, what type of taxes should I pay and how much?"
+        -> #EXPECTED OUTPUT: | I'm purchasing a car for 5 lakh. | What type of taxes should I pay on the purchase of automobiles? | What type of taxes should I pay on the purchase of a car for 5 lakh? |
+
+        2. #USER_QUERY: "For 5 lakh, what type of taxes should I pay and how much?"
+        -> #EXPECTED OUTPUT: | NEW TAX REGIME and Income tax. | My income is 5 lakh. What type of taxes should I pay and how much should I pay? |
+
+        ===============================
+        # OUTPUT FORMAT:
+        -> Provide the formatted output separated with the pipe '|' enclosed as: |...|...|
+        -> Stick to the given format without any additional explanation. Your only response must be the formatted sequence of queries.
+        -> Do not answer the user question directly. Your job is to provide the decomposed queries in the format shown in the examples.
+        '''
+
+        rag_prompt = PromptTemplate.from_template(prompt)
+        l = (rag_prompt | llm | StrOutputParser())
+        stream = l.invoke({"user":user_prompt})
+        return stream
 
-    def …
+    def multiple_contexts(self,user_prompt):
+        questions = self.multi_questions(user_prompt).split("|")
+        contexts = []
+        num = 3
+        for i in questions[:num]:
+            if(i!='' and i!=' ' and i.strip()!=''):
+                contexts+=self.filter(i)
+            else:
+                num+=1
+        return contexts
+
+    def filter(self,query,k1=7,k2=17):
         retriever1 = self.vector_store.as_retriever(
             search_type="similarity_score_threshold",
-            search_kwargs={"k": …
+            search_kwargs={"k": k1,
                            'score_threshold':0.7,
                            'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
                            },
         )
         retriever2 = self.vector_store.as_retriever(
             search_type="similarity_score_threshold",
-            search_kwargs={"k": …
+            search_kwargs={"k": k2,
                            'score_threshold':0.7,
                            'filter':models.Filter(must_not=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
                            },
@@ -64,7 +113,8 @@ class Retriever():
         return ret
 
     def data_retrieve(self, query=''):
-        retrieved_docs = self.vector_store.similarity_search_with_score(query, k=…
+        retrieved_docs = self.vector_store.similarity_search_with_score(query, k=5)
         return [doc for doc, _ in retrieved_docs]
 
-…
+# ret = Retriever()
+# print(ret.multiple_contexts("i'm purchasing a car for 5Lack, what type of taxes should I pay and how much?"))
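A note on the new decomposition flow: `multiple_contexts` depends on `multi_questions` returning the pipe-delimited format the prompt demands, splits on '|', and skips empty fragments before sending each sub-query through `filter`. A standalone sketch of just that parsing step, with hypothetical model output (this is not code from the commit):

# Hypothetical model output in the |...|...| format the prompt requests.
raw = "| I'm purchasing a car for 5 lakh. | What type of taxes should I pay on the purchase of automobiles? |"

# Splitting on '|' produces empty fragments at the edges; strip and drop
# them, which is what the i.strip() != '' check in multiple_contexts guards.
questions = [q.strip() for q in raw.split("|") if q.strip()]
print(questions)
# ["I'm purchasing a car for 5 lakh.", 'What type of taxes should I pay on the purchase of automobiles?']

Each surviving sub-query then goes through `filter`, where retriever1 restricts matches to documents whose metadata.DOCUMENT_IS_ABOUT equals the last filter value and retriever2 excludes them, with separate k1/k2 result budgets.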
setup.py
CHANGED
@@ -49,32 +49,33 @@ class Script():
 
     def gpt_loaders(self,query:str,history:str):
         template= f"""
-        # You are an excellent Question & Answering BOT. Given a question and the context you will answer the question only based on the given context.
+        # You are an excellent Question & Answering BOT. Given a question and the context, you will answer the question only based on the given context.
         # You will be given a user_query (or) User_question (or) User_scenario.
-        # TASK: Your task is to provide an Answer to the USER_QUERY with the given CONTEXT_DATA.
         ===============================
         #USER_QUERY : {{question}}
         ===============================
-        #METADATA_OF_CONTEXT : …
-        …
-        …
-        …
+        #METADATA_OF_CONTEXT :
+        -> The context given is related to INDIAN-TAXATION.
+        -> It may contain how to calculate tax for GOODS/SERVICES/INDIVIDUAL/CARS/TRAINS/etc. — anything related to INDIAN TAXES.
+        -> Consider providing information about tax types like GST, RTO tax, and additional charges where relevant.
         #CONTEXT : {{context}}
         ===============================
-        You are also given previous ChatHistories (User question and …
-        --# When to take the history as CONTEXT …
-        --# If it is not relevant to the current question do not take it.
+        You are also given previous ChatHistories (User question and corresponding AI answer) to you as extra data.
+        --# When to take the history as CONTEXT: Only if the history is relevant to the current question, you are permitted to take the chat history as a context.
+        --# If it is not relevant to the current question, do not take it.
         #Chat History : {{history}}
         ===============================
         -> You are allowed to provide the answer only from the given context.
         -> Don't provide your own answer that is not in the given context.
         -> If you are not able to answer the given question from the context => PROVIDE "Sorry! Unable to find an answer for your question. Try Again."
-        -> Try to be …
-        -> Provide answer only to the question that is asked.
+        -> Try to be precise and provide a proper output for the question. Don't explain any questions too lengthy max[100 words].
+        -> Provide an answer only to the question that is asked.
        ===============================
         # OUTPUT FORMAT:
-        …
-        …
+        -> Your output may be given to a voice model for a speech output. Try to be precise with your words. At the same time, fill the user with your answer.
+        -> Don't provide any further explanation apart from the answer output.
+        -> Provide a brief breakdown of the different types of applicable taxes if relevant.
+        -> Make sure to state the specific taxes for scenarios like vehicle purchases, real estate, etc., from the CONTEXT.
         """
         rag_prompt = PromptTemplate.from_template(template)
         rag_chain = (
@@ -82,7 +83,7 @@ class Script():
             | self.openai_client
             | StrOutputParser()
         )
-        question ={"context": self.format_docs(self.retriever.…
+        question ={"context": self.format_docs(self.retriever.filter(query)), "question": query, "history": history}
         return rag_chain,question
 
     def gpt_loaders_id(self,query:str,history:str,id:str):
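For reference, the `(rag_chain, question)` pair returned by `gpt_loaders` is presumably consumed with a single LCEL invoke; a minimal sketch under that assumption (the `script` instance and the query are illustrative, not from this commit):

# Minimal sketch: `script` is an already-constructed Script instance.
rag_chain, question = script.gpt_loaders(
    query="What taxes apply when purchasing a car for 5 lakh?",
    history="",
)

# `question` maps {context} (retriever.filter results joined by
# format_docs), {question}, and {history} onto the prompt template;
# StrOutputParser reduces the model message to a plain string.
answer = rag_chain.invoke(question)
print(answer)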