Spaces:
Runtime error
Runtime error
Commit
·
e900f80
1
Parent(s):
ac12f77
initial commit
Browse files
- app.py +4 -4
- retriever.py +18 -12
- setup.py +17 -12
app.py
CHANGED
@@ -32,17 +32,17 @@ def process(audio, input_text, pdfs, chat_history: list[ChatMessage]):
|
|
32 |
pdf_uploaded = True
|
33 |
pdf_path = pdfs.name
|
34 |
output_id = vector.upload_pdfs_user(pdf_path)
|
35 |
-
print(output_id)
|
36 |
if pdfs is None:
|
37 |
pdf_uploaded = False
|
38 |
output_id = None
|
39 |
-
print(output_id)
|
40 |
if audio is not None:
|
41 |
transcript = transcriptor.get_transcript(audio)
|
42 |
chat_history.append({"role": "user", "content": transcript})
|
43 |
|
44 |
elif input_text:
|
45 |
-
print(input_text)
|
46 |
chat_history.append({"role": "user", "content": input_text})
|
47 |
|
48 |
else:
|
@@ -84,4 +84,4 @@ with gr.Blocks() as demo:
|
|
84 |
)
|
85 |
|
86 |
if __name__ == "__main__":
|
87 |
-
demo.launch(server_port=9000
|
|
|
32 |
pdf_uploaded = True
|
33 |
pdf_path = pdfs.name
|
34 |
output_id = vector.upload_pdfs_user(pdf_path)
|
35 |
+
# print(output_id)
|
36 |
if pdfs is None:
|
37 |
pdf_uploaded = False
|
38 |
output_id = None
|
39 |
+
# print(output_id)
|
40 |
if audio is not None:
|
41 |
transcript = transcriptor.get_transcript(audio)
|
42 |
chat_history.append({"role": "user", "content": transcript})
|
43 |
|
44 |
elif input_text:
|
45 |
+
# print(input_text)
|
46 |
chat_history.append({"role": "user", "content": input_text})
|
47 |
|
48 |
else:
|
|
|
84 |
)
|
85 |
|
86 |
if __name__ == "__main__":
|
87 |
+
demo.launch(server_port=9000)
|
retriever.py
CHANGED
@@ -73,28 +73,34 @@ class Retriever():
|
|
73 |
return stream
|
74 |
|
75 |
def multiple_contexts(self,user_prompt):
|
76 |
-
questions = self.
|
77 |
contexts = []
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
contexts+=self.filter(i)
|
82 |
-
else:
|
83 |
-
num+=1
|
84 |
return contexts
|
85 |
|
86 |
-
def
|
87 |
retriever1 = self.vector_store.as_retriever(
|
88 |
search_type="similarity_score_threshold",
|
89 |
search_kwargs={"k": k1,
|
90 |
-
'score_threshold':0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
|
92 |
},
|
93 |
)
|
94 |
retriever2 = self.vector_store.as_retriever(
|
95 |
-
search_type="
|
96 |
search_kwargs={"k": k2,
|
97 |
-
'score_threshold':0.7,
|
98 |
'filter':models.Filter(must_not=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
|
99 |
},
|
100 |
)
|
@@ -113,7 +119,7 @@ class Retriever():
|
|
113 |
return ret
|
114 |
|
115 |
def data_retrieve(self, query=''):
|
116 |
-
retrieved_docs = self.vector_store.similarity_search_with_score(query, k=
|
117 |
return [doc for doc, _ in retrieved_docs]
|
118 |
|
119 |
# ret = Retriever()
|
|
|
73 |
return stream
|
74 |
|
75 |
def multiple_contexts(self,user_prompt):
|
76 |
+
questions = self.filters
|
77 |
contexts = []
|
78 |
+
for i in questions:
|
79 |
+
contexts+=self.filter_multiple(user_prompt,i,18)
|
80 |
+
print(len(contexts))
|
|
|
|
|
|
|
81 |
return contexts
|
82 |
|
83 |
+
def filter_multiple(self,query,mapper,k1=10):
|
84 |
retriever1 = self.vector_store.as_retriever(
|
85 |
search_type="similarity_score_threshold",
|
86 |
search_kwargs={"k": k1,
|
87 |
+
'score_threshold':0.75,
|
88 |
+
'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=mapper),)])
|
89 |
+
},
|
90 |
+
)
|
91 |
+
ret = retriever1.invoke(query)
|
92 |
+
return ret
|
93 |
+
|
94 |
+
def filter(self,query,k1=10,k2=17):
|
95 |
+
retriever1 = self.vector_store.as_retriever(
|
96 |
+
search_type="mmr",
|
97 |
+
search_kwargs={"k": k1,
|
98 |
'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
|
99 |
},
|
100 |
)
|
101 |
retriever2 = self.vector_store.as_retriever(
|
102 |
+
search_type="mmr",
|
103 |
search_kwargs={"k": k2,
|
|
|
104 |
'filter':models.Filter(must_not=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
|
105 |
},
|
106 |
)
|
|
|
119 |
return ret
|
120 |
|
121 |
def data_retrieve(self, query=''):
|
122 |
+
retrieved_docs = self.vector_store.similarity_search_with_score(query, k=10)
|
123 |
return [doc for doc, _ in retrieved_docs]
|
124 |
|
125 |
# ret = Retriever()
|
setup.py
CHANGED
@@ -25,8 +25,9 @@ load_dotenv('.env')
|
|
25 |
class Script():
|
26 |
def __init__(self):
|
27 |
self.retriever = Retriever()
|
28 |
-
self.openai_client = ChatOpenAI(model="gpt-4o")
|
29 |
self.groq = ChatGroq(model='llama3-70b-8192')
|
|
|
30 |
|
31 |
|
32 |
def format_docs(self,format_results,id=False):
|
@@ -49,15 +50,14 @@ class Script():
|
|
49 |
|
50 |
def gpt_loaders(self,query:str,history:str):
|
51 |
template= f"""
|
52 |
-
# You are an excellent Question & Answering BOT
|
53 |
-
#
|
|
|
54 |
===============================
|
55 |
#USER_QUERY : {{question}}
|
56 |
===============================
|
57 |
#METADATA_OF_CONTEXT :
|
58 |
-> The context given is related to INDIAN-TAXATION.
|
59 |
-
-> It may contain how to calculate tax for GOODS/SERVICES/INDIVIDUAL/CARS/TRAINS/etc. — anything related to INDIAN TAXES.
|
60 |
-
-> Consider providing information about tax types like GST, RTO tax, and additional charges where relevant.
|
61 |
#CONTEXT : {{context}}
|
62 |
===============================
|
63 |
You are also given previous ChatHistories (User question and corresponding AI answer) to you as extra data.
|
@@ -65,25 +65,30 @@ class Script():
|
|
65 |
--# If it is not relevant to the current question, do not take it.
|
66 |
#Chat History : {{history}}
|
67 |
===============================
|
68 |
-
-> You are allowed to provide the answer only from the given context.
|
69 |
-> Don't provide your own answer that is not in the given context.
|
70 |
-
-> If you
|
71 |
-
-> Try to
|
72 |
-
-> Provide an answer only to the question that is asked.
|
73 |
===============================
|
74 |
# OUTPUT FORMAT:
|
75 |
-> Your output may be given to a voice model for a speech output. Try to be precise with your words. At the same time, fill the user with your answer.
|
76 |
-> Don't provide any further explanation apart from the answer output.
|
77 |
-
|
78 |
-
|
|
|
|
|
79 |
"""
|
|
|
|
|
|
|
|
|
|
|
80 |
rag_prompt = PromptTemplate.from_template(template)
|
81 |
rag_chain = (
|
82 |
rag_prompt
|
83 |
| self.openai_client
|
84 |
| StrOutputParser()
|
85 |
)
|
86 |
-
question ={"context": self.format_docs(self.retriever.
|
87 |
return rag_chain,question
|
88 |
|
89 |
def gpt_loaders_id(self,query:str,history:str,id:str):
|
|
|
25 |
class Script():
|
26 |
def __init__(self):
|
27 |
self.retriever = Retriever()
|
28 |
+
self.openai_client = ChatOpenAI(model="gpt-4o-mini",temperature=0.1)
|
29 |
self.groq = ChatGroq(model='llama3-70b-8192')
|
30 |
+
self.groq1 = ChatGroq(model='llama3-8b-8192')
|
31 |
|
32 |
|
33 |
def format_docs(self,format_results,id=False):
|
|
|
50 |
|
51 |
def gpt_loaders(self,query:str,history:str):
|
52 |
template= f"""
|
53 |
+
# You are an excellent Question & Answering BOT based on Context.
|
54 |
+
# TASK : Given a question and the context, you are required to answer the question..
|
55 |
+
# User questions may be given as a user_query (or) User_question (or) User_scenario.
|
56 |
===============================
|
57 |
#USER_QUERY : {{question}}
|
58 |
===============================
|
59 |
#METADATA_OF_CONTEXT :
|
60 |
-> The context given is related to INDIAN-TAXATION.
|
|
|
|
|
61 |
#CONTEXT : {{context}}
|
62 |
===============================
|
63 |
You are also given previous ChatHistories (User question and corresponding AI answer) to you as extra data.
|
|
|
65 |
--# If it is not relevant to the current question, do not take it.
|
66 |
#Chat History : {{history}}
|
67 |
===============================
|
|
|
68 |
-> Don't provide your own answer that is not in the given context.
|
69 |
+
-> If you can provide a similar answer from the context that may be relevant but not exactly correct for the question, you can provide that answer.
|
70 |
+
-> Try to provide a proper output for the question. Don't explain any questions too lengthy max[100 words].
|
|
|
71 |
===============================
|
72 |
# OUTPUT FORMAT:
|
73 |
-> Your output may be given to a voice model for a speech output. Try to be precise with your words. At the same time, fill the user with your answer.
|
74 |
-> Don't provide any further explanation apart from the answer output.
|
75 |
+
# STEP 1 : Generate a output for the query from the context:
|
76 |
+
# STEP 2 : -> Based on the current output check if it is relevant to the question again.
|
77 |
+
-> If you are not 100% able to answer the given question from the context => PROVIDE "Sorry! Unable to find an answer for your question. Try Again."
|
78 |
+
|
79 |
"""
|
80 |
+
# template = f"""ANSWER THE USER QUESTION BASED ON THE GIVEN CONTEXT ALONE.
|
81 |
+
# UESR QUESTION : {{question}}
|
82 |
+
# CONTEXT : {{context}}
|
83 |
+
# {{history}}
|
84 |
+
# """
|
85 |
rag_prompt = PromptTemplate.from_template(template)
|
86 |
rag_chain = (
|
87 |
rag_prompt
|
88 |
| self.openai_client
|
89 |
| StrOutputParser()
|
90 |
)
|
91 |
+
question ={"context": self.format_docs(self.retriever.multiple_contexts(query)), "question": query, "history": history}
|
92 |
return rag_chain,question
|
93 |
|
94 |
def gpt_loaders_id(self,query:str,history:str,id:str):
|