import os
from langchain_openai import OpenAIEmbeddings
from qdrant_client import QdrantClient
from langchain_qdrant import QdrantVectorStore
from qdrant_client.http import models
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv
# Load environment variables
load_dotenv('.env')
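# The Qdrant client below reads QDRANT_URL and QDRANT_API_KEY from .env;
# OpenAIEmbeddings and ChatGroq additionally expect OPENAI_API_KEY and
# GROQ_API_KEY to be present in the environment.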
class Retriever:
    def __init__(self):
        # Initialize Qdrant client
        qdrant_client = QdrantClient(
            url=os.getenv("QDRANT_URL"),
            api_key=os.getenv("QDRANT_API_KEY"),
        )
        # Vector store for the knowledge-base collection
        self.vector_store = QdrantVectorStore(
            client=qdrant_client,
            collection_name="siel-ai-assignment",
            embedding=OpenAIEmbeddings(),
        )
        # Vector store for per-user documents
        self.vector_store_user = QdrantVectorStore(
            client=qdrant_client,
            collection_name="siel-ai-user",
            embedding=OpenAIEmbeddings(),
        )
        # Values of the metadata.DOCUMENT_IS_ABOUT payload field used for filtered retrieval
        self.filters = [
            'Taxation-Goods-and-service-Tax',
            'Taxation-INCOME-TAX-LAW',
            'Direct Tax Laws and International Taxation',
            'Indirect Tax Laws',
            'INDIAN Income Tax ACTS',
            'ONLINESITES',
        ]
        self.groq = ChatGroq(model='llama3-70b-8192')
    def multi_questions(self, user_prompt):
        """Decompose a user query into an ordered, pipe-separated sequence of sub-queries."""
        llm = self.groq
        prompt = '''
        # You are an excellent Query Decomposer for database retrieval optimization.
        # You are given a user_query.
        ===============================
        # TASK:
        -> Your task is to provide a structured, hierarchical breakdown of the user query.
        -> The breakdown should be an ordered sequence that helps extract the right context from the database.
        -> Build up the user query from the bottom level (basic requirements) to the top level (more specific details), so that the retrieval context improves at each level.
        ===============================
        # USER_QUERY: {user}
        ===============================
        # EXAMPLE:
        1. #USER_QUERY: "For 5 lakh, what type of taxes should I pay and how much?"
        -> #EXPECTED OUTPUT: | I'm purchasing a car for 5 lakh. | What type of taxes should I pay on the purchase of automobiles? | What type of taxes should I pay on the purchase of a car for 5 lakh? |
        2. #USER_QUERY: "For 5 lakh, what type of taxes should I pay and how much?"
        -> #EXPECTED OUTPUT: | NEW TAX REGIME and Income tax. | My income is 5 lakh. What type of taxes should I pay and how much should I pay? |
        ===============================
        # OUTPUT FORMAT:
        -> Provide the formatted output separated with the pipe '|', enclosed as: |...|...|
        -> Stick to the given format without any additional explanation. Your only response must be the formatted sequence of queries.
        -> Do not answer the user question directly; your job is to provide the decomposed queries in the format shown in the examples.
        '''
        rag_prompt = PromptTemplate.from_template(prompt)
        chain = rag_prompt | llm | StrOutputParser()
        response = chain.invoke({"user": user_prompt})
        return response
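    def split_questions(self, decomposition):
        # Helper sketch (not part of the original file): parse the pipe-separated
        # string returned by multi_questions into a list of sub-queries. Assumes
        # the model followed the |...|...| format requested in the prompt.
        return [q.strip() for q in decomposition.split('|') if q.strip()]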
    def multiple_contexts(self, user_prompt):
        """Run a filtered similarity search for every topic filter and concatenate the hits."""
        contexts = []
        for topic in self.filters:
            contexts += self.filter_multiple(user_prompt, topic, 18)
        print(len(contexts))  # debug: total number of retrieved chunks
        return contexts
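    def dedupe(self, docs):
        # Helper sketch (an assumption, not part of the original file): because
        # multiple_contexts() and filter() concatenate results from several
        # retrievers, the same chunk can appear more than once. This drops
        # repeats by page_content while preserving order.
        seen = set()
        unique = []
        for doc in docs:
            if doc.page_content not in seen:
                seen.add(doc.page_content)
                unique.append(doc)
        return unique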
    def filter_multiple(self, query, mapper, k1=10):
        """Similarity search (score threshold 0.75) restricted to documents whose
        metadata.DOCUMENT_IS_ABOUT payload equals `mapper`."""
        retriever = self.vector_store.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={
                "k": k1,
                "score_threshold": 0.75,
                "filter": models.Filter(
                    must=[
                        models.FieldCondition(
                            key="metadata.DOCUMENT_IS_ABOUT",
                            match=models.MatchValue(value=mapper),
                        )
                    ]
                ),
            },
        )
        return retriever.invoke(query)
    def filter(self, query, k1=10, k2=17):
        """MMR search that pulls k1 chunks from the 'ONLINESITES' documents and
        k2 chunks from every other topic, then concatenates the two result sets."""
        online_condition = models.FieldCondition(
            key="metadata.DOCUMENT_IS_ABOUT",
            match=models.MatchValue(value=self.filters[-1]),  # 'ONLINESITES'
        )
        retriever1 = self.vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={"k": k1, "filter": models.Filter(must=[online_condition])},
        )
        retriever2 = self.vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={"k": k2, "filter": models.Filter(must_not=[online_condition])},
        )
        return retriever1.invoke(query) + retriever2.invoke(query)
    def id_filter(self, query, id):
        """Similarity search over the user collection, restricted to documents whose
        metadata.ID payload matches the given id."""
        retriever = self.vector_store_user.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={
                "k": 10,
                "score_threshold": 0.7,
                "filter": models.Filter(
                    must=[
                        models.FieldCondition(
                            key="metadata.ID",
                            match=models.MatchValue(value=id),
                        )
                    ]
                ),
            },
        )
        return retriever.invoke(query)
    def data_retrieve(self, query=''):
        """Unfiltered similarity search; returns documents without their scores."""
        retrieved_docs = self.vector_store.similarity_search_with_score(query, k=10)
        return [doc for doc, _ in retrieved_docs]
# ret = Retriever()
# print(ret.multiple_contexts("I'm purchasing a car for 5 lakh. What type of taxes should I pay and how much?"))
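# Minimal end-to-end usage sketch (commented out; assumes the environment
# variables noted above are set and both Qdrant collections already exist;
# split_questions and dedupe are the helper sketches defined above):
# ret = Retriever()
# decomposition = ret.multi_questions("I'm purchasing a car for 5 lakh. What taxes should I pay?")
# for sub_query in ret.split_questions(decomposition):
#     docs = ret.dedupe(ret.filter(sub_query))
#     print(sub_query, len(docs))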