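"""RAG chain for answering questions about the Kadi-APY library.

The chain rewrites user queries, retrieves documentation and source-code
snippets from a vector store, and generates responses with an LLM.
"""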
class KadiApyRagchain:

    def __init__(self, llm, vector_store):
        """
        Initialize the RAG chain with an LLM instance, a vector store, and an
        empty conversation history.
        """
        self.llm = llm
        self.vector_store = vector_store
        self.conversation = []
    def process_query(self, query):
        """
        Process a user query: update the history, rewrite the query, retrieve
        contexts, and generate a response.
        """
        # Add the user query to the conversation history.
        self.add_to_conversation(user_query=query)

        # Rewrite the query to match the wording of the library's docstrings.
        rewritten_query = self.rewrite_query(query)
        print("Rewritten query:", rewritten_query)

        # Predict which part of the library the query targets.
        code_library_usage_prediction = self.predict_library_usage(query)

        # Retrieve contexts.
        doc_contexts = self.retrieve_contexts(query, k=3, filter={"directory": "doc/"})
        code_contexts = self.retrieve_contexts(rewritten_query, k=5, filter={"usage": code_library_usage_prediction})

        # Vanilla retrieval (directory-based filters, no usage prediction):
        # doc_contexts = self.retrieve_contexts(query, k=3, filter={"directory": "doc/"})
        # code_contexts = self.retrieve_contexts(query, k=5, filter={"directory": "kadi_apy/"})
        # code_contexts = self.retrieve_contexts(rewritten_query, k=5, filter={"directory": "kadi_apy/"})

        # Format contexts.
        formatted_doc_contexts = self.format_documents(doc_contexts)
        formatted_code_contexts = self.format_documents(code_contexts)

        # Generate the response.
        response = self.generate_response(query, formatted_doc_contexts, formatted_code_contexts)

        # Add the response to the conversation history.
        # Not supported yet; needs session handling in app.py.
        # self.add_to_conversation(llm_response=response)

        return response
    def add_to_conversation(self, user_query=None, llm_response=None):
        """
        Add the user's query, the LLM's response, or both to the conversation
        history.
        """
        if user_query and llm_response:
            # Add a complete query-response pair.
            self.conversation.append({"query": user_query, "response": llm_response})
        elif user_query:
            # Add a query that has no response yet.
            self.conversation.append({"query": user_query, "response": None})
        elif llm_response and self.conversation:
            # Attach a response to the most recent query.
            self.conversation[-1]["response"] = llm_response
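
    # Illustrative example (not part of the original file) of the resulting
    # history structure:
    #   chain.add_to_conversation(user_query="create a record")
    #   chain.add_to_conversation(llm_response="...generated code...")
    #   # chain.conversation is now:
    #   # [{"query": "create a record", "response": "...generated code..."}]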
    def get_history(self):
        """
        Return the entire conversation history.
        """
        return self.conversation
    def rewrite_query(self, query):
        """
        Rewrite the user's query to align with the language and structure of
        the library's methods and documentation.
        """
        rewrite_prompt = (
            f"""You are an intelligent assistant that helps users rewrite their queries.
            The vector store consists of the source code and documentation of a Python library, which enables users to
            programmatically interact with a REST-like API of a software system. The library methods have descriptive
            docstrings. Your task is to rewrite the query in a way that aligns with the language and structure of the
            library's methods and documentation, ensuring optimal retrieval of relevant information.

            Guidelines for rewriting the query:
            1. Identify the main action the user wants to perform (e.g., "Upload a file to a record," "Get users of a group").
            2. Remove conversational elements such as greetings or pleasantries (e.g., "Hello Chatbot", "I need you to help me with").
            3. Exclude specific variable values (e.g., "ID of my record is '31'") unless essential to the intent.
            4. Rephrase the query to match the format and keywords used in the docstrings, focusing on verbs and objects relevant to the action (e.g., "Add a record to a collection").
            5. The user may need more than one action to achieve their goal. In that case, the rewritten query contains more than one action.

            Examples:
            - User query: "Create a Python script with a method that facilitates the creation of records. This method should accept an array of identifiers as a parameter and allow metadata to be added to each record."
              Rewritten query: "create records, add metadata to record"
            - User query: "Hi, can you help me write Python code to add a record to a collection? The record ID is '45', and the collection ID is '12'."
              Rewritten query: "add a record to a collection"
            - User query: "I need a Python script with which I create a new record with the title 'Hello World' and then link the record to a given collection."
              Rewritten query: "create a new record with title", "link a record to a collection"

            Based on these examples and guidelines, rewrite the following user query to align more effectively with the keywords used in the docstrings.
            Do not include any additional comments, explanations, or text.

            Original query:
            {query}
            """
        )
        return self.llm.invoke(rewrite_prompt).content
    def predict_library_usage(self, query):
        """
        Use the LLM to predict which part of the library is relevant to the
        user's query.
        """
        prompt = (
            f"""The query is: '{query}'.

            Based on the user's query, assist them by determining which technical document they should read to interact with the software named 'Kadi4Mat'.
            There are two different technical documents to choose from:
            - Document 1: Provides information on how to use a Python library to interact with the HTTP API of 'Kadi4Mat'.
            - Document 2: Provides information on how to use a Python library to implement custom CLI commands to interact with 'Kadi4Mat'.

            Your task is to select the single most likely option.
            If Document 1 is the best choice, respond with 'kadi_apy/lib/'.
            If Document 2 is the best choice, respond with 'kadi_apy/cli/'.
            Respond with only the exact corresponding option and do not include any additional comments, explanations, or text.
            """
        )
        return self.llm.predict(prompt)
    def retrieve_contexts(self, query, k, filter=None):
        """
        Retrieve the k most relevant documents from the vector store,
        optionally restricted by a metadata filter.
        """
        return self.vector_store.similarity_search(query=query, k=k, filter=filter)
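
    # Note (assumption, not from the original file): the exact semantics of
    # `filter` depend on the vector store backend; the metadata-equality dicts
    # used in process_query (e.g. {"directory": "doc/"}) follow the convention
    # of LangChain vector stores such as Chroma or FAISS.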
    def generate_response(self, query, doc_context, code_context):
        """
        Generate a response using the retrieved contexts and the LLM.
        """
        prompt = f"""You are a Python programming assistant specialized in the "Kadi-APY" library.
        The "Kadi-APY" library is a Python package designed to facilitate interaction with the REST-like API of a software platform called Kadi4Mat.
        Your task is to answer the user's query based on the guidelines and, if needed, to combine the understanding provided by
        the "Document Snippets" with the implementation details provided by the "Code Snippets".

        Guidelines if generating code:
        - Display the complete code first, followed by a concise explanation in no more than 5 sentences.

        General guidelines:
        - If the user's query cannot be fulfilled based on the provided snippets, reply with "The API does not support the requested functionality".
        - If the user's query does not imply any task, reply with a question asking the user to elaborate.

        "Document Snippets": documentation excerpts and code examples that explain how to use the "Kadi-APY" library.
        Document Snippets:
        {doc_context}

        "Code Snippets": raw source code fragments from the implementation of the "Kadi-APY" library.
        Code Snippets:
        {code_context}

        Query:
        {query}
        """
        return self.llm.invoke(prompt).content
    # Alternative implementation that also injects the conversation history
    # into the prompt (disabled until session handling lands in app.py):
    #
    # def generate_response(self, query, history, doc_context, code_context):
    #     """
    #     Generate a response using the conversation history, the retrieved
    #     contexts, and the LLM.
    #     """
    #     formatted_history = format_history(history)
    #     prompt = f"""
    #     You are a Python programming assistant specialized in the "Kadi-APY" library.
    #     The "Kadi-APY" library is a Python package designed to facilitate interaction with the REST-like API of a software platform called Kadi4Mat.
    #     Your task is to answer the user's query based on the guidelines, and if needed, combine the understanding provided by
    #     "Document Snippets" with the implementation details provided by "Code Snippets".
    #
    #     Guidelines if generating code:
    #     - Display the complete code first, followed by a concise explanation in no more than 5 sentences.
    #
    #     General guidelines:
    #     - Refer to the "Conversation History" if it provides context that could enhance your understanding of the user's query.
    #     - If the user's query cannot be fulfilled based on the provided snippets, reply with "The API does not support the requested functionality".
    #     - If the user's query does not imply any task, reply with a question asking the user to elaborate.
    #
    #     Conversation History:
    #     {formatted_history}
    #
    #     Document Snippets:
    #     {doc_context}
    #
    #     Code Snippets:
    #     {code_context}
    #
    #     Query:
    #     {query}
    #     """
    #     return self.llm.invoke(prompt).content
    def format_documents(self, documents):
        """
        Format retrieved documents (metadata plus page content) into a single
        string for use in a prompt.
        """
        formatted_docs = []
        for i, doc in enumerate(documents, start=1):
            metadata_str = ", ".join(f"{key}: {value}" for key, value in doc.metadata.items())

            # Debug output for each retrieved document.
            print("---------------------------- Retrieved document ----------------------------")
            print(metadata_str)
            print(doc.page_content)
            print("------------------------- End of retrieved document ------------------------\n")

            formatted_docs.append(f"Snippet {i}:\n")
            formatted_docs.append(metadata_str)
            formatted_docs.append("\n")
            formatted_docs.append(doc.page_content)
            formatted_docs.append("\n\n")

        # Join into one string; returning the raw list would render as a Python
        # list literal when interpolated into the prompt.
        return "".join(formatted_docs)
def format_history(conversation_history):
    """
    Format the conversation history into a readable string of query-response
    turns.
    """
    formatted_history = []
    for i, entry in enumerate(conversation_history, start=1):
        user_query = entry.get("query", "No query provided")
        assistant_response = entry.get("response", "No response yet")
        formatted_history.append(f"Turn {i}:")
        formatted_history.append(f"User Query: {user_query}")
        formatted_history.append(f"Assistant Response: {assistant_response}")
        formatted_history.append("\n")
    return "\n".join(formatted_history)