bupa1018 committed on
Commit
47a0ac2
·
verified ·
1 Parent(s): 74f1ba5

Update kadiApy_ragchain.py

Browse files
Files changed (1) hide show
  1. kadiApy_ragchain.py +99 -38
kadiApy_ragchain.py CHANGED
@@ -27,8 +27,12 @@ class KadiApyRagchain:
27
  print("Start retrieving:")
28
  #doc_contexts = self.retrieve_contexts(query, k=2, filter={"dataset_category": "kadi_apy_docs"})
29
  #code_contexts = self.retrieve_contexts(rewritten_query, k=3, filter={"usage": code_library_usage_prediction})
30
- context = self.retrieve_contexts(query, k=5)
31
 
 
 
 
 
32
  # Vanilla
33
  #doc_contexts = self.retrieve_contexts(query, k=3, filter={"dataset_category": "kadi_apy_docs"})
34
  #code_contexts = self.retrieve_contexts(query, k=5, filter={"dataset_category": "kadi_apy_source_code"})
@@ -39,14 +43,13 @@ class KadiApyRagchain:
39
 
40
  # Format contexts
41
  print("Formatting docs:")
42
- # formatted_doc_contexts = self.format_documents(doc_contexts)
43
- # formatted_code_contexts = self.format_documents(code_contexts)
44
- formatted_contexts = self.format_documents(context)
45
 
46
  # Generate response
47
  print("Start generatin repsonsse:")
48
- # response = self.generate_response(query, chat_history, formatted_doc_contexts, formatted_code_contexts)
49
- response = self.generate_response(query, chat_history, formatted_contexts)
50
 
51
  # Add the response to the existing query in the conversation history
52
  #self.add_to_conversation(llm_response=response)
@@ -109,6 +112,38 @@ class KadiApyRagchain:
109
  )
110
  return self.llm.invoke(rewrite_prompt).content
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
  def predict_library_usage(self, query):
114
  """
@@ -136,7 +171,44 @@ class KadiApyRagchain:
136
  context = self.vector_store.similarity_search(query = query, k=k, filter=filter)
137
  return context
138
 
139
- # def generate_response(self, query, chat_history, doc_context, code_context):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  # """
141
  # Generate a response using the retrieved contexts and the LLM.
142
  # """
@@ -146,8 +218,8 @@ class KadiApyRagchain:
146
  # prompt = f"""
147
  # You are a Python programming assistant specialized in the "Kadi-APY" library.
148
  # The "Kadi-APY" library is a Python package designed to facilitate interaction with the REST-like API of a software platform called Kadi4Mat.
149
- # Your task is to answer the user's query based on the guidelines, and if needed, combine understanding provided by
150
- # "Document Snippets" with the implementation details provided by "Code Snippets."
151
 
152
  # Guidelines if generating code:
153
  # - Display the complete code first, followed by a concise explanation in no more than 5 sentences.
@@ -161,11 +233,8 @@ class KadiApyRagchain:
161
  # Chat History:
162
  # {formatted_history}
163
 
164
- # Document Snippets:
165
- # {doc_context}
166
-
167
- # Code Snippets:
168
- # {code_context}
169
 
170
  # Query:
171
  # {query}
@@ -173,41 +242,33 @@ class KadiApyRagchain:
173
  # return self.llm.invoke(prompt).content
174
 
175
 
176
- def generate_response(self, query, chat_history, context):
177
- """
178
- Generate a response using the retrieved contexts and the LLM.
179
- """
180
- formatted_history = self.format_history(chat_history)
181
-
182
- # Update the prompt with history included
183
  prompt = f"""
184
  You are a Python programming assistant specialized in the "Kadi-APY" library.
185
  The "Kadi-APY" library is a Python package designed to facilitate interaction with the REST-like API of a software platform called Kadi4Mat.
186
- Your task is to answer the user's query based on the guidelines, and if needed, combine understanding provided by "Context"
187
- "Context" contains snippets from the source code and/or code examples
188
 
189
- Guidelines if generating code:
190
- - Display the complete code first, followed by a concise explanation in no more than 5 sentences.
191
-
192
- General Guidelines:
193
- - Refer to the "Chat History" if it provides context that could enhance your understanding of the user's query.
194
- - Always include the "Chat History" if relevant to the user's query for continuity and clarity in responses.
195
- - If the user's query cannot be fulfilled based on the provided snippets, reply with "The API does not support the requested functionality."
196
- - If the user's query does not implicate any task, reply with a question asking the user to elaborate.
197
-
198
- Chat History:
199
- {formatted_history}
200
 
201
  Context:
202
  {context}
203
-
204
- Query:
205
- {query}
206
  """
207
- return self.llm.invoke(prompt).content
208
 
209
 
210
 
 
 
211
  def format_documents(self, documents):
212
  formatted_docs = []
213
  for i, doc in enumerate(documents, start=1):
 
27
  print("Start retrieving:")
28
  #doc_contexts = self.retrieve_contexts(query, k=2, filter={"dataset_category": "kadi_apy_docs"})
29
  #code_contexts = self.retrieve_contexts(rewritten_query, k=3, filter={"usage": code_library_usage_prediction})
30
+ code_contexts = self.retrieve_contexts(query, k=2)
31
 
32
+ query_formulated_question= formulate_question(code_contexts)
33
+ print("question": query_formulated_question)
34
+ doc_contexts = self.retrieve_contexts(query_formulated_question, k=2, filter={"dataset_category": "kadi_apy_docs"})
35
+
36
  # Vanilla
37
  #doc_contexts = self.retrieve_contexts(query, k=3, filter={"dataset_category": "kadi_apy_docs"})
38
  #code_contexts = self.retrieve_contexts(query, k=5, filter={"dataset_category": "kadi_apy_source_code"})
 
43
 
44
  # Format contexts
45
  print("Formatting docs:")
46
+ formatted_doc_contexts = self.format_documents(doc_contexts)
47
+ formatted_code_contexts = self.format_documents(code_contexts)
 
48
 
49
  # Generate response
50
  print("Start generatin repsonsse:")
51
+ response = self.generate_response(query, chat_history, formatted_doc_contexts, formatted_code_contexts)
52
+ #response = self.generate_response(query, chat_history, formatted_contexts)
53
 
54
  # Add the response to the existing query in the conversation history
55
  #self.add_to_conversation(llm_response=response)
 
112
  )
113
  return self.llm.invoke(rewrite_prompt).content
114
 
115
def ask_rag(self, context):
    """
    Build a prompt around ``context`` and return the LLM's answer text.

    The fixed instruction text below is combined with ``context`` and sent to
    ``self.llm.invoke``; the ``content`` attribute of the reply is returned.

    Args:
        context: Value interpolated at the end of the prompt. The prompt's
            first sentence calls this material "Code snippets".

    Returns:
        The ``content`` attribute of the object returned by ``self.llm.invoke``.
    """
    # The placeholder used to be ``{query}``, a name this method never defines,
    # so every call failed with NameError. ``context`` is the only input here.
    # NOTE(review): the guidelines and examples below look like they were copied
    # from the query-rewriting prompt and the task sentence is unfinished --
    # confirm the intended wording before relying on this method.
    rewrite_prompt = (
        f""" "Code snippets" consists of methods and/or class declaration from the source code of an API Wrapper Python library.
        Your task is to write a question how to use

        Guidelines for rewriting the query:
        1. Identify the main action the user wants to perform (e.g., "Upload a file to a record," "Get users of a group").
        2. Remove conversational elements like greetings or pleasantries (e.g., "Hello Chatbot", "I need you to help me with").
        3. Exclude specific variable values (e.g., "ID of my record is '31'") unless essential to the intent.
        4. Rephrase the query to match the format and keywords used in the docstrings, focusing on verbs and objects relevant to the action (e.g., "Add a record to a collection").
        5. Given the query the user might need more than one action to achieve his goal. In this case the rewritten query has more than one action.

        Examples:
        - User query: "Create a Python script with a method that facilitates the creation of records. This method should accept an array of identifiers as a parameter and allow metadata to be added to each record."
        - Rewritten query: "create records, add metadata to record"
        - User query: "Hi, can you help me write Python code to add a record to a collection? The record ID is '45', and the collection ID is '12'."
        Rewritten query: "add a record to a collection"
        - User query: I need a python script with which i create a new record with the title: "Hello World" and then link the record to a given collection.
        Rewritten query: "create a new record with title" , "link a record to a collection"

        Based on these examples and guidelines, rewrite the following user query to align more effectively with the keywords used in the docstrings.
        Do not include any addition comments, explanations, or text.

        Original query:
        {context}
        """
    )
    return self.llm.invoke(rewrite_prompt).content
143
+
144
+
145
+
146
+
147
 
148
  def predict_library_usage(self, query):
149
  """
 
171
  context = self.vector_store.similarity_search(query = query, k=k, filter=filter)
172
  return context
173
 
174
def generate_response(self, query, chat_history, doc_context, code_context):
    """
    Answer the user's query with the LLM, given documentation and code snippets.

    Args:
        query: The user's query, placed under "Query:" in the prompt.
        chat_history: Conversation history; rendered via ``self.format_history``.
        doc_context: Text placed under "Document Snippets:" in the prompt.
        code_context: Text placed under "Code Snippets:" in the prompt.

    Returns:
        The ``content`` attribute of the object returned by ``self.llm.invoke``.
    """
    # Render the history first so it can be embedded in the prompt text.
    history_text = self.format_history(chat_history)

    llm_input = f"""
        You are a Python programming assistant specialized in the "Kadi-APY" library.
        The "Kadi-APY" library is a Python package designed to facilitate interaction with the REST-like API of a software platform called Kadi4Mat.
        Your task is to answer the user's query based on the guidelines, and if needed, combine understanding provided by
        "Document Snippets" with the implementation details provided by "Code Snippets."

        Guidelines if generating code:
        - Display the complete code first, followed by a concise explanation in no more than 5 sentences.

        General Guidelines:
        - Refer to the "Chat History" if it provides context that could enhance your understanding of the user's query.
        - Always include the "Chat History" if relevant to the user's query for continuity and clarity in responses.
        - If the user's query cannot be fulfilled based on the provided snippets, reply with "The API does not support the requested functionality."
        - If the user's query does not implicate any task, reply with a question asking the user to elaborate.

        Chat History:
        {history_text}

        Document Snippets:
        {doc_context}

        Code Snippets:
        {code_context}

        Query:
        {query}
        """
    reply = self.llm.invoke(llm_input)
    return reply.content
209
+
210
+
211
+ # def generate_response(self, query, chat_history, context):
212
  # """
213
  # Generate a response using the retrieved contexts and the LLM.
214
  # """
 
218
  # prompt = f"""
219
  # You are a Python programming assistant specialized in the "Kadi-APY" library.
220
  # The "Kadi-APY" library is a Python package designed to facilitate interaction with the REST-like API of a software platform called Kadi4Mat.
221
+ # Your task is to answer the user's query based on the guidelines, and if needed, combine understanding provided by "Context"
222
+ # "Context" contains snippets from the source code and/or code examples
223
 
224
  # Guidelines if generating code:
225
  # - Display the complete code first, followed by a concise explanation in no more than 5 sentences.
 
233
  # Chat History:
234
  # {formatted_history}
235
 
236
+ # Context:
237
+ # {context}
 
 
 
238
 
239
  # Query:
240
  # {query}
 
242
  # return self.llm.invoke(prompt).content
243
 
244
 
245
+
246
+
247
def formulate_question(self, source_code):
    """
    Ask the LLM for the next question a programmer would raise about a method.

    ``source_code`` is embedded under the "Context:" heading of a fixed prompt,
    which is then sent to ``self.llm.invoke``.

    Args:
        source_code: Material describing the method; it is interpolated into
            the prompt as-is via f-string formatting.

    Returns:
        The ``content`` attribute of the object returned by ``self.llm.invoke``.
    """
    # The placeholder used to be ``{context}``, a name this method never
    # defines, so every call failed with NameError. The parameter is
    # ``source_code``; the "Context:" heading in the prompt text is kept.
    prompt = f"""
        You are a Python programming assistant specialized in the "Kadi-APY" library.
        The "Kadi-APY" library is a Python package designed to facilitate interaction with the REST-like API of a software platform called Kadi4Mat.
        Your task is to formulate the next logical question a programmer would ask themselves to implement and run the method provided in the "context".

        "Context" contains snippets from the source code and metadata that provide details about the method.

        Guidelines for generating questions:
        - The question should be specific to the programmer's intent of using the method within a Python script.
        - Focus on determining the entry point of the class to which the method belongs.
        - Avoid vague or general questions; be precise about the next actionable steps.

        Context:
        {source_code}
        """
    return self.llm.invoke(prompt).content
267
 
268
 
269
 
270
+
271
+
272
  def format_documents(self, documents):
273
  formatted_docs = []
274
  for i, doc in enumerate(documents, start=1):