bupa1018 committed on
Commit
76069ac
·
1 Parent(s): 1c96354

Update ragchain.py

Browse files
Files changed (1) hide show
  1. ragchain.py +69 -10
ragchain.py CHANGED
@@ -7,6 +7,47 @@ class RAGChain:
7
  self.llm = llm
8
  self.vector_store = vector_store
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def predict_library_usage(self, query):
11
  """
12
  Use the LLM to predict the relevant library for the user's query.
@@ -39,8 +80,8 @@ class RAGChain:
39
  """
40
  Format the retrieved document and code contexts.
41
  """
42
- doc_context = format_kadi_api_doc_context(doc_contexts)
43
- code_context = format_kadi_apy_library_context(code_contexts)
44
 
45
  return doc_context, code_context
46
 
@@ -74,13 +115,31 @@ class RAGChain:
74
  """
75
  return self.llm.invoke(prompt).content
76
 
77
- def rag_workflow(self, query):
78
- """
79
- Complete the RAG workflow: predict library usage, retrieve contexts, and generate a response.
80
- """
81
- library_usage_prediction = self.predict_library_usage(query)
82
 
83
- doc_contexts, code_contexts = self.retrieve_contexts(query, library_usage_prediction)
84
- doc_context, code_context = self.format_context(doc_contexts, code_contexts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
- return self.generate_response(query, doc_context, code_context)
 
7
  self.llm = llm
8
  self.vector_store = vector_store
9
 
10
+
11
def rewrite_query(self, query):
    """
    Rewrite the user's query to align with the language and structure of the
    library's methods and documentation.

    A fixed instruction prompt (guidelines plus worked examples) is combined
    with ``query`` and sent to ``self.llm``; the reply text is returned with
    surrounding whitespace removed.

    Args:
        query: The raw user query as a string.

    Returns:
        The stripped ``content`` attribute of the object returned by
        ``self.llm.invoke``.
    """
    import textwrap

    # The template is dedented *before* the query is inserted: the prompt text
    # then does not depend on how deeply this method is indented, and a
    # multi-line query cannot change the margin that dedent() strips.
    prompt_template = textwrap.dedent(
        """\
        You are an intelligent assistant that helps users rewrite their queries.
        The vectorstore consists of the source code and documentation of a Python library, which enables users to
        programmatically interact with a REST-like API of a software system. The library methods have descriptive
        docstrings. Your task is to rewrite the query in a way that aligns with the language and structure of the
        library's methods and documentation, ensuring optimal retrieval of relevant information.

        Guidelines for rewriting the query:
        1. Identify the main action the user wants to perform (e.g., "Upload a file to a record," "Get users of a group").
        2. Remove conversational elements like greetings or pleasantries (e.g., "Hello Chatbot", "I need you to help me with").
        3. Exclude specific variable values (e.g., "ID of my record is '31'") unless essential to the intent.
        4. Rephrase the query to match the format and keywords used in the docstrings, focusing on verbs and objects relevant to the action (e.g., "Add a record to a collection").
        5. Given the query the user might need more than one action to achieve his goal. In this case the rewritten query has more than one action.

        Examples:
        - User query: "Create a Python script with a method that facilitates the creation of records. This method should accept an array of identifiers as a parameter and allow metadata to be added to each record."
          Rewritten query: "create records, add metadata to record"
        - User query: "Hi, can you help me write Python code to add a record to a collection? The record ID is '45', and the collection ID is '12'."
          Rewritten query: "add a record to a collection"
        - User query: I need a python script with which i create a new record with the title: "Hello World" and then link the record to a given collection.
          Rewritten query: "create a new record with title" , "link a record to a collection"

        Based on these examples and guidelines, rewrite the following user query to align more effectively with the keywords used in the docstrings.
        Do not include any additional comments, explanations, or text.

        Original query:
        {query}
        """
    )
    # str.format only parses the template, so braces inside the user's query
    # are passed through verbatim.
    rewrite_prompt = prompt_template.format(query=query)

    rewritten_query_response = self.llm.invoke(rewrite_prompt)
    return rewritten_query_response.content.strip()
48
+
49
+
50
+
51
  def predict_library_usage(self, query):
52
  """
53
  Use the LLM to predict the relevant library for the user's query.
 
80
  """
81
  Format the retrieved document and code contexts.
82
  """
83
+ doc_context = _format_kadi_api_doc_context(doc_contexts)
84
+ code_context = _format_kadi_apy_library_context(code_contexts)
85
 
86
  return doc_context, code_context
87
 
 
115
  """
116
  return self.llm.invoke(prompt).content
117
 
118
+
119
+
120
+ def _format_kadi_apy_library_context(docs):
121
+ doc_context = []
 
122
 
123
+ for doc in docs:
124
+ # Extract metadata information
125
+ class_info = doc.metadata.get("class", "Unknown Class")
126
+ type_info = doc.metadata.get("type", "Unknown Type")
127
+ source_info = doc.metadata.get("source", "Unknown Type")
128
+
129
+ print(":}\n\n", doc.page_content)
130
+ formatted_doc = f"# source: {source_info}\n# class: {class_info}\n# type: {type_info}\n{doc.page_content}\n\n\n"
131
+ doc_context.append(formatted_doc)
132
+
133
+ return doc_context
134
+
135
+
136
+ def _format_kadi_api_doc_context(docs):
137
+ doc_context = []
138
+
139
+ for doc in docs:
140
+ source_info = doc.metadata.get("source", "Unknown Type")
141
+ print(":}\n\n", doc.page_content)
142
+ formatted_doc = f"# source: {source_info}\n{doc.page_content}\n\n\n"
143
+ doc_context.append(formatted_doc)
144
 
145
+ return doc_context