Spaces:
Sleeping
Sleeping
Update ragchain.py
Browse files- ragchain.py +69 -10
ragchain.py
CHANGED
@@ -7,6 +7,47 @@ class RAGChain:
|
|
7 |
self.llm = llm
|
8 |
self.vector_store = vector_store
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
def predict_library_usage(self, query):
|
11 |
"""
|
12 |
Use the LLM to predict the relevant library for the user's query.
|
@@ -39,8 +80,8 @@ class RAGChain:
|
|
39 |
"""
|
40 |
Format the retrieved document and code contexts.
|
41 |
"""
|
42 |
-
doc_context =
|
43 |
-
code_context =
|
44 |
|
45 |
return doc_context, code_context
|
46 |
|
@@ -74,13 +115,31 @@ class RAGChain:
|
|
74 |
"""
|
75 |
return self.llm.invoke(prompt).content
|
76 |
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
library_usage_prediction = self.predict_library_usage(query)
|
82 |
|
83 |
-
|
84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
|
86 |
-
return
|
|
|
7 |
self.llm = llm
|
8 |
self.vector_store = vector_store
|
9 |
|
10 |
+
|
11 |
+
def rewrite_query(self, query):
    """
    Rewrite the user's query to align with the language and structure of the library's methods and documentation.

    The query is embedded in a few-shot instruction prompt and sent to
    ``self.llm``; the model's answer is returned with surrounding
    whitespace removed.

    Args:
        query: The raw user query text.

    Returns:
        The stripped ``content`` of the response returned by
        ``self.llm.invoke``.
    """
    # NOTE: the examples all use the same "- User query / Rewritten query"
    # layout so the model sees one consistent pattern to imitate.
    rewrite_prompt = (
        f"""You are an intelligent assistant that helps users rewrite their queries.
The vectorstore consists of the source code and documentation of a Python library, which enables users to
programmatically interact with a REST-like API of a software system. The library methods have descriptive
docstrings. Your task is to rewrite the query in a way that aligns with the language and structure of the
library's methods and documentation, ensuring optimal retrieval of relevant information.

Guidelines for rewriting the query:
1. Identify the main action the user wants to perform (e.g., "Upload a file to a record," "Get users of a group").
2. Remove conversational elements like greetings or pleasantries (e.g., "Hello Chatbot", "I need you to help me with").
3. Exclude specific variable values (e.g., "ID of my record is '31'") unless essential to the intent.
4. Rephrase the query to match the format and keywords used in the docstrings, focusing on verbs and objects relevant to the action (e.g., "Add a record to a collection").
5. Given the query the user might need more than one action to achieve his goal. In this case the rewritten query has more than one action.

Examples:
- User query: "Create a Python script with a method that facilitates the creation of records. This method should accept an array of identifiers as a parameter and allow metadata to be added to each record."
Rewritten query: "create records, add metadata to record"
- User query: "Hi, can you help me write Python code to add a record to a collection? The record ID is '45', and the collection ID is '12'."
Rewritten query: "add a record to a collection"
- User query: I need a python script with which i create a new record with the title: "Hello World" and then link the record to a given collection.
Rewritten query: "create a new record with title" , "link a record to a collection"

Based on these examples and guidelines, rewrite the following user query to align more effectively with the keywords used in the docstrings.
Do not include any additional comments, explanations, or text.

Original query:
{query}
"""
    )

    rewritten_query_response = self.llm.invoke(rewrite_prompt)
    # The model may pad its answer with blank lines/spaces; strip them so
    # the result can be used directly as a retrieval query.
    return rewritten_query_response.content.strip()
|
48 |
+
|
49 |
+
|
50 |
+
|
51 |
def predict_library_usage(self, query):
|
52 |
"""
|
53 |
Use the LLM to predict the relevant library for the user's query.
|
|
|
80 |
"""
|
81 |
Format the retrieved document and code contexts.
|
82 |
"""
|
83 |
+
doc_context = _format_kadi_api_doc_context(doc_contexts)
|
84 |
+
code_context = _format_kadi_apy_library_context(code_contexts)
|
85 |
|
86 |
return doc_context, code_context
|
87 |
|
|
|
115 |
"""
|
116 |
return self.llm.invoke(prompt).content
|
117 |
|
118 |
+
|
119 |
+
|
120 |
+
def _format_kadi_apy_library_context(docs):
|
121 |
+
doc_context = []
|
|
|
122 |
|
123 |
+
for doc in docs:
|
124 |
+
# Extract metadata information
|
125 |
+
class_info = doc.metadata.get("class", "Unknown Class")
|
126 |
+
type_info = doc.metadata.get("type", "Unknown Type")
|
127 |
+
source_info = doc.metadata.get("source", "Unknown Type")
|
128 |
+
|
129 |
+
print(":}\n\n", doc.page_content)
|
130 |
+
formatted_doc = f"# source: {source_info}\n# class: {class_info}\n# type: {type_info}\n{doc.page_content}\n\n\n"
|
131 |
+
doc_context.append(formatted_doc)
|
132 |
+
|
133 |
+
return doc_context
|
134 |
+
|
135 |
+
|
136 |
+
def _format_kadi_api_doc_context(docs):
|
137 |
+
doc_context = []
|
138 |
+
|
139 |
+
for doc in docs:
|
140 |
+
source_info = doc.metadata.get("source", "Unknown Type")
|
141 |
+
print(":}\n\n", doc.page_content)
|
142 |
+
formatted_doc = f"# source: {source_info}\n{doc.page_content}\n\n\n"
|
143 |
+
doc_context.append(formatted_doc)
|
144 |
|
145 |
+
return doc_context
|