Spaces:

bupa1018
/

KadiAPY_Coding_Assistant

Sleeping

App Files Files Community

bupa1018 committed on Mar 10

Commit

76069ac

1 Parent(s): 1c96354

Update ragchain.py

Browse files

Files changed (1) hide show

ragchain.py +69 -10

ragchain.py CHANGED Viewed

@@ -7,6 +7,47 @@ class RAGChain:
         self.llm = llm
         self.vector_store = vector_store
     def predict_library_usage(self, query):
         """
         Use the LLM to predict the relevant library for the user's query.
@@ -39,8 +80,8 @@ class RAGChain:
         """
         Format the retrieved document and code contexts.
         """
-        doc_context = format_kadi_api_doc_context(doc_contexts)
-        code_context = format_kadi_apy_library_context(code_contexts)
         return doc_context, code_context
@@ -74,13 +115,31 @@ class RAGChain:
         """
         return self.llm.invoke(prompt).content
-    def rag_workflow(self, query):
-        """
-        Complete the RAG workflow: predict library usage, retrieve contexts, and generate a response.
-        """
-        library_usage_prediction = self.predict_library_usage(query)
-        doc_contexts, code_contexts = self.retrieve_contexts(query, library_usage_prediction)
-        doc_context, code_context = self.format_context(doc_contexts, code_contexts)
-        return self.generate_response(query, doc_context, code_context)

         self.llm = llm
         self.vector_store = vector_store
+    def rewrite_query(self, query):
+        """
+        Rewrite the user's query to align with the language and structure of the library's methods and documentation.
+
+        The query is embedded at the end of an instruction prompt, the prompt is
+        passed to ``self.llm.invoke``, and the ``content`` attribute of the
+        response is returned with leading/trailing whitespace stripped.
+
+        Args:
+            query: The user's original query text.
+
+        Returns:
+            The stripped ``content`` of the LLM response.
+        """
+        # NOTE: the prompt is sent exactly as written, including the source
+        # indentation inside the triple-quoted string.
+        rewrite_prompt = (
+            f"""You are an intelligent assistant that helps users rewrite their queries.
+                The vectorstore consists of the source code and documentation of a Python library, which enables users to
+                programmatically interact with a REST-like API of a software system. The library methods have descriptive
+                docstrings. Your task is to rewrite the query in a way that aligns with the language and structure of the
+                library's methods and documentation, ensuring optimal retrieval of relevant information.
+                Guidelines for rewriting the query:
+                    1. Identify the main action the user wants to perform (e.g., "Upload a file to a record," "Get users of a group").
+                    2. Remove conversational elements like greetings or pleasantries (e.g., "Hello Chatbot", "I need you to help me with").
+                    3. Exclude specific variable values (e.g., "ID of my record is '31'") unless essential to the intent.
+                    4. Rephrase the query to match the format and keywords used in the docstrings, focusing on verbs and objects relevant to the action (e.g., "Add a record to a collection").
+                    5. Given the query the user might need more than one action to achieve his goal. In this case the rewritten query has more than one action.
+                    Examples:
+                        - User query: "Create a Python script with a method that facilitates the creation of records. This method should accept an array of identifiers as a parameter and allow metadata to be added to each record."
+                          Rewritten query: "create records, add metadata to record"
+                        - User query: "Hi, can you help me write Python code to add a record to a collection? The record ID is '45', and the collection ID is '12'."
+                          Rewritten query: "add a record to a collection"
+                        - User query: I need a python script with which i create a new record with the title: "Hello World"  and then link the record to a given collection.
+                          Rewritten query: "create a new record with title" , "link a record to a collection"
+                    Based on these examples and guidelines, rewrite the following user query to align more effectively with the keywords used in the docstrings.
+                    Do not include any additional comments, explanations, or text.
+                    Original query:
+                    {query}
+            """
+        )
+        # Strip so stray whitespace from the model does not leak into the result.
+        return self.llm.invoke(rewrite_prompt).content.strip()
     def predict_library_usage(self, query):
         """
         Use the LLM to predict the relevant library for the user's query.
         """
         Format the retrieved document and code contexts.
         """
+        doc_context = _format_kadi_api_doc_context(doc_contexts)
+        code_context = _format_kadi_apy_library_context(code_contexts)
         return doc_context, code_context
         """
         return self.llm.invoke(prompt).content
+    @staticmethod
+    def _format_kadi_apy_library_context(docs):
+        """
+        Format retrieved library-code chunks as annotated text blocks.
+
+        Each chunk is rendered as three ``#``-prefixed header lines (source,
+        class, type) followed by the chunk's ``page_content`` and three
+        newlines. Missing metadata keys fall back to an "Unknown ..." label.
+
+        Declared as a static method because it takes no ``self``; inside the
+        class it must be reached through ``self.`` or the class name.
+
+        Args:
+            docs: Iterable of objects exposing a ``metadata`` mapping (with
+                ``.get``) and a ``page_content`` attribute.
+
+        Returns:
+            A list with one formatted string per input document, in input order.
+        """
+        return [
+            (
+                f"# source: {doc.metadata.get('source', 'Unknown Source')}\n"
+                f"# class: {doc.metadata.get('class', 'Unknown Class')}\n"
+                f"# type: {doc.metadata.get('type', 'Unknown Type')}\n"
+                f"{doc.page_content}\n\n\n"
+            )
+            for doc in docs
+        ]
+    @staticmethod
+    def _format_kadi_api_doc_context(docs):
+        """
+        Format retrieved documentation chunks as annotated text blocks.
+
+        Each chunk is rendered as a ``# source: ...`` header line followed by
+        the chunk's ``page_content`` and three newlines. A missing ``source``
+        metadata key falls back to "Unknown Source".
+
+        Declared as a static method because it takes no ``self``; inside the
+        class it must be reached through ``self.`` or the class name.
+
+        Args:
+            docs: Iterable of objects exposing a ``metadata`` mapping (with
+                ``.get``) and a ``page_content`` attribute.
+
+        Returns:
+            A list with one formatted string per input document, in input order.
+        """
+        return [
+            f"# source: {doc.metadata.get('source', 'Unknown Source')}\n{doc.page_content}\n\n\n"
+            for doc in docs
+        ]