Spaces:

bupa1018
/

KadiAPY_Coding_Assistant

Sleeping

App Files Files Community

bupa1018 committed on Mar 30

Commit

47a0ac2

verified ·

1 Parent(s): 74f1ba5

Update kadiApy_ragchain.py

Browse files

Files changed (1) hide show

kadiApy_ragchain.py +99 -38

kadiApy_ragchain.py CHANGED Viewed

@@ -27,8 +27,12 @@ class KadiApyRagchain:
         print("Start retrieving:")
         #doc_contexts = self.retrieve_contexts(query, k=2, filter={"dataset_category": "kadi_apy_docs"})
         #code_contexts = self.retrieve_contexts(rewritten_query, k=3, filter={"usage": code_library_usage_prediction})
-        context = self.retrieve_contexts(query, k=5)
         # Vanilla
         #doc_contexts = self.retrieve_contexts(query, k=3, filter={"dataset_category": "kadi_apy_docs"})
         #code_contexts = self.retrieve_contexts(query, k=5, filter={"dataset_category": "kadi_apy_source_code"})
@@ -39,14 +43,13 @@ class KadiApyRagchain:
         # Format contexts
         print("Formatting docs:")
-       # formatted_doc_contexts = self.format_documents(doc_contexts)
-       # formatted_code_contexts = self.format_documents(code_contexts)
-        formatted_contexts = self.format_documents(context)
         # Generate response
         print("Start generatin repsonsse:")
-       # response = self.generate_response(query, chat_history, formatted_doc_contexts, formatted_code_contexts)
-        response = self.generate_response(query, chat_history, formatted_contexts)
         # Add the response to the existing query in the conversation history
         #self.add_to_conversation(llm_response=response)
@@ -109,6 +112,38 @@ class KadiApyRagchain:
         )
         return self.llm.invoke(rewrite_prompt).content
     def predict_library_usage(self, query):
         """
@@ -136,7 +171,44 @@ class KadiApyRagchain:
         context = self.vector_store.similarity_search(query = query, k=k, filter=filter)
         return context
-    # def generate_response(self, query, chat_history, doc_context, code_context):
     #     """
     #     Generate a response using the retrieved contexts and the LLM.
     #     """
@@ -146,8 +218,8 @@ class KadiApyRagchain:
     #     prompt = f"""
     #         You are a Python programming assistant specialized in the "Kadi-APY" library.
     #         The "Kadi-APY" library is a Python package designed to facilitate interaction with the REST-like API of a software platform called Kadi4Mat.
-    #         Your task is to answer the user's query based on the guidelines, and if needed, combine understanding provided by
-    #         "Document Snippets" with the implementation details provided by "Code Snippets."
     #         Guidelines if generating code:
     #             - Display the complete code first, followed by a concise explanation in no more than 5 sentences.
@@ -161,11 +233,8 @@ class KadiApyRagchain:
     #         Chat History:
     #         {formatted_history}
-    #         Document Snippets:
-    #         {doc_context}
-    #         Code Snippets:
-    #         {code_context}
     #         Query:
     #         {query}
@@ -173,41 +242,33 @@ class KadiApyRagchain:
     #     return self.llm.invoke(prompt).content
-    def generate_response(self, query, chat_history, context):
-        """
-        Generate a response using the retrieved contexts and the LLM.
-        """
-        formatted_history = self.format_history(chat_history)
-        # Update the prompt with history included
         prompt = f"""
             You are a Python programming assistant specialized in the "Kadi-APY" library.
             The "Kadi-APY" library is a Python package designed to facilitate interaction with the REST-like API of a software platform called Kadi4Mat.
-            Your task is to answer the user's query based on the guidelines, and if needed, combine understanding provided by "Context"
-            "Context" contains snippets from the source code and/or code examples
-            Guidelines if generating code:
-                - Display the complete code first, followed by a concise explanation in no more than 5 sentences.
-            General Guidelines:
-                - Refer to the "Chat History" if it provides context that could enhance your understanding of the user's query.
-                - Always include the "Chat History" if relevant to the user's query for continuity and clarity in responses.
-                - If the user's query cannot be fulfilled based on the provided snippets, reply with "The API does not support the requested functionality."
-                - If the user's query does not implicate any task, reply with a question asking the user to elaborate.
-            Chat History:
-            {formatted_history}
             Context:
             {context}
-            Query:
-            {query}
         """
-        return self.llm.invoke(prompt).content
     def format_documents(self, documents):
         formatted_docs = []
         for i, doc in enumerate(documents, start=1):

         print("Start retrieving:")
         #doc_contexts = self.retrieve_contexts(query, k=2, filter={"dataset_category": "kadi_apy_docs"})
         #code_contexts = self.retrieve_contexts(rewritten_query, k=3, filter={"usage": code_library_usage_prediction})
+        code_contexts = self.retrieve_contexts(query, k=2)
+        query_formulated_question= formulate_question(code_contexts)
+        print("question": query_formulated_question)
+        doc_contexts = self.retrieve_contexts(query_formulated_question, k=2, filter={"dataset_category": "kadi_apy_docs"})
         # Vanilla
         #doc_contexts = self.retrieve_contexts(query, k=3, filter={"dataset_category": "kadi_apy_docs"})
         #code_contexts = self.retrieve_contexts(query, k=5, filter={"dataset_category": "kadi_apy_source_code"})
         # Format contexts
         print("Formatting docs:")
+        formatted_doc_contexts = self.format_documents(doc_contexts)
+        formatted_code_contexts = self.format_documents(code_contexts)
         # Generate response
         print("Start generatin repsonsse:")
+        response = self.generate_response(query, chat_history, formatted_doc_contexts, formatted_code_contexts)
+        #response = self.generate_response(query, chat_history, formatted_contexts)
         # Add the response to the existing query in the conversation history
         #self.add_to_conversation(llm_response=response)
         )
         return self.llm.invoke(rewrite_prompt).content
+    def ask_rag(self, context):
+        rewrite_prompt = (
+            f""" "Code snippets" consists of methods and/or class declaration from the source code of an API Wrapper Python library.
+                  Your task is to write a question how to use
+                Guidelines for rewriting the query:
+                    1. Identify the main action the user wants to perform (e.g., "Upload a file to a record," "Get users of a group").
+                    2. Remove conversational elements like greetings or pleasantries (e.g., "Hello Chatbot", "I need you to help me with").
+                    3. Exclude specific variable values (e.g., "ID of my record is '31'") unless essential to the intent.
+                    4. Rephrase the query to match the format and keywords used in the docstrings, focusing on verbs and objects relevant to the action (e.g., "Add a record to a collection").
+                    5. Given the query the user might need more than one action to achieve his goal. In this case the rewritten query has more than one action.
+                    Examples:
+                        - User query: "Create a Python script with a method that facilitates the creation of records. This method should accept an array of identifiers as a parameter and allow metadata to be added to each record."
+                        - Rewritten query: "create records, add metadata to record"
+                        - User query: "Hi, can you help me write Python code to add a record to a collection? The record ID is '45', and the collection ID is '12'."
+                          Rewritten query: "add a record to a collection"
+                        - User query: I need a python script with which i create a new record with the title: "Hello World"  and then link the record to a given collection.
+                          Rewritten query: "create a new record with title" , "link a record to a collection"
+                    Based on these examples and guidelines, rewrite the following user query to align more effectively with the keywords used in the docstrings.
+                    Do not include any addition comments, explanations, or text.
+                    Original query:
+                    {query}
+            """
+        )
+        return self.llm.invoke(rewrite_prompt).content
     def predict_library_usage(self, query):
         """
         context = self.vector_store.similarity_search(query = query, k=k, filter=filter)
         return context
+    def generate_response(self, query, chat_history, doc_context, code_context):
+        """
+        Generate a response using the retrieved contexts and the LLM.
+        """
+        formatted_history = self.format_history(chat_history)
+        # Update the prompt with history included
+        prompt = f"""
+            You are a Python programming assistant specialized in the "Kadi-APY" library.
+            The "Kadi-APY" library is a Python package designed to facilitate interaction with the REST-like API of a software platform called Kadi4Mat.
+            Your task is to answer the user's query based on the guidelines, and if needed, combine understanding provided by
+            "Document Snippets" with the implementation details provided by "Code Snippets."
+            Guidelines if generating code:
+                - Display the complete code first, followed by a concise explanation in no more than 5 sentences.
+            General Guidelines:
+                - Refer to the "Chat History" if it provides context that could enhance your understanding of the user's query.
+                - Always include the "Chat History" if relevant to the user's query for continuity and clarity in responses.
+                - If the user's query cannot be fulfilled based on the provided snippets, reply with "The API does not support the requested functionality."
+                - If the user's query does not implicate any task, reply with a question asking the user to elaborate.
+            Chat History:
+            {formatted_history}
+            Document Snippets:
+            {doc_context}
+            Code Snippets:
+            {code_context}
+            Query:
+            {query}
+        """
+        return self.llm.invoke(prompt).content
+    # def generate_response(self, query, chat_history, context):
     #     """
     #     Generate a response using the retrieved contexts and the LLM.
     #     """
     #     prompt = f"""
     #         You are a Python programming assistant specialized in the "Kadi-APY" library.
     #         The "Kadi-APY" library is a Python package designed to facilitate interaction with the REST-like API of a software platform called Kadi4Mat.
+    #         Your task is to answer the user's query based on the guidelines, and if needed, combine understanding provided by "Context"
+    #         "Context" contains snippets from the source code and/or code examples
     #         Guidelines if generating code:
     #             - Display the complete code first, followed by a concise explanation in no more than 5 sentences.
     #         Chat History:
     #         {formatted_history}
+    #         Context:
+    #         {context}
     #         Query:
     #         {query}
     #     return self.llm.invoke(prompt).content
+    def formulate_question(self, source_code):
+    """
+    Generate a response using the retrieved contexts and the LLM.
+    """
         prompt = f"""
             You are a Python programming assistant specialized in the "Kadi-APY" library.
             The "Kadi-APY" library is a Python package designed to facilitate interaction with the REST-like API of a software platform called Kadi4Mat.
+            Your task is to formulate the next logical question a programmer would ask themselves to implement and run the method provided in the "context".
+            "Context" contains snippets from the source code and metadata that provide details about the method.
+            Guidelines for generating questions:
+                - The question should be specific to the programmer's intent of using the method within a Python script.
+                - Focus on determining the entry point of the class to which the method belongs.
+                - Avoid vague or general questions; be precise about the next actionable steps.
             Context:
             {context}
         """
+    return self.llm.invoke(prompt).content
     def format_documents(self, documents):
         formatted_docs = []
         for i, doc in enumerate(documents, start=1):