File size: 7,271 Bytes
c18ba93
 
 
 
 
 
 
 
 
76069ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6734c75
76069ac
 
c18ba93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9575a3a
c18ba93
 
 
9575a3a
 
c18ba93
fe08d47
fed7cd4
2ff57e5
be9fc31
fed7cd4
 
45f969e
fed7cd4
940d9e6
45f969e
c2cc302
fed7cd4
940d9e6
fed7cd4
 
940d9e6
 
45f969e
4b58dd9
fed7cd4
 
c18ba93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259728c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
class RAGChain:
    """Retrieval-augmented generation helper built around an LLM and a vector store.

    The class bundles the steps visible in this file: rewriting a user query,
    asking the LLM which library variant the query targets, retrieving
    snippets from the vector store, formatting them, and generating the final
    answer from the retrieved context.
    """

    def __init__(self, llm, vector_store):
        """
        Initialize the RAGChain with an LLM instance and a vector store.

        Args:
            llm: Object exposing ``invoke(prompt)``; ``generate_response``
                additionally expects the result to carry a ``.content``
                attribute.
            vector_store: Object exposing
                ``similarity_search(query=..., k=..., filter=...)``.
        """
        self.llm = llm
        self.vector_store = vector_store

    def rewrite_query(self, query):
        """
        Rewrite the user's query to align with the language and structure of the library's methods and documentation.

        Args:
            query: The raw user query; it is interpolated verbatim into the prompt.

        Returns:
            Whatever ``self.llm.invoke`` returns for the rewrite prompt. The
            result is passed through unchanged (no ``.content`` unwrapping),
            so callers receive the LLM's native response object.
        """
        rewrite_prompt = (
            f"""You are an intelligent assistant that helps users rewrite their queries.
                The vectorstore consists of the source code and documentation of a Python library, which enables users to 
                programmatically interact with a REST-like API of a software system. The library methods have descriptive 
                docstrings. Your task is to rewrite the query in a way that aligns with the language and structure of the 
                library's methods and documentation, ensuring optimal retrieval of relevant information.

                Guidelines for rewriting the query:
                    1. Identify the main action the user wants to perform (e.g., "Upload a file to a record," "Get users of a group").
                    2. Remove conversational elements like greetings or pleasantries (e.g., "Hello Chatbot", "I need you to help me with").
                    3. Exclude specific variable values (e.g., "ID of my record is '31'") unless essential to the intent.
                    4. Rephrase the query to match the format and keywords used in the docstrings, focusing on verbs and objects relevant to the action (e.g., "Add a record to a collection").
                    5. Given the query the user might need more than one action to achieve his goal. In this case the rewritten query has more than one action. 

                    Examples:
                        - User query: "Create a Python script with a method that facilitates the creation of records. This method should accept an array of identifiers as a parameter and allow metadata to be added to each record."
                        - Rewritten query: "create records, add metadata to record"
                        - User query: "Hi, can you help me write Python code to add a record to a collection? The record ID is '45', and the collection ID is '12'."
                          Rewritten query: "add a record to a collection"
                        - User query: I need a python script with which i create a new record with the title: "Hello World"  and then link the record to a given collection.
                          Rewritten query: "create a new record with title" , "link a record to a collection"

                    Based on these examples and guidelines, rewrite the following user query to align more effectively with the keywords used in the docstrings. 
                    Do not include any addition comments, explanations, or text.
                    
                    Original query:
                    {query}
            """
        )
        return self.llm.invoke(rewrite_prompt)

    def predict_library_usage(self, query):
        """
        Use the LLM to predict the relevant library for the user's query.

        The prompt offers exactly two options and asks the model to answer
        with either 'kadi-apy python library' or 'kadi-apy python cli library'.

        Args:
            query: The user query; it is interpolated verbatim into the prompt.

        Returns:
            The model's answer as text: the ``.content`` of the response when
            the LLM returns a message object, otherwise the response itself.
            The text is returned as produced by the model (not stripped or
            validated against the two expected options).
        """
        prompt = (
            f"""The query is: '{query}'.
                Based on the user's query, assist them by determining which technical document they should read to interact with the software named 'Kadi4Mat'. 
                There are two different technical documents to choose from:
                    - Document 1: Provides information on how to use a Python library to interact with the HTTP API of 'Kadi4Mat'.
                    - Document 2: Provides information on how to use a Python library to implement custom CLI commands to interact with 'Kadi4Mat'.
        
                Your task is to select the single most likely option. 
                    If Document 1 is the best choice, respond with 'kadi-apy python library'. 
                    If Document 2 is the best choice, respond with 'kadi-apy python cli library'. 
                Respond with only the exact corresponding option and do not include any additional comments, explanations, or text.
            """
        )
        # Use invoke() like the other methods instead of the deprecated
        # predict(); unwrap .content so callers still receive plain text
        # whether the LLM returns a message object or a bare string.
        response = self.llm.invoke(prompt)
        return getattr(response, "content", response)

    def retrieve_contexts(self, query, k, filter=None):
        """
        Retrieve relevant documents and source code based on the query and library usage prediction.

        Args:
            query: Text to search the vector store with.
            k: Number of results requested from the vector store.
            filter: Optional filter forwarded unchanged to the vector store
                (the name shadows the builtin but is part of the public
                signature, so it is kept).

        Returns:
            The result of ``vector_store.similarity_search`` for the given arguments.
        """
        return self.vector_store.similarity_search(query=query, k=k, filter=filter)

    def format_documents(self, documents):
        """
        Turn retrieved documents into a flat list of text fragments.

        For each document (numbered from 1) six fragments are emitted in
        order: a "Snippet N" header, a newline, the metadata rendered as
        "key: value" pairs joined by ", ", a newline, the page content, and a
        trailing triple newline. Each document's metadata and content are
        also printed to stdout as a debugging trace.

        Args:
            documents: Iterable of objects exposing ``metadata`` (a mapping)
                and ``page_content``.

        Returns:
            A list of string fragments (not a single joined string); empty
            when ``documents`` is empty.
        """
        formatted_docs = []
        for i, doc in enumerate(documents, start=1):
            metadata_str = ", ".join(
                f"{key}: {value}" for key, value in doc.metadata.items()
            )

            # Debug trace of what was retrieved; stdout output is kept
            # exactly as before so existing logs stay comparable.
            print("\n")
            print("------------------------------Beneath is retrieved doc------------------------------------------------")
            print(metadata_str)
            print("\n")
            print(doc.page_content)
            print("\n\n")
            print("------------------------------End  of retrived doc------------------------------------------------")

            formatted_docs.extend(
                [
                    f"Snippet {i}: \n",
                    "\n",
                    metadata_str,
                    "\n",
                    doc.page_content,
                    "\n\n\n",
                ]
            )

        return formatted_docs

    def generate_response(self, query, doc_context, code_context):
        """
        Generate a response using the retrieved contexts and the LLM.

        Args:
            query: The user query to answer.
            doc_context: Documentation snippets; interpolated into the prompt
                via its string representation.
            code_context: Source-code snippets; interpolated into the prompt
                via its string representation.

        Returns:
            The ``.content`` attribute of the LLM response.
        """
        prompt = f"""You are an expert python developer. You are assisting in generating code for users who want to programmatically 
                    make use of api of a software. There is a specific Python library named "kadiAPY" designed to interact with 
                    the API of the software. It provides an object-oriented approach for interfacing with the API.

                    You are given "Documentation Snippets" and "Code Snippets"
                        "Documentation Snippets:" Contains a collection of potentially useful snippets, including code examples and documentation excerpts of "kadiAPY"
                        "Code Snippets:" Contains potentially useful snippets from the source code of "kadiAPY"
                        
                    Based on the retrieved snippets and the guidelines answer the "query".  
                        
                        General Guidelines:
                        - If no related information is found from the snippets to answer the query, reply that you do not know.
                        
                        Guidelines when generating code:
                        - First display the full code and then follow with a well structured explanation of the generated code.
                        
                Documentation Snippets:
                {doc_context}
                Code Snippets:
                {code_context}
                
                Query: 
                {query}
        """
        return self.llm.invoke(prompt).content