File size: 4,406 Bytes
c18ba93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
class RAGChain:
    """Retrieval-augmented generation (RAG) pipeline for the Kadi4Mat 'kadiAPY' library.

    Workflow: (1) ask the LLM which kadiAPY library variant the user's query
    targets, (2) retrieve matching documentation and source-code snippets from
    the vector store, (3) format those snippets, and (4) generate the final
    answer with the LLM.
    """

    def __init__(self, llm, vector_store):
        """
        Initialize the RAGChain.

        Args:
            llm: Language-model wrapper exposing ``predict(str) -> str`` and
                ``invoke(str)`` returning an object with a ``.content``
                attribute (LangChain-style interface — TODO confirm).
            vector_store: Store exposing
                ``similarity_search(query, k=..., filter=...)``.
        """
        self.llm = llm
        self.vector_store = vector_store

    def predict_library_usage(self, query):
        """
        Use the LLM to predict the relevant kadiAPY library variant for the query.

        Args:
            query: The user's natural-language question.

        Returns:
            str: Either 'kadi-apy python library' or
            'kadi-apy python cli library', stripped of surrounding whitespace.
        """
        prompt = (
            f"""The query is: '{query}'.
                Based on the user's query, assist them by determining which technical document they should read to interact with the software named 'Kadi4Mat'. 
                There are three different technical documents to choose from:
                    - Document 1: Provides information on how to use a Python library to interact with the HTTP API of 'Kadi4Mat'.
                    - Document 2: Provides information on how to use a Python library to implement custom CLI commands to interact with 'Kadi4Mat'.
        
                Your task is to select the single most likely option. 
                    If Document 1 is the best choice, respond with 'kadi-apy python library'. 
                    If Document 2 is the best choice, respond with 'kadi-apy python cli library'. 
                Respond with only the exact corresponding option and do not include any additional comments, explanations, or text.
            """
        )
        # Strip whitespace/newlines the LLM may append: the return value is
        # used verbatim as a metadata filter in retrieve_contexts(), where a
        # trailing '\n' would silently match nothing.
        return self.llm.predict(prompt).strip()

    def retrieve_contexts(self, query, library_usage_prediction):
        """
        Retrieve relevant documentation and source-code snippets.

        Args:
            query: The user's natural-language question.
            library_usage_prediction: Library-variant label from
                :meth:`predict_library_usage`, used as the metadata filter for
                the code search.

        Returns:
            tuple: ``(doc_contexts, code_contexts)`` — two lists of the top-5
            similarity-search hits (documents tagged ``usage == "doc"`` and
            code tagged with the predicted library variant, respectively).
        """
        doc_contexts = self.vector_store.similarity_search(query, k=5, filter={"usage": "doc"})
        code_contexts = self.vector_store.similarity_search(query, k=5, filter={"usage": library_usage_prediction})

        return doc_contexts, code_contexts

    def format_context(self, doc_contexts, code_contexts):
        """
        Format the retrieved document and code snippets into prompt-ready text.

        Delegates to the module-level helpers ``format_kadi_api_doc_context``
        and ``format_kadi_apy_library_context`` (defined elsewhere in this file).

        Returns:
            tuple: ``(doc_context, code_context)`` formatted strings.
        """
        doc_context = format_kadi_api_doc_context(doc_contexts)
        code_context = format_kadi_apy_library_context(code_contexts)

        return doc_context, code_context

    def generate_response(self, query, doc_context, code_context):
        """
        Generate the final answer from the retrieved contexts via the LLM.

        Args:
            query: The user's natural-language question.
            doc_context: Formatted documentation snippets.
            code_context: Formatted source-code snippets.

        Returns:
            The ``.content`` of the LLM's response message.
        """
        prompt = f"""You are an expert python developer. You are assisting in generating code for users who want to programmatically 
                    make use of api of a software. There is a specific Python library named "kadiAPY" designed to interact with 
                    the API of the software. It provides an object-oriented approach for interfacing with the API.

                    You are given "Documentation Snippets" and "Code Snippets"
                        "Documentation Snippets:" Contains a collection of potentially useful snippets, including code examples and documentation excerpts of "kadiAPY"
                        "Code Snippets:" Contains potentially useful snippets from the source code of "kadiAPY"
                        
                    Based on the retrieved snippets and the guidelines answer the "query".  
                        
                        General Guidelines:
                        - If no related information is found from the snippets to answer the query, reply that you do not know.
                        
                        Guidelines when generating code:
                        - First display the full code and then follow with a well structured explanation of the generated code.
                        
                Documentation Snippets:
                {doc_context}
                Code Snippets:
                {code_context}
                
                Query: 
                {query}
        """
        return self.llm.invoke(prompt).content

    def rag_workflow(self, query):
        """
        Run the complete RAG workflow for *query*.

        Predicts the library variant, retrieves and formats contexts, then
        generates and returns the final LLM response.
        """
        library_usage_prediction = self.predict_library_usage(query)

        doc_contexts, code_contexts = self.retrieve_contexts(query, library_usage_prediction)
        doc_context, code_context = self.format_context(doc_contexts, code_contexts)

        return self.generate_response(query, doc_context, code_context)