import logging
from time import perf_counter

from jinja2 import Template

from backend.query_llm import generate
from backend.semantic_search import qd_retriever

logger = logging.getLogger(__name__)

# Prompt template: the retrieved documents are listed as context, followed by
# the user's query.
# NOTE(review): the line breaks inside this literal were reconstructed from a
# whitespace-collapsed view of the file; the wording is unchanged, but confirm
# the exact layout against the original before relying on it.
template_string = """
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context:
---
{% for doc in documents %}
{{ doc.content }}
---
{% endfor %}
Query: {{ query }}
"""

template = Template(template_string)


def rag(query, top_k=5):
    """Answer *query* with retrieval-augmented generation.

    Retrieves up to ``top_k`` documents for the query via
    ``qd_retriever.retrieve``, renders them together with the query into the
    module-level prompt template, and passes the prompt to ``generate`` with
    an empty history.

    Args:
        query: The user's question; forwarded to the retriever and rendered
            into the prompt.
        top_k: Number of documents requested from the retriever.

    Returns:
        A ``(prompt, result)`` tuple: the rendered prompt string and whatever
        ``generate`` returned for it.
    """
    # Retrieve documents relevant to query
    document_start = perf_counter()
    documents = qd_retriever.retrieve(query, top_k=top_k)
    # Elapsed time is "now minus start"; the reverse order yields a negative.
    document_time = perf_counter() - document_start

    # Create prompt
    prompt = template.render(documents=documents, query=query)

    # Query LLM with prompt based on relevant documents
    llm_start = perf_counter()
    result = generate(prompt=prompt, history='')
    llm_time = perf_counter() - llm_start

    # Surface the timings instead of discarding them; the return value is
    # kept as (prompt, result) so existing callers are unaffected.
    logger.debug(
        "rag timings: retrieval=%.3fs, llm=%.3fs", document_time, llm_time
    )

    return prompt, result