"""
Module for performing retrieval-augmented generation (RAG) using LangChain.
This module provides functions to optimize search queries, retrieve relevant documents,
and generate answers to questions using the retrieved context. It leverages the LangChain
library for building the RAG pipeline.
Functions:
- get_optimized_search_messages(query: str) -> list:
Generate optimized search messages for a given query.
- optimize_search_query(chat_llm, query: str, callbacks: list = []) -> str:
Optimize the search query using the chat language model.
- get_rag_prompt_template() -> ChatPromptTemplate:
Get the prompt template for retrieval-augmented generation (RAG).
- format_docs(docs: list) -> str:
Format the retrieved documents into a JSON string.
- multi_query_rag(chat_llm, question: str, search_query: str, vectorstore, callbacks: list = []) -> str:
Perform RAG using multiple queries to retrieve relevant documents.
- query_rag(chat_llm, question: str, search_query: str, vectorstore, callbacks: list = []) -> str:
Perform RAG using a single query to retrieve relevant documents.
"""
import json
from datetime import datetime

from langchain.schema import SystemMessage, HumanMessage
from langchain.prompts.chat import (
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
    ChatPromptTemplate
)
from langchain.prompts.prompt import PromptTemplate
from langchain.retrievers.multi_query import MultiQueryRetriever

from langchain_aws.chat_models.bedrock_converse import ChatBedrockConverse
from langchain_cohere import ChatCohere
from langchain_fireworks.chat_models import ChatFireworks
from langchain_groq.chat_models import ChatGroq
from langchain_openai import ChatOpenAI
from langchain_ollama.chat_models import ChatOllama
from langsmith import traceable
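
# End-to-end usage sketch (illustrative only; assumes `vectorstore` is an
# already-populated LangChain vector store and that credentials for the chosen
# model are available in the environment):
#
#   chat_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
#   question = "How do I bake chocolate chip cookies from scratch?"
#   search_query = optimize_search_query(chat_llm, question)
#   # ...fetch web results for `search_query` and index them into `vectorstore`...
#   answer = query_rag(chat_llm, question, search_query, vectorstore)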


def get_optimized_search_messages(query):
    """
    Generate optimized search messages for a given query.

    Args:
        query (str): The user's query.

    Returns:
        list: A list containing the system message and human message for optimized search.
    """
    system_message = SystemMessage(
        content="""
            You are a prompt optimizer for web search. Your task is to take a given chat prompt or question and transform it into an optimized search string that will yield the most relevant and useful information from a search engine like Google.
            The goal is to create a search query that will help users find the most accurate and pertinent information related to their original prompt or question. An effective search string should be concise, use relevant keywords, and leverage search engine syntax for better results.

            To optimize the prompt:
            - Identify the key information being requested
            - Consider any implicit information or context that might be useful for the search
            - Arrange the keywords into a concise search string
            - Put the most important keywords first

            Some tips and things to be sure to remove:
            - Remove any conversational or instructional phrases
            - Remove style instructions (example: "in the style of", "engaging", "short", "long")
            - Remove length instructions (example: essay, article, letter, blog post, etc.)

            You should answer only with the optimized search query and add "**" to the end of the search string to indicate the end of the query

            Example:
                Question: How do I bake chocolate chip cookies from scratch?
                chocolate chip cookies recipe from scratch**
            Example:
                Question: I would like you to show me a timeline of Marie Curie's life. Show results as a markdown table
                Marie Curie timeline**
            Example:
                Question: I would like you to write a long article on NATO vs Russia. Use known geopolitical frameworks.
                geopolitics nato russia**
            Example:
                Question: Write an engaging LinkedIn post about Andrew Ng
                Andrew Ng**
            Example:
                Question: Write a short article about the solar system in the style of Carl Sagan
                solar system**
            Example:
                Question: Biography of Napoleon. Include a table with the major events.
                napoleon biography events**
            Example:
                Question: Write a short article on the history of the United States. Include a table with the major events.
                united states history events**
            Example:
                Question: Write a short article about the solar system in the style of donald trump
                solar system**
            Example:
                Question: Write a short LinkedIn post about how the "Freakonomics" book predictions didn't pan out
                freakonomics book predictions failed**
            Example:
                Question: Write a LinkedIn post about startup M&A in the style of Andrew Ng
                startup M&A**
            Example:
                Question: Write a LinkedIn post about the current state of M&A for startups. Write in the style of Russ from the Silicon Valley TV show.
                startup current state M&A**
        """
    )
    human_message = HumanMessage(
        content=f"""
            Question: {query}

        """
    )
    return [system_message, human_message]



@traceable(run_type="llm", name="optimize_search_query")
def optimize_search_query(chat_llm, query, callbacks=[]):
    """
    Optimize the search query using the chat language model.

    Args:
        chat_llm: The chat language model to use.
        query (str): The user's query.
        callbacks (list): Optional callbacks for tracing.

    Returns:
        str: The optimized search query.
    """
    messages = get_optimized_search_messages(query)
    response = chat_llm.invoke(messages, config={"callbacks": callbacks})
    optimized_search_query = response.content.strip()

    # Split by '**' and take the first part, then strip whitespace
    optimized_search_query = optimized_search_query.split("**", 1)[0].strip()

    # Remove surrounding quotes if present
    optimized_search_query = optimized_search_query.strip('"')

    # If the result is empty, fall back to the original query
    if not optimized_search_query:
        optimized_search_query = query

    return optimized_search_query
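
# Example behavior (sketch): given the question
#   "Write a short article about the solar system in the style of Carl Sagan"
# the model is expected to reply "solar system**"; this function strips the
# trailing "**" marker (and surrounding quotes) and returns "solar system".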

def get_rag_prompt_template():
    """
    Get the prompt template for Retrieval-Augmented Generation (RAG).

    Returns:
        ChatPromptTemplate: The prompt template for RAG.
    """
    today = datetime.now().strftime("%Y-%m-%d")
    system_prompt = SystemMessagePromptTemplate(
        prompt=PromptTemplate(
            input_variables=[],
            template=f"""
                You are an expert research assistant.
                You are provided with a Context in JSON format and a Question.
                Each JSON entry contains: content, title, link

                Use the Context to answer the Question, providing references and links to the Context material you use in your answer.
                When generating your answer, follow these steps:
                - Select the most relevant material from the provided Context to help answer the question
                - Cite the sources you use by including the title and link of each source
                - Synthesize the selected information into a clear, informative answer to the question
                - Format your answer in Markdown, using heading levels 2-3 as needed
                - Include a "References" section at the end with the full citation and link for each source you used

                If the provided context is not relevant to the question, say so and answer from your internal knowledge.
                If you cannot answer the question using either the context or your internal knowledge, state that you don't have enough information to provide an accurate answer.
                If the information in the provided context contradicts your internal knowledge, answer but warn the user about the contradiction.

                Today's date is {today}
            """
        )
    )
    human_prompt = HumanMessagePromptTemplate(
        prompt=PromptTemplate(
            input_variables=["context", "query"],
            template="""
                Context:
                ---------------------
                {context}
                ---------------------
                Question: {query}
                Answer:
            """
        )
    )
    return ChatPromptTemplate(
        input_variables=["context", "query"],
        messages=[system_prompt, human_prompt],
    )
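
# Rendering sketch: the template is filled with the JSON context produced by
# format_docs() and the user's question before being sent to the model, e.g.:
#
#   prompt = get_rag_prompt_template().format(
#       context='[{"content": "...", "title": "...", "link": "https://example.com"}]',
#       query="What is retrieval-augmented generation?",
#   )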

def format_docs(docs):
    """
    Format the retrieved documents into a JSON string.

    Args:
        docs (list): A list of documents to format.

    Returns:
        str: The formatted documents as a JSON string.
    """
    formatted_docs = []
    for d in docs:
        content = d.page_content
        title = d.metadata['title']
        source = d.metadata['source']
        doc = {"content": content, "title": title, "link": source}
        formatted_docs.append(doc)
    docs_as_json = json.dumps(formatted_docs, indent=2, ensure_ascii=False)
    return docs_as_json
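
# Serialization sketch: a retrieved langchain Document such as
#   Document(page_content="RAG combines retrieval with generation...",
#            metadata={"title": "Intro to RAG", "source": "https://example.com/rag"})
# is formatted as:
#   [{"content": "RAG combines retrieval with generation...",
#     "title": "Intro to RAG", "link": "https://example.com/rag"}]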


def multi_query_rag(chat_llm, question, search_query, vectorstore, callbacks=[]):
    """
    Perform RAG using multiple queries to retrieve relevant documents.

    Args:
        chat_llm: The chat language model to use.
        question (str): The user's question.
        search_query (str): The search query to use.
        vectorstore: The vector store for document retrieval.
        callbacks (list): Optional callbacks for tracing.

    Returns:
        str: The generated answer to the question.
    """
    retriever_from_llm = MultiQueryRetriever.from_llm(
        retriever=vectorstore.as_retriever(), llm=chat_llm, include_original=True,
    )
    unique_docs = retriever_from_llm.get_relevant_documents(
        query=search_query, callbacks=callbacks, verbose=True
    )
    context = format_docs(unique_docs)
    prompt = get_rag_prompt_template().format(query=question, context=context)
    response = chat_llm.invoke(prompt, config={"callbacks": callbacks})
    return response.content
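
# Usage sketch (illustrative; `chat_llm` and `vectorstore` are assumed to be
# initialized as in the module-level example above):
#   answer = multi_query_rag(chat_llm, "How do heat pumps work?",
#                            "heat pump operation efficiency", vectorstore)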

def get_context_size(chat_llm):
    """
    Return the approximate context window size (in tokens) for the given
    chat model, falling back to a conservative 4096 for unrecognized models.
    """
    if isinstance(chat_llm, ChatOpenAI):
        if chat_llm.model_name.startswith("gpt-4"):
            return 128000
        else:
            return 16385
    if isinstance(chat_llm, ChatFireworks):
        return 32768
    if isinstance(chat_llm, ChatGroq):
        return 32768
    if isinstance(chat_llm, ChatOllama):
        return 120000
    if isinstance(chat_llm, ChatCohere):
        return 120000
    if isinstance(chat_llm, ChatBedrockConverse):
        if chat_llm.model_id.startswith("meta.llama3-1"):
            return 128000
        if chat_llm.model_id.startswith("anthropic.claude-3"):
            return 200000
        if chat_llm.model_id.startswith("anthropic.claude"):
            return 100000
        if chat_llm.model_id.startswith("mistral"):
            if chat_llm.model_id.startswith("mistral.mistral.mistral-large-2407"):
                return 128000
            return 32000
    return 4096
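
# Note: build_rag_prompt() below uses these sizes as a token budget. For
# example, a ChatGroq model (32,768-token window) must fit its prompt within
# 32768 - 768 = 32000 tokens, leaving headroom for the generated answer.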

@traceable(run_type="retriever")
def build_rag_prompt(chat_llm, question, search_query, vectorstore, top_k=10, callbacks=[]):
    """
    Build a RAG prompt whose retrieved context fits the model's context window.

    Retrieves the top_k most similar documents and shrinks top_k by 25% at a
    time until the rendered prompt fits, keeping 768 tokens of headroom for
    the generated answer.
    """
    prompt = ""
    done = False
    while not done:
        unique_docs = vectorstore.similarity_search(
            search_query, k=top_k, callbacks=callbacks, verbose=True)
        context = format_docs(unique_docs)
        prompt = get_rag_prompt_template().format(query=question, context=context)
        nbr_tokens = chat_llm.get_num_tokens(prompt)
        # Stop once the prompt fits (with headroom) or only one document remains
        if top_k <= 1 or nbr_tokens <= get_context_size(chat_llm) - 768:
            done = True
        else:
            top_k = int(top_k * 0.75)
    return prompt

@traceable(run_type="llm", name="query_rag")
def query_rag(chat_llm, question, search_query, vectorstore, top_k=10, callbacks=[]):
    """
    Perform RAG using a single query to retrieve relevant documents and
    return the generated answer as a string.
    """
    prompt = build_rag_prompt(chat_llm, question, search_query, vectorstore, top_k=top_k, callbacks=callbacks)
    response = chat_llm.invoke(prompt, config={"callbacks": callbacks})
    # Ensure we're returning a string
    if isinstance(response.content, list):
        # If it's a list, join the elements into a single string
        return ' '.join(str(item) for item in response.content)
    elif isinstance(response.content, str):
        # If it's already a string, return it as is
        return response.content
    else:
        # If it's neither a list nor a string, convert it to a string
        return str(response.content)