In [2]:
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from backend.app.vectorstore import get_vector_db

[nltk_data] Downloading package punkt_tab to
[nltk_data] /Users/ryanrodriguez/nltk_data...
[nltk_data] Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data] /Users/ryanrodriguez/nltk_data...
[nltk_data] Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data] date!


In [3]:
SYSTEM_ROLE_PROMPT = """
 You are a knowledgeable grading assistant that evaluates student answers based on provided context.
 You should determine if answers are correct and provide constructive feedback.
"""

USER_ROLE_PROMPT = """
 Grade the following student answer based on the provided context about {query}.
 
 Context: {context}
 
 Question: {problem}
 Student Answer: {answer}
 
 Evaluate if the answer is correct and provide brief feedback. Start with either "Correct" or "Incorrect" 
 followed by a brief explanation of why. Focus on the accuracy based on the context provided.
 
 Always begin your response with "Correct" or "Incorrect" and then provide a brief explanation of why.

 Your response should be direct and clear, for example:
 "Correct. The answer accurately explains [reason]" or 
 "Incorrect. While [partial understanding], the answer misses [key point]"
"""

In [5]:
chat_prompt = ChatPromptTemplate.from_messages([
 ("system", SYSTEM_ROLE_PROMPT),
 ("user", USER_ROLE_PROMPT)
])

openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7)

retriever = get_vector_db().as_retriever(search_kwargs={"k": 2})


In [16]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from operator import itemgetter

simple_rag = (
 {
 # Use the query to retrieve documents from the vectorstore
 "context": itemgetter("query") | retriever | (lambda docs: "\n\n".join([doc.page_content for doc in docs])),
 # Pass through all other inputs directly
 "query": itemgetter("query"),
 "problem": itemgetter("problem"),
 "answer": itemgetter("answer")
 } 
 | chat_prompt
 | openai_chat_model
 | StrOutputParser()
)

In [17]:
raw_result = simple_rag.invoke(
 {
 "query": "RAG",
 "problem": "What is the purpose of the indexing component in a RAG application?",
 "answer": "The indexing component is used to store and retrieve documents efficiently."
 }
)
raw_result

'Incorrect. While the answer mentions storing and retrieving documents, it misses the key point that the purpose of the indexing component in a RAG application is to ingest data from a source, index it, and facilitate efficient retrieval of relevant data at runtime.'

In [23]:
import json
result = json.loads(raw_result)
result["questions"]

['What are the two main components of a typical RAG application?',
 'What is the purpose of the indexing component in a RAG application?',
 "What are the steps involved in the 'Load' phase of indexing?",
 'Why is splitting text into smaller chunks important in the context of RAG applications?',
 'How does the retrieval and generation component of a RAG application process user queries?']

In [None]:
({
 "context": retriever,
 "query": RunnablePassthrough(),
 "problem": RunnablePassthrough(),
 "answer": RunnablePassthrough(),
}
| chat_prompt).invoke(
 {
 "query": "RAG",
 "problem": "What is the purpose of the indexing component in a RAG application?",
 "answer": "The indexing component is used to store and retrieve documents efficiently."
 }
)

TypeError: argument 'text': 'dict' object cannot be converted to 'PyString'

In [15]:
retriever.invoke("RAG")

[Document(metadata={'source': 'static/data/langchain_rag_tutorial.html', '_id': 'f1d61e037f6240d3a3671e435f9d5726', '_collection_name': 'extending_context_window_llama_3'}, page_content="Part 1 (this guide) introduces RAG and walks through a minimal implementation.\n\nPart 2 extends the implementation to accommodate conversation-style interactions and multi-step retrieval processes.\n\nThis tutorial will show how to build a simple Q&A application\nover a text data source. Along the way we’ll go over a typical Q&A\narchitecture and highlight additional resources for more advanced Q&A techniques. We’ll also see\nhow LangSmith can help us trace and understand our application.\nLangSmith will become increasingly helpful as our application grows in\ncomplexity.\n\nIf you're already familiar with basic retrieval, you might also be interested in\nthis high-level overview of different retrieval techniques.\n\nNote: Here we focus on Q&A for unstructured data. If you are interested for RAG over 