|
import getpass |
|
from dotenv import dotenv_values, load_dotenv |
|
|
|
|
|
|
|
# Snapshot of the key/value pairs from the local .env file (e.g. API keys).
# NOTE(review): `config` is not referenced anywhere below — presumably it is
# consumed elsewhere or kept for debugging; confirm before removing.
config = dict(dotenv_values(".env"))

# Also export the same .env values into os.environ so that libraries which
# read environment variables directly (e.g. langchain_openai looking up
# OPENAI_API_KEY) can find them.
load_dotenv(".env")
|
|
|
|
|
import bs4 |
|
from langchain import hub |
|
from langchain_community.document_loaders import WebBaseLoader |
|
from langchain_community.vectorstores import Chroma |
|
from langchain_core.output_parsers import StrOutputParser |
|
from langchain_core.runnables import RunnablePassthrough |
|
from langchain_openai import ChatOpenAI, OpenAIEmbeddings |
|
from langchain_text_splitters import RecursiveCharacterTextSplitter |
|
|
|
|
|
# Restrict HTML parsing to the elements that carry the article's actual
# content (body, title, header), dropping navigation/boilerplate.
content_filter = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)

# Fetch the source blog post; the strainer is handed to BeautifulSoup via
# bs_kwargs so filtering happens at parse time.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": content_filter},
)
docs = loader.load()
|
|
|
# Chunk the fetched documents into ~1000-character pieces with 200 characters
# of overlap, so each chunk stays within embedding-friendly size while
# preserving context across boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# Embed every chunk with OpenAI embeddings and index them in a Chroma
# vector store for similarity search.
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
|
|
|
|
|
# Deterministic chat model (temperature=0) used to generate the final answer.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Community-published RAG prompt template pulled from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

# Expose the vector store as a retriever (similarity search by default).
retriever = vectorstore.as_retriever()
|
|
|
|
|
def format_docs(docs):
    """Flatten retrieved documents into one plain-text string.

    Each document's ``page_content`` is kept verbatim; documents are
    separated by a blank line so the LLM can tell them apart.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)
|
|
|
|
|
# LCEL pipeline: the incoming question is fanned out — sent to the retriever
# (whose documents format_docs flattens into plain text for the "context"
# slot) and simultaneously passed through unchanged as "question" — then the
# prompt is filled, the LLM is called, and its message is parsed to a string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Run the chain end to end (performs a vector-store lookup plus an OpenAI
# API call) and print the generated answer.
print(rag_chain.invoke("What is Task Decomposition?"))
|
|
|
|
|
|