ahncs committed
Commit 6024b4b · 1 Parent(s): 6d8cb27

Uploading App

.DS_Store ADDED
Binary file (6.15 kB).
 
.chainlit/config.toml ADDED
@@ -0,0 +1,84 @@
+ [project]
+ # Whether to enable telemetry (default: true). No personal data is collected.
+ enable_telemetry = true
+
+ # List of environment variables to be provided by each user to use the app.
+ user_env = []
+
+ # Duration (in seconds) during which the session is saved when the connection is lost
+ session_timeout = 3600
+
+ # Enable third parties caching (e.g LangChain cache)
+ cache = false
+
+ # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
+ # follow_symlink = false
+
+ [features]
+ # Show the prompt playground
+ prompt_playground = true
+
+ # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
+ unsafe_allow_html = false
+
+ # Process and display mathematical expressions. This can clash with "$" characters in messages.
+ latex = false
+
+ # Authorize users to upload files with messages
+ multi_modal = true
+
+ # Allows user to use speech to text
+ [features.speech_to_text]
+ enabled = false
+ # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
+ # language = "en-US"
+
+ [UI]
+ # Name of the app and chatbot.
+ name = "Chatbot"
+
+ # Show the readme while the conversation is empty.
+ show_readme_as_default = true
+
+ # Description of the app and chatbot. This is used for HTML tags.
+ # description = ""
+
+ # Large size content are by default collapsed for a cleaner ui
+ default_collapse_content = true
+
+ # The default value for the expand messages settings.
+ default_expand_messages = false
+
+ # Hide the chain of thought details from the user in the UI.
+ hide_cot = false
+
+ # Link to your github repo. This will add a github button in the UI's header.
+ # github = ""
+
+ # Specify a CSS file that can be used to customize the user interface.
+ # The CSS file can be served from the public directory or via an external link.
+ # custom_css = "/public/test.css"
+
+ # Override default MUI light theme. (Check theme.ts)
+ [UI.theme.light]
+ #background = "#FAFAFA"
+ #paper = "#FFFFFF"
+
+ [UI.theme.light.primary]
+ #main = "#F80061"
+ #dark = "#980039"
+ #light = "#FFE7EB"
+
+ # Override default MUI dark theme. (Check theme.ts)
+ [UI.theme.dark]
+ #background = "#FAFAFA"
+ #paper = "#FFFFFF"
+
+ [UI.theme.dark.primary]
+ #main = "#F80061"
+ #dark = "#980039"
+ #light = "#FFE7EB"
+
+
+ [meta]
+ generated_by = "0.7.700"
.env ADDED
@@ -0,0 +1 @@
+ OPENAI_API_KEY=sk-proj-AHkNxgunsygzWHhVl9c1T3BlbkFJO7prS6ckuhzhBZfwHZze
Dockerfile ADDED
@@ -0,0 +1,12 @@
+ FROM python:3.11.9
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+ RUN mkdir -p $HOME/app/data/vectorstore && chown -R user:user $HOME/app/data
+ WORKDIR $HOME/app
+ COPY --chown=user . $HOME/app
+ COPY ./requirements.txt ~/app/requirements.txt
+ RUN pip install -r requirements.txt
+ COPY . .
+ CMD ["chainlit", "run", "app.py", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,123 @@
+ import os
+ import openai
+ import chainlit as cl
+ from langchain_community.document_loaders import PyMuPDFLoader
+ from langchain_openai import OpenAIEmbeddings
+ from langchain_openai import ChatOpenAI
+ from langchain_community.vectorstores import Qdrant
+ from langchain.prompts import ChatPromptTemplate
+
+ from dotenv import load_dotenv
+ from operator import itemgetter
+ from langchain_huggingface import HuggingFaceEndpoint
+ from langchain_community.document_loaders import TextLoader
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.vectorstores import FAISS
+ from langchain_huggingface import HuggingFaceEndpointEmbeddings
+ from langchain_core.prompts import PromptTemplate
+ from langchain.schema.output_parser import StrOutputParser
+ from langchain.schema.runnable import RunnablePassthrough
+ from langchain.schema.runnable.config import RunnableConfig
+
+ #Load environment variables
+ load_dotenv()
+ OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
+
+ #Load 10-K PDF and split into chunks
+ loader = PyMuPDFLoader(
+     "./data/AirBNB10kfilingsq12024.pdf"
+ )
+
+ documents = loader.load()
+
+ text_splitter = RecursiveCharacterTextSplitter(
+     chunk_size = 1000,
+     chunk_overlap = 100
+ )
+
+ documents = text_splitter.split_documents(documents)
+
+ #Load embeddings model - we'll use OpenAI's text-embedding-3-small
+ embeddings = OpenAIEmbeddings(
+     model="text-embedding-3-small"
+ )
+
+ #Create QDrant vector store
+ qdrant_vector_store = Qdrant.from_documents(
+     documents,
+     embeddings,
+     location=":memory:",
+     collection_name="AirBNB10k",
+ )
+
+ #Create Retriever
+ retriever = qdrant_vector_store.as_retriever()
+
+ #Create Prompt Template
+ template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':
+
+ Context:
+ {context}
+
+ Question:
+ {question}
+ """
+
+ prompt = ChatPromptTemplate.from_template(template)
+
+ #Choose LLM - we'll use gpt-4o.
+ primary_llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
+
+ #Set up Chainlit
+ @cl.author_rename
+ def rename(original_author: str):
+     """
+     This function can be used to rename the 'author' of a message.
+
+     In this case, we're overriding the 'Assistant' author to be 'Airbnb10kBot'.
+     """
+     rename_dict = {
+         "Assistant" : "Airbnb10kBot"
+     }
+     return rename_dict.get(original_author, original_author)
+
+ @cl.on_chat_start
+ async def start_chat():
+     """
+     This function will be called at the start of every user session.
+
+     We will build our LCEL RAG chain here, and store it in the user session.
+
+     The user session is a dictionary that is unique to each user session, and is stored in the memory of the server.
+     """
+     retrieval_augmented_chain = (
+         # INVOKE CHAIN WITH: {"question" : "<<SOME USER QUESTION>>"}
+         # "question" : populated by getting the value of the "question" key
+         # "context" : populated by getting the value of the "question" key and chaining it into the base_retriever
+         {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
+         | prompt | primary_llm
+     )
+
+     cl.user_session.set("retrieval_augmented_chain", retrieval_augmented_chain)
+
+ @cl.on_message
+ async def main(message: cl.Message):
+     """
+     This function will be called every time a message is received from a session.
+
+     We will use the LCEL RAG chain to generate a response to the user query.
+
+     The LCEL RAG chain is stored in the user session, and is unique to each user session - this is why we can access it here.
+     """
+     retrieval_augmented_chain = cl.user_session.get("retrieval_augmented_chain")
+
+     msg = cl.Message(content="")
+
+     async for chunk in retrieval_augmented_chain.astream(
+         {"question": message.content},
+         config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
+     ):
+         await msg.stream_token(chunk.content)
+
+     await msg.send()
chainlit.md ADDED
@@ -0,0 +1 @@
+ # AIE3 Midterm: RAG-in' on the AirBNB-10k Filing
data/AirBNB10kfilingsq12024.pdf ADDED
Binary file (596 kB).
 
requirements.txt ADDED
@@ -0,0 +1,14 @@
+ chainlit==0.7.700
+ langchain==0.2.5
+ langchain_community==0.2.5
+ langchain_core==0.2.9
+ langchain_huggingface==0.0.3
+ langchain_text_splitters==0.2.1
+ python-dotenv==1.0.1
+ langchain-openai
+ langchainhub
+ openai
+ faiss-cpu
+ qdrant-client
+ pymupdf
+ pandas
testenvironment.ipynb ADDED
@@ -0,0 +1,238 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install -U -q langchain langchain-openai langchain_core langchain-community langchainhub openai"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install -qU qdrant-client pymupdf pandas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2024-06-20 23:18:02 - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+ "2024-06-20 23:18:03 - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+ "2024-06-20 23:18:04 - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+ "2024-06-20 23:18:05 - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+ "2024-06-20 23:18:06 - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import openai\n",
+ "import chainlit as cl\n",
+ "from langchain_community.document_loaders import PyMuPDFLoader\n",
+ "from langchain_openai import OpenAIEmbeddings\n",
+ "from langchain_openai import ChatOpenAI\n",
+ "from langchain_community.vectorstores import Qdrant\n",
+ "from langchain.prompts import ChatPromptTemplate\n",
+ "\n",
+ "from dotenv import load_dotenv\n",
+ "from operator import itemgetter\n",
+ "from langchain_huggingface import HuggingFaceEndpoint\n",
+ "from langchain_community.document_loaders import TextLoader\n",
+ "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+ "from langchain_community.vectorstores import FAISS\n",
+ "from langchain_huggingface import HuggingFaceEndpointEmbeddings\n",
+ "from langchain_core.prompts import PromptTemplate\n",
+ "from langchain.schema.output_parser import StrOutputParser\n",
+ "from langchain.schema.runnable import RunnablePassthrough\n",
+ "from langchain.schema.runnable.config import RunnableConfig\n",
+ "\n",
+ "#Load environment variables\n",
+ "load_dotenv()\n",
+ "OPENAI_API_KEY = os.environ[\"OPENAI_API_KEY\"]\n",
+ "\n",
+ "#Load 10-K PDF and split into chunks\n",
+ "loader = PyMuPDFLoader (\n",
+ " \"./data/AirBNB10kfilingsq12024.pdf\"\n",
+ ")\n",
+ "\n",
+ "documents = loader.load()\n",
+ "\n",
+ "text_splitter = RecursiveCharacterTextSplitter(\n",
+ " chunk_size = 1000,\n",
+ " chunk_overlap = 100\n",
+ ")\n",
+ "\n",
+ "documents = text_splitter.split_documents(documents)\n",
+ "\n",
+ "#Load embeddings model - we'll use OpenAI's text-embedding-3-small\n",
+ "embeddings = OpenAIEmbeddings(\n",
+ " model=\"text-embedding-3-small\"\n",
+ ")\n",
+ "\n",
+ "#Create QDrant vector store\n",
+ "qdrant_vector_store = Qdrant.from_documents(\n",
+ " documents,\n",
+ " embeddings,\n",
+ " location=\":memory:\",\n",
+ " collection_name=\"AirBNB10k\",\n",
+ ")\n",
+ "\n",
+ "#Create Retriever\n",
+ "retriever = qdrant_vector_store.as_retriever()\n",
+ "\n",
+ "#Create Prompt Template\n",
+ "template = \"\"\"Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':\n",
+ "\n",
+ "Context:\n",
+ "{context}\n",
+ "\n",
+ "Question:\n",
+ "{question}\n",
+ "\"\"\"\n",
+ "\n",
+ "prompt = ChatPromptTemplate.from_template(template)\n",
+ "\n",
+ "#Choose LLM - we'll use gpt-4o.\n",
+ "primary_llm = ChatOpenAI(model_name=\"gpt-4o\", temperature=0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2024-06-20 23:18:10 - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
+ ]
+ }
+ ],
+ "source": [
+ "retrieved_documents = retriever.invoke(\"What was the total value of 'Cash and cash equivalents' as of December 31, 2023?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "page_content='Table of Contents\\nAirbnb, Inc.\\nNotes to Condensed Consolidated Financial Statements (unaudited)\\nNote 3. Supplemental Financial Statement Information\\nCash, Cash Equivalents, and Restricted Cash\\nThe following table reconciles cash, cash equivalents, and restricted cash reported on the Company’s unaudited condensed consolidated balance sheets to the total amount\\npresented in the unaudited condensed consolidated statements of cash flows (in millions):\\nDecember 31,\\n2023\\nMarch 31,\\n2024\\nCash and cash equivalents\\n$\\n6,874\\xa0 $\\n7,829\\xa0\\nCash and cash equivalents included in funds receivable and amounts held on behalf of customers\\n5,769\\xa0\\n8,665\\xa0\\nRestricted cash included in prepaids and other current assets\\n24\\xa0\\n35\\xa0\\nTotal cash, cash equivalents, and restricted cash presented in the unaudited condensed consolidated statements of cash flows\\n$\\n12,667\\xa0 $\\n16,529\\xa0\\nSupplemental disclosures of balance sheet information\\nSupplemental balance sheet information consisted of the following (in millions):\\nDecember 31,' metadata={'source': './data/AirBNB10kfilingsq12024.pdf', 'file_path': './data/AirBNB10kfilingsq12024.pdf', 'page': 10, 'total_pages': 54, 'format': 'PDF 1.4', 'title': '0001559720-24-000017', 'author': 'EDGAR® Online LLC, a subsidiary of OTC Markets Group', 'subject': 'Form 10-Q filed on 2024-05-08 for the period ending 2024-03-31', 'keywords': '0001559720-24-000017; ; 10-Q', 'creator': 'EDGAR Filing HTML Converter', 'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creationDate': \"D:20240508161757-04'00'\", 'modDate': \"D:20240508161807-04'00'\", 'trapped': '', 'encryption': 'Standard V2 R3 128-bit RC4', '_id': 'aeb9683f378a493bab25395753cc9fb5', '_collection_name': 'AirBNB10k'}\n",
+ "page_content='liabilities. We believe that our existing cash, cash equivalents, and short-term investments balances in the United States are sufficient to fund our working capital needs in the United\\nStates.\\nWe have access to $1.0 billion of commitments and a $200\\xa0million sub-limit for the issuance of letters of credit under the 2022 Credit Facility. As of March\\xa031, 2024, no amounts were\\ndrawn under the 2022 Credit Facility and outstanding letters of credit totaled $25 million.\\nMaterial Cash Requirements\\nAs of March\\xa031, 2024, we had outstanding $2.0\\xa0billion in aggregate principal amount of indebtedness of our 0% convertible senior notes due in 2026. On March 3, 2021, in\\nconnection with the pricing of the 2026 Notes, we entered into privately negotiated capped call transactions (the “Capped Calls”) with certain of the initial purchasers and other' metadata={'source': './data/AirBNB10kfilingsq12024.pdf', 'file_path': './data/AirBNB10kfilingsq12024.pdf', 'page': 28, 'total_pages': 54, 'format': 'PDF 1.4', 'title': '0001559720-24-000017', 'author': 'EDGAR® Online LLC, a subsidiary of OTC Markets Group', 'subject': 'Form 10-Q filed on 2024-05-08 for the period ending 2024-03-31', 'keywords': '0001559720-24-000017; ; 10-Q', 'creator': 'EDGAR Filing HTML Converter', 'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creationDate': \"D:20240508161757-04'00'\", 'modDate': \"D:20240508161807-04'00'\", 'trapped': '', 'encryption': 'Standard V2 R3 128-bit RC4', '_id': '19d099a757a5473c9b1eceafc54bc0cf', '_collection_name': 'AirBNB10k'}\n",
+ "page_content='unrealized loss position for more than twelve months as of December\\xa031, 2023 and March\\xa031, 2024, respectively.\\nThe following table summarizes the contractual maturities of the Company’s available-for-sale debt securities (in millions):\\nMarch 31, 2024\\nAmortized\\nCost\\nEstimated\\nFair Value\\nDue within one year\\n$\\n1,489\\xa0 $\\n1,489\\xa0\\nDue after one year through five years\\n957\\xa0\\n947\\xa0\\nDue after five years\\n96\\xa0\\n92\\xa0\\nTotal\\n$\\n2,542\\xa0 $\\n2,528\\xa0\\nNote 5. Fair Value Measurements and Financial Instruments\\nThe following table summarizes the Company’s financial assets and liabilities measured at fair value on a recurring basis (in millions):\\nDecember 31, 2023\\nLevel\\xa01\\nLevel\\xa02\\nLevel\\xa03\\nTotal\\nAssets\\nCash and cash equivalents:\\nMoney market funds\\n$\\n2,018\\xa0 $\\n—\\xa0 $\\n—\\xa0 $\\n2,018\\xa0\\nCertificates of deposit\\n—\\xa0\\n1\\xa0\\n—\\xa0\\n1\\xa0\\nGovernment bonds\\n—\\xa0\\n115\\xa0\\n—\\xa0\\n115\\xa0\\nCommercial paper\\n—\\xa0\\n223\\xa0\\n—\\xa0\\n223\\xa0\\nCorporate debt securities\\n—\\xa0\\n12\\xa0\\n—\\xa0\\n12\\xa0\\n2,018\\xa0\\n351\\xa0\\n—\\xa0\\n2,369\\xa0\\nShort-term investments:\\nCertificates of deposit\\n—\\xa0\\n172\\xa0\\n—\\xa0\\n172\\xa0\\nGovernment bonds\\n—' metadata={'source': './data/AirBNB10kfilingsq12024.pdf', 'file_path': './data/AirBNB10kfilingsq12024.pdf', 'page': 12, 'total_pages': 54, 'format': 'PDF 1.4', 'title': '0001559720-24-000017', 'author': 'EDGAR® Online LLC, a subsidiary of OTC Markets Group', 'subject': 'Form 10-Q filed on 2024-05-08 for the period ending 2024-03-31', 'keywords': '0001559720-24-000017; ; 10-Q', 'creator': 'EDGAR Filing HTML Converter', 'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creationDate': \"D:20240508161757-04'00'\", 'modDate': \"D:20240508161807-04'00'\", 'trapped': '', 'encryption': 'Standard V2 R3 128-bit RC4', '_id': '3b5048de80644fc59fd621def77d795d', '_collection_name': 'AirBNB10k'}\n",
+ "page_content='and mortgage-backed and asset-backed securities. These amounts do not include funds of $8.7 billion as of March\\xa031, 2024, that we held for bookings in advance of guests\\ncompleting check-ins that we record separately on our unaudited condensed consolidated balance sheet in funds receivable and amounts held on behalf of customers with a\\ncorresponding liability in funds payable and amounts payable to customers.\\nOur cash and cash equivalents are generally held at large global systemically important banks (or “G-SIBs”) which are subject to high capital requirements and are required to\\nregularly perform stringent stress tests related to their ability to absorb capital losses. Our cash, cash equivalents, and short-term investments held outside the United States may be\\nrepatriated, subject to certain limitations, and would be available to be used to fund our domestic operations. However, repatriation of such funds may result in additional tax' metadata={'source': './data/AirBNB10kfilingsq12024.pdf', 'file_path': './data/AirBNB10kfilingsq12024.pdf', 'page': 28, 'total_pages': 54, 'format': 'PDF 1.4', 'title': '0001559720-24-000017', 'author': 'EDGAR® Online LLC, a subsidiary of OTC Markets Group', 'subject': 'Form 10-Q filed on 2024-05-08 for the period ending 2024-03-31', 'keywords': '0001559720-24-000017; ; 10-Q', 'creator': 'EDGAR Filing HTML Converter', 'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creationDate': \"D:20240508161757-04'00'\", 'modDate': \"D:20240508161807-04'00'\", 'trapped': '', 'encryption': 'Standard V2 R3 128-bit RC4', '_id': 'c4fd17ee80e44fa2babcfc76951eef77', '_collection_name': 'AirBNB10k'}\n"
+ ]
+ }
+ ],
+ "source": [
+ "for doc in retrieved_documents:\n",
+ " print(doc)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "primary_llm = ChatOpenAI(model_name=\"gpt-4o\", temperature=0)\n",
+ "\n",
+ "retrieval_augmented_chain = (\n",
+ " # INVOKE CHAIN WITH: {\"question\" : \"<<SOME USER QUESTION>>\"}\n",
+ " # \"question\" : populated by getting the value of the \"question\" key\n",
+ " # \"context\" : populated by getting the value of the \"question\" key and chaining it into the base_retriever\n",
+ " {\"context\": itemgetter(\"question\") | retriever, \"question\": itemgetter(\"question\")}\n",
+ " | prompt | primary_llm\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2024-06-20 23:18:25 - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+ "2024-06-20 23:18:26 - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
+ ]
+ },
+ {
+ "ename": "TypeError",
+ "evalue": "'AIMessage' object is not subscriptable",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[17], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m question \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWhat was the total value of \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCash and cash equivalents\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m as of December 31, 2023?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 3\u001b[0m result \u001b[38;5;241m=\u001b[39m retrieval_augmented_chain\u001b[38;5;241m.\u001b[39minvoke({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquestion\u001b[39m\u001b[38;5;124m\"\u001b[39m : question})\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mresult\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mcontent)\n",
+ "\u001b[0;31mTypeError\u001b[0m: 'AIMessage' object is not subscriptable"
+ ]
+ }
+ ],
+ "source": [
+ "question = \"What was the total value of 'Cash and cash equivalents' as of December 31, 2023?\"\n",
+ "\n",
+ "result = retrieval_augmented_chain.invoke({\"question\" : question})\n",
+ "\n",
+ "print(result[\"response\"].content)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "content=\"The total value of 'Cash and cash equivalents' as of December 31, 2023, was $6,874 million.\" response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 2129, 'total_tokens': 2156}, 'model_name': 'gpt-4o', 'system_fingerprint': 'fp_3e7d703517', 'finish_reason': 'stop', 'logprobs': None} id='run-1a2044fd-54bb-4f2d-bb88-cfafd050ee3c-0' usage_metadata={'input_tokens': 2129, 'output_tokens': 27, 'total_tokens': 2156}\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(result)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "llmops-course",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
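
Note on the failing cell in testenvironment.ipynb above: the LCEL chain ends with the chat model, so `.invoke()` returns an AIMessage rather than a dict, which is why indexing `result["response"]` raises the TypeError shown, and why the last cell falls back to printing the whole object. A minimal sketch of the intended fix, assuming the same `retrieval_augmented_chain` and `question` variables defined in the notebook:

    # invoke() on this chain returns an AIMessage (not a dict), so read the
    # generated text from its .content attribute instead of subscripting it.
    result = retrieval_augmented_chain.invoke({"question": question})
    print(result.content)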