Spaces: Runtime error
Uploading App
- .DS_Store +0 -0
- .chainlit/config.toml +84 -0
- .env +1 -0
- Dockerfile +12 -0
- app.py +123 -0
- chainlit.md +1 -0
- data/AirBNB10kfilingsq12024.pdf +0 -0
- requirements.txt +14 -0
- testenvironment.ipynb +238 -0
.DS_Store
ADDED
Binary file (6.15 kB)
.chainlit/config.toml
ADDED
@@ -0,0 +1,84 @@
[project]
# Whether to enable telemetry (default: true). No personal data is collected.
enable_telemetry = true

# List of environment variables to be provided by each user to use the app.
user_env = []

# Duration (in seconds) during which the session is saved when the connection is lost
session_timeout = 3600

# Enable third parties caching (e.g LangChain cache)
cache = false

# Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
# follow_symlink = false

[features]
# Show the prompt playground
prompt_playground = true

# Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
unsafe_allow_html = false

# Process and display mathematical expressions. This can clash with "$" characters in messages.
latex = false

# Authorize users to upload files with messages
multi_modal = true

# Allows user to use speech to text
[features.speech_to_text]
enabled = false
# See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
# language = "en-US"

[UI]
# Name of the app and chatbot.
name = "Chatbot"

# Show the readme while the conversation is empty.
show_readme_as_default = true

# Description of the app and chatbot. This is used for HTML tags.
# description = ""

# Large size content are by default collapsed for a cleaner ui
default_collapse_content = true

# The default value for the expand messages settings.
default_expand_messages = false

# Hide the chain of thought details from the user in the UI.
hide_cot = false

# Link to your github repo. This will add a github button in the UI's header.
# github = ""

# Specify a CSS file that can be used to customize the user interface.
# The CSS file can be served from the public directory or via an external link.
# custom_css = "/public/test.css"

# Override default MUI light theme. (Check theme.ts)
[UI.theme.light]
#background = "#FAFAFA"
#paper = "#FFFFFF"

[UI.theme.light.primary]
#main = "#F80061"
#dark = "#980039"
#light = "#FFE7EB"

# Override default MUI dark theme. (Check theme.ts)
[UI.theme.dark]
#background = "#FAFAFA"
#paper = "#FFFFFF"

[UI.theme.dark.primary]
#main = "#F80061"
#dark = "#980039"
#light = "#FFE7EB"


[meta]
generated_by = "0.7.700"
.env
ADDED
@@ -0,0 +1 @@
OPENAI_API_KEY=sk-proj-AHkNxgunsygzWHhVl9c1T3BlbkFJO7prS6ckuhzhBZfwHZze
Dockerfile
ADDED
@@ -0,0 +1,12 @@
FROM python:3.11.9
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
RUN mkdir -p $HOME/app/data/vectorstore && chown -R user:user $HOME/app/data
WORKDIR $HOME/app
COPY --chown=user . $HOME/app
COPY ./requirements.txt ~/app/requirements.txt
RUN pip install -r requirements.txt
COPY . .
CMD ["chainlit", "run", "app.py", "--port", "7860"]
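
The Dockerfile's entrypoint simply launches Chainlit on port 7860, the default port a Hugging Face Space expects the app to listen on. For a quick local check outside Docker, the same command can be reproduced from Python; a minimal sketch, assuming chainlit and the other packages in requirements.txt are installed in the active environment and app.py sits in the current directory:

# Hypothetical local smoke test mirroring the Dockerfile CMD; not part of the repo.
# Assumes the `chainlit` CLI is on PATH.
import subprocess

subprocess.run(["chainlit", "run", "app.py", "--port", "7860"], check=True)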
app.py
ADDED
@@ -0,0 +1,123 @@
import os
import openai
import chainlit as cl
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import Qdrant
from langchain.prompts import ChatPromptTemplate

from dotenv import load_dotenv
from operator import itemgetter
from langchain_huggingface import HuggingFaceEndpoint
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEndpointEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.runnable.config import RunnableConfig

#Load environment variables
load_dotenv()
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

#Load 10-K PDF and split into chunks
loader = PyMuPDFLoader (
    "./data/AirBNB10kfilingsq12024.pdf"
)

documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap = 100
)

documents = text_splitter.split_documents(documents)

#Load embeddings model - we'll use OpenAI's text-embedding-3-small
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small"
)

#Create QDrant vector store
qdrant_vector_store = Qdrant.from_documents(
    documents,
    embeddings,
    location=":memory:",
    collection_name="AirBNB10k",
)

#Create Retriever
retriever = qdrant_vector_store.as_retriever()

#Create Prompt Template
template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':

Context:
{context}

Question:
{question}
"""

prompt = ChatPromptTemplate.from_template(template)

#Choose LLM - we'll use gpt-4o.
primary_llm = ChatOpenAI(model_name="gpt-4o", temperature=0)

#Set up Chainlit
@cl.author_rename
def rename(original_author: str):
    """
    This function can be used to rename the 'author' of a message.

    In this case, we're overriding the 'Assistant' author to be 'Airbnb10kBot'.
    """
    rename_dict = {
        "Assistant" : "Airbnb10kBot"
    }
    return rename_dict.get(original_author, original_author)

@cl.on_chat_start
async def start_chat():
    """
    This function will be called at the start of every user session.

    We will build our LCEL RAG chain here, and store it in the user session.

    The user session is a dictionary that is unique to each user session, and is stored in the memory of the server.
    """
    retrieval_augmented_chain = (
        # INVOKE CHAIN WITH: {"question" : "<<SOME USER QUESTION>>"}
        # "question" : populated by getting the value of the "question" key
        # "context" : populated by getting the value of the "question" key and chaining it into the base_retriever
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        | prompt | primary_llm
    )

    cl.user_session.set("retrieval_augmented_chain", retrieval_augmented_chain)

@cl.on_message
async def main(message: cl.Message):
    """
    This function will be called every time a message is received from a session.

    We will use the LCEL RAG chain to generate a response to the user query.

    The LCEL RAG chain is stored in the user session, and is unique to each user session - this is why we can access it here.
    """
    retrieval_augmented_chain = cl.user_session.get("retrieval_augmented_chain")

    msg = cl.Message(content="")

    async for chunk in retrieval_augmented_chain.astream(
        {"question": message.content},
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        await msg.stream_token(chunk.content)

    await msg.send()
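
Because the chain above ends at the ChatOpenAI model (there is no StrOutputParser at the end), each streamed chunk is a message chunk whose text lives on .content, which is why the handler calls msg.stream_token(chunk.content). The same chain can also be exercised outside Chainlit with a plain invoke; a minimal sketch, assuming the module-level retriever, prompt, and primary_llm from app.py are already constructed in the session:

# Hypothetical standalone check of the RAG chain defined in app.py.
from operator import itemgetter

rag_chain = (
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
    | prompt
    | primary_llm
)

# invoke() returns an AIMessage; the answer text is on .content
result = rag_chain.invoke(
    {"question": "What was the total value of 'Cash and cash equivalents' as of December 31, 2023?"}
)
print(result.content)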
chainlit.md
ADDED
@@ -0,0 +1 @@
# AIE3 Midterm: RAG-in' on the AirBNB-10k Filing
data/AirBNB10kfilingsq12024.pdf
ADDED
Binary file (596 kB)
requirements.txt
ADDED
@@ -0,0 +1,14 @@
chainlit==0.7.700
langchain==0.2.5
langchain_community==0.2.5
langchain_core==0.2.9
langchain_huggingface==0.0.3
langchain_text_splitters==0.2.1
python-dotenv==1.0.1
langchain-openai
langchainhub
openai
faiss-cpu
qdrant-client
pymupdf
pandas
testenvironment.ipynb
ADDED
@@ -0,0 +1,238 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"!pip install -U -q langchain langchain-openai langchain_core langchain-community langchainhub openai"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"!pip install -qU qdrant-client pymupdf pandas"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2024-06-20 23:18:02 - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
"2024-06-20 23:18:03 - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
"2024-06-20 23:18:04 - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
"2024-06-20 23:18:05 - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
"2024-06-20 23:18:06 - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
]
}
],
"source": [
"import os\n",
"import openai\n",
"import chainlit as cl\n",
"from langchain_community.document_loaders import PyMuPDFLoader\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_openai import ChatOpenAI\n",
"from langchain_community.vectorstores import Qdrant\n",
"from langchain.prompts import ChatPromptTemplate\n",
"\n",
"from dotenv import load_dotenv\n",
"from operator import itemgetter\n",
"from langchain_huggingface import HuggingFaceEndpoint\n",
"from langchain_community.document_loaders import TextLoader\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.vectorstores import FAISS\n",
"from langchain_huggingface import HuggingFaceEndpointEmbeddings\n",
"from langchain_core.prompts import PromptTemplate\n",
"from langchain.schema.output_parser import StrOutputParser\n",
"from langchain.schema.runnable import RunnablePassthrough\n",
"from langchain.schema.runnable.config import RunnableConfig\n",
"\n",
"#Load environment variables\n",
"load_dotenv()\n",
"OPENAI_API_KEY = os.environ[\"OPENAI_API_KEY\"]\n",
"\n",
"#Load 10-K PDF and split into chunks\n",
"loader = PyMuPDFLoader (\n",
" \"./data/AirBNB10kfilingsq12024.pdf\"\n",
")\n",
"\n",
"documents = loader.load()\n",
"\n",
"text_splitter = RecursiveCharacterTextSplitter(\n",
" chunk_size = 1000,\n",
" chunk_overlap = 100\n",
")\n",
"\n",
"documents = text_splitter.split_documents(documents)\n",
"\n",
"#Load embeddings model - we'll use OpenAI's text-embedding-3-small\n",
"embeddings = OpenAIEmbeddings(\n",
" model=\"text-embedding-3-small\"\n",
")\n",
"\n",
"#Create QDrant vector store\n",
"qdrant_vector_store = Qdrant.from_documents(\n",
" documents,\n",
" embeddings,\n",
" location=\":memory:\",\n",
" collection_name=\"AirBNB10k\",\n",
")\n",
"\n",
"#Create Retriever\n",
"retriever = qdrant_vector_store.as_retriever()\n",
"\n",
"#Create Prompt Template\n",
"template = \"\"\"Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':\n",
"\n",
"Context:\n",
"{context}\n",
"\n",
"Question:\n",
"{question}\n",
"\"\"\"\n",
"\n",
"prompt = ChatPromptTemplate.from_template(template)\n",
"\n",
"#Choose LLM - we'll use gpt-4o.\n",
"primary_llm = ChatOpenAI(model_name=\"gpt-4o\", temperature=0)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2024-06-20 23:18:10 - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
]
}
],
"source": [
"retrieved_documents = retriever.invoke(\"What was the total value of 'Cash and cash equivalents' as of December 31, 2023?\")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"page_content='Table of Contents\\nAirbnb, Inc.\\nNotes to Condensed Consolidated Financial Statements (unaudited)\\nNote 3. Supplemental Financial Statement Information\\nCash, Cash Equivalents, and Restricted Cash\\nThe following table reconciles cash, cash equivalents, and restricted cash reported on the Company’s unaudited condensed consolidated balance sheets to the total amount\\npresented in the unaudited condensed consolidated statements of cash flows (in millions):\\nDecember 31,\\n2023\\nMarch 31,\\n2024\\nCash and cash equivalents\\n$\\n6,874\\xa0 $\\n7,829\\xa0\\nCash and cash equivalents included in funds receivable and amounts held on behalf of customers\\n5,769\\xa0\\n8,665\\xa0\\nRestricted cash included in prepaids and other current assets\\n24\\xa0\\n35\\xa0\\nTotal cash, cash equivalents, and restricted cash presented in the unaudited condensed consolidated statements of cash flows\\n$\\n12,667\\xa0 $\\n16,529\\xa0\\nSupplemental disclosures of balance sheet information\\nSupplemental balance sheet information consisted of the following (in millions):\\nDecember 31,' metadata={'source': './data/AirBNB10kfilingsq12024.pdf', 'file_path': './data/AirBNB10kfilingsq12024.pdf', 'page': 10, 'total_pages': 54, 'format': 'PDF 1.4', 'title': '0001559720-24-000017', 'author': 'EDGAR® Online LLC, a subsidiary of OTC Markets Group', 'subject': 'Form 10-Q filed on 2024-05-08 for the period ending 2024-03-31', 'keywords': '0001559720-24-000017; ; 10-Q', 'creator': 'EDGAR Filing HTML Converter', 'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creationDate': \"D:20240508161757-04'00'\", 'modDate': \"D:20240508161807-04'00'\", 'trapped': '', 'encryption': 'Standard V2 R3 128-bit RC4', '_id': 'aeb9683f378a493bab25395753cc9fb5', '_collection_name': 'AirBNB10k'}\n",
"page_content='liabilities. We believe that our existing cash, cash equivalents, and short-term investments balances in the United States are sufficient to fund our working capital needs in the United\\nStates.\\nWe have access to $1.0 billion of commitments and a $200\\xa0million sub-limit for the issuance of letters of credit under the 2022 Credit Facility. As of March\\xa031, 2024, no amounts were\\ndrawn under the 2022 Credit Facility and outstanding letters of credit totaled $25 million.\\nMaterial Cash Requirements\\nAs of March\\xa031, 2024, we had outstanding $2.0\\xa0billion in aggregate principal amount of indebtedness of our 0% convertible senior notes due in 2026. On March 3, 2021, in\\nconnection with the pricing of the 2026 Notes, we entered into privately negotiated capped call transactions (the “Capped Calls”) with certain of the initial purchasers and other' metadata={'source': './data/AirBNB10kfilingsq12024.pdf', 'file_path': './data/AirBNB10kfilingsq12024.pdf', 'page': 28, 'total_pages': 54, 'format': 'PDF 1.4', 'title': '0001559720-24-000017', 'author': 'EDGAR® Online LLC, a subsidiary of OTC Markets Group', 'subject': 'Form 10-Q filed on 2024-05-08 for the period ending 2024-03-31', 'keywords': '0001559720-24-000017; ; 10-Q', 'creator': 'EDGAR Filing HTML Converter', 'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creationDate': \"D:20240508161757-04'00'\", 'modDate': \"D:20240508161807-04'00'\", 'trapped': '', 'encryption': 'Standard V2 R3 128-bit RC4', '_id': '19d099a757a5473c9b1eceafc54bc0cf', '_collection_name': 'AirBNB10k'}\n",
"page_content='unrealized loss position for more than twelve months as of December\\xa031, 2023 and March\\xa031, 2024, respectively.\\nThe following table summarizes the contractual maturities of the Company’s available-for-sale debt securities (in millions):\\nMarch 31, 2024\\nAmortized\\nCost\\nEstimated\\nFair Value\\nDue within one year\\n$\\n1,489\\xa0 $\\n1,489\\xa0\\nDue after one year through five years\\n957\\xa0\\n947\\xa0\\nDue after five years\\n96\\xa0\\n92\\xa0\\nTotal\\n$\\n2,542\\xa0 $\\n2,528\\xa0\\nNote 5. Fair Value Measurements and Financial Instruments\\nThe following table summarizes the Company’s financial assets and liabilities measured at fair value on a recurring basis (in millions):\\nDecember 31, 2023\\nLevel\\xa01\\nLevel\\xa02\\nLevel\\xa03\\nTotal\\nAssets\\nCash and cash equivalents:\\nMoney market funds\\n$\\n2,018\\xa0 $\\n—\\xa0 $\\n—\\xa0 $\\n2,018\\xa0\\nCertificates of deposit\\n—\\xa0\\n1\\xa0\\n—\\xa0\\n1\\xa0\\nGovernment bonds\\n—\\xa0\\n115\\xa0\\n—\\xa0\\n115\\xa0\\nCommercial paper\\n—\\xa0\\n223\\xa0\\n—\\xa0\\n223\\xa0\\nCorporate debt securities\\n—\\xa0\\n12\\xa0\\n—\\xa0\\n12\\xa0\\n2,018\\xa0\\n351\\xa0\\n—\\xa0\\n2,369\\xa0\\nShort-term investments:\\nCertificates of deposit\\n—\\xa0\\n172\\xa0\\n—\\xa0\\n172\\xa0\\nGovernment bonds\\n—' metadata={'source': './data/AirBNB10kfilingsq12024.pdf', 'file_path': './data/AirBNB10kfilingsq12024.pdf', 'page': 12, 'total_pages': 54, 'format': 'PDF 1.4', 'title': '0001559720-24-000017', 'author': 'EDGAR® Online LLC, a subsidiary of OTC Markets Group', 'subject': 'Form 10-Q filed on 2024-05-08 for the period ending 2024-03-31', 'keywords': '0001559720-24-000017; ; 10-Q', 'creator': 'EDGAR Filing HTML Converter', 'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creationDate': \"D:20240508161757-04'00'\", 'modDate': \"D:20240508161807-04'00'\", 'trapped': '', 'encryption': 'Standard V2 R3 128-bit RC4', '_id': '3b5048de80644fc59fd621def77d795d', '_collection_name': 'AirBNB10k'}\n",
"page_content='and mortgage-backed and asset-backed securities. These amounts do not include funds of $8.7 billion as of March\\xa031, 2024, that we held for bookings in advance of guests\\ncompleting check-ins that we record separately on our unaudited condensed consolidated balance sheet in funds receivable and amounts held on behalf of customers with a\\ncorresponding liability in funds payable and amounts payable to customers.\\nOur cash and cash equivalents are generally held at large global systemically important banks (or “G-SIBs”) which are subject to high capital requirements and are required to\\nregularly perform stringent stress tests related to their ability to absorb capital losses. Our cash, cash equivalents, and short-term investments held outside the United States may be\\nrepatriated, subject to certain limitations, and would be available to be used to fund our domestic operations. However, repatriation of such funds may result in additional tax' metadata={'source': './data/AirBNB10kfilingsq12024.pdf', 'file_path': './data/AirBNB10kfilingsq12024.pdf', 'page': 28, 'total_pages': 54, 'format': 'PDF 1.4', 'title': '0001559720-24-000017', 'author': 'EDGAR® Online LLC, a subsidiary of OTC Markets Group', 'subject': 'Form 10-Q filed on 2024-05-08 for the period ending 2024-03-31', 'keywords': '0001559720-24-000017; ; 10-Q', 'creator': 'EDGAR Filing HTML Converter', 'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creationDate': \"D:20240508161757-04'00'\", 'modDate': \"D:20240508161807-04'00'\", 'trapped': '', 'encryption': 'Standard V2 R3 128-bit RC4', '_id': 'c4fd17ee80e44fa2babcfc76951eef77', '_collection_name': 'AirBNB10k'}\n"
]
}
],
"source": [
"for doc in retrieved_documents:\n",
" print(doc)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"primary_llm = ChatOpenAI(model_name=\"gpt-4o\", temperature=0)\n",
"\n",
"retrieval_augmented_chain = (\n",
" # INVOKE CHAIN WITH: {\"question\" : \"<<SOME USER QUESTION>>\"}\n",
" # \"question\" : populated by getting the value of the \"question\" key\n",
" # \"context\" : populated by getting the value of the \"question\" key and chaining it into the base_retriever\n",
" {\"context\": itemgetter(\"question\") | retriever, \"question\": itemgetter(\"question\")}\n",
" | prompt | primary_llm\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2024-06-20 23:18:25 - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
"2024-06-20 23:18:26 - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
]
},
{
"ename": "TypeError",
"evalue": "'AIMessage' object is not subscriptable",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[17], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m question \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWhat was the total value of \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCash and cash equivalents\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m as of December 31, 2023?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 3\u001b[0m result \u001b[38;5;241m=\u001b[39m retrieval_augmented_chain\u001b[38;5;241m.\u001b[39minvoke({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquestion\u001b[39m\u001b[38;5;124m\"\u001b[39m : question})\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mresult\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mcontent)\n",
"\u001b[0;31mTypeError\u001b[0m: 'AIMessage' object is not subscriptable"
]
}
],
"source": [
"question = \"What was the total value of 'Cash and cash equivalents' as of December 31, 2023?\"\n",
"\n",
"result = retrieval_augmented_chain.invoke({\"question\" : question})\n",
"\n",
"print(result[\"response\"].content)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"content=\"The total value of 'Cash and cash equivalents' as of December 31, 2023, was $6,874 million.\" response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 2129, 'total_tokens': 2156}, 'model_name': 'gpt-4o', 'system_fingerprint': 'fp_3e7d703517', 'finish_reason': 'stop', 'logprobs': None} id='run-1a2044fd-54bb-4f2d-bb88-cfafd050ee3c-0' usage_metadata={'input_tokens': 2129, 'output_tokens': 27, 'total_tokens': 2156}\n"
]
}
],
"source": [
"print(result)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "llmops-course",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
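
The TypeError in the cell with execution_count 17 comes from treating the chain's output as a dict: the LCEL chain ends at the ChatOpenAI model, so invoke() returns an AIMessage, and result["response"] is invalid indexing, which is exactly what the traceback reports. The later print(result) output confirms the answer text is carried on the message itself. A corrected version of that cell, as a minimal sketch assuming the notebook's retrieval_augmented_chain is already built:

# Corrected final step: read the answer off the AIMessage directly.
question = "What was the total value of 'Cash and cash equivalents' as of December 31, 2023?"

result = retrieval_augmented_chain.invoke({"question": question})

print(result.content)  # per the notebook output: "... was $6,874 million."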