Sam committed · f3a52d4 · Parent(s): 5f56729
Revised and cleaned up comments in app.py for clarity and completeness
app.py CHANGED
@@ -1,4 +1,4 @@
-
+#-----Import Required Libraries-----#
 import os
 from dotenv import load_dotenv
 
@@ -14,17 +14,15 @@ import tiktoken
 # Specific imports from the libraries
 from langchain.document_loaders import PyMuPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.embeddings import OpenAIEmbeddings
-#old import from langchain_openai import OpenAIEmbeddings
+from langchain.embeddings import OpenAIEmbeddings #Note: Old import was - from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Qdrant
 from langchain.prompts import ChatPromptTemplate
-from langchain.chat_models import ChatOpenAI
-#old import from langchain_openai import ChatOpenAI
+from langchain.chat_models import ChatOpenAI #Note: Old import was - from langchain_openai import ChatOpenAI
 from operator import itemgetter
 from langchain.schema.output_parser import StrOutputParser
 from langchain.schema.runnable import RunnablePassthrough
 
-
+#-----Set Environment Variables-----#
 load_dotenv()
 
 # Load environment variables
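
Note on the imports above: the new inline comments label the langchain_openai paths as the old ones, but in current LangChain releases the relationship is the reverse — langchain.embeddings.OpenAIEmbeddings and langchain.chat_models.ChatOpenAI are the deprecated re-exports, and the standalone langchain-openai package is the maintained home for both classes. A minimal sketch, assuming langchain-openai is installed:

from langchain_openai import OpenAIEmbeddings, ChatOpenAI  # intended as drop-in replacements for the deprecated paths
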
@@ -33,11 +31,12 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 # Initialize OpenAI client after loading the environment variables
 openai.api_key = OPENAI_API_KEY
 
-
+#-----Document Loading and Processing-----#
 loader = PyMuPDFLoader("/home/user/app/data/airbnb_q1_2024.pdf")
-#old file path is loader = PyMuPDFLoader("/Users/sampazar/AIE3-Midterm/data/airbnb_q1_2024.pdf")
 documents = loader.load()
 
+#Note: I changed the loader file path from one that worked only locally to one that works with Docker. The old file path is loader = PyMuPDFLoader("/Users/sampazar/AIE3-Midterm/data/airbnb_q1_2024.pdf")
+
 def tiktoken_len(text):
     tokens = tiktoken.encoding_for_model("gpt-4o").encode(text)
     return len(tokens)
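
Note on tiktoken_len: it makes the splitter below measure chunk length in model tokens rather than characters, keeping chunks aligned with the LLM's context accounting. tiktoken.encoding_for_model resolves the tokenizer by model name (recent tiktoken versions map "gpt-4o" to the o200k_base encoding). The committed version re-resolves the encoding on every call; a sketch that caches it once:

import tiktoken

# Resolve the gpt-4o tokenizer once, instead of on every call
encoding = tiktoken.encoding_for_model("gpt-4o")

def tiktoken_len(text):
    return len(encoding.encode(text))
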
@@ -50,6 +49,7 @@ text_splitter = RecursiveCharacterTextSplitter(
 
 split_chunks = text_splitter.split_documents(documents)
 
+#-----Embedding and Vector Store Setup-----#
 
 # Load OpenAI Embeddings Model
 embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
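
Note on the splitter: the RecursiveCharacterTextSplitter constructor arguments (old lines 44-49) are elided from this hunk. For context, a typical configuration consistent with tiktoken_len — the chunk_size and chunk_overlap values here are illustrative assumptions, not the committed ones:

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,            # maximum chunk length, measured in tokens via tiktoken_len
    chunk_overlap=50,          # tokens shared between consecutive chunks to preserve context
    length_function=tiktoken_len,
)
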
@@ -65,7 +65,8 @@ qdrant_vector_store = Qdrant.from_documents(
 # Create a Retriever
 retriever = qdrant_vector_store.as_retriever()
 
-
+#-----Prompt Template and Language Model Setup-----#
+# Define the prompt template
 template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':
 
 Context:
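
Note on the vector store: the Qdrant.from_documents(...) arguments are elided from this hunk. A sketch of the usual call — the location and collection_name values are illustrative assumptions:

qdrant_vector_store = Qdrant.from_documents(
    split_chunks,                       # the token-sized chunks produced above
    embeddings,                         # the OpenAI embeddings model defined above
    location=":memory:",                # illustrative: run Qdrant in-process, no separate server
    collection_name="airbnb_q1_2024",   # illustrative collection name
)
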
@@ -80,7 +81,12 @@ prompt = ChatPromptTemplate.from_template(template)
 # Define the primary LLM
 primary_llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
 
-
+#-----Creating a Retrieval Augmented Generation (RAG) Chain-----#
+# The RAG chain:
+# (1) Takes the user question and retrieves relevant context,
+# (2) Passes the context through unchanged,
+# (3) Formats the prompt with context and question, then sends it to the LLM to generate a response
+
 retrieval_augmented_qa_chain = (
     # INVOKE CHAIN WITH: {"question" : "<>"}
     # "question" : populated by getting the value of the "question" key
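
Note on the chain: only the opening and closing lines of the LCEL pipeline appear in these hunks. A sketch of the full pattern the new comments describe — the middle of the chain is elided from this diff, so this is an assumed reconstruction rather than the committed code:

retrieval_augmented_qa_chain = (
    # Fetch context for the question while passing the question through unchanged
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # Format the prompt with both keys, then generate the response with the LLM
    | {"response": prompt | primary_llm, "context": itemgetter("context")}
)
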
@@ -95,8 +101,9 @@ retrieval_augmented_qa_chain = (
     | {"response": prompt | primary_llm, "context": itemgetter("context")}
 )
 
-
-
+#-----Chainlit Integration-----#
+# Sets initial chat settings at the start of a user session
+@cl.on_chat_start
 async def start_chat():
     settings = {
         "model": "gpt-4o",
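
Note on the settings: only the "model" key is visible in these hunks (old lines 103-107 are elided). A sketch of a plausible settings block — every key except "model" is an illustrative assumption:

settings = {
    "model": "gpt-4o",
    "temperature": 0,     # illustrative: matches the deterministic primary_llm above
    "max_tokens": 500,    # illustrative
    "top_p": 1,           # illustrative
}
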
@@ -108,14 +115,17 @@ async def start_chat():
     }
     cl.user_session.set("settings", settings)
 
-
+# Processes incoming messages from the user and sends a response through a series of steps:
+# (1) Retrieves the user's settings
+# (2) Invokes the RAG chain with the user's message
+# (3) Extracts the content from the response and sends it back to the user
+
+@cl.on_message
 async def handle_message(message: cl.Message):
     settings = cl.user_session.get("settings")
 
     response = retrieval_augmented_qa_chain.invoke({"question": message.content})
-
-    #msg = cl.Message(content=response["response"])
-    #await msg.send()
+
 
     # Extracting and sending just the content
     content = response["response"].content
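
The diff is truncated after the content = response["response"].content line, so the end of the new handler is not shown. Based on the commented-out lines removed above and the standard Chainlit API, the remainder presumably wraps the extracted content in a message and sends it — a sketch of the function's closing lines:

    # Wrap the extracted content in a Chainlit message and send it back to the user
    msg = cl.Message(content=content)
    await msg.send()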