Sam committed on
Commit f3a52d4 · 1 Parent(s): 5f56729

Revised and cleaned up comments in app.py for clarity and completeness

Files changed (1)
app.py +26 -16
app.py CHANGED
@@ -1,4 +1,4 @@
-# Import Required Libraries
+#-----Import Required Libraries-----#
 import os
 from dotenv import load_dotenv
 
@@ -14,17 +14,15 @@ import tiktoken
 # Specific imports from the libraries
 from langchain.document_loaders import PyMuPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.embeddings import OpenAIEmbeddings
-#old import from langchain_openai import OpenAIEmbeddings
+from langchain.embeddings import OpenAIEmbeddings  # Note: old import was - from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Qdrant
 from langchain.prompts import ChatPromptTemplate
-from langchain.chat_models import ChatOpenAI
-#old import from langchain_openai import ChatOpenAI
+from langchain.chat_models import ChatOpenAI  # Note: old import was - from langchain_openai import ChatOpenAI
 from operator import itemgetter
 from langchain.schema.output_parser import StrOutputParser
 from langchain.schema.runnable import RunnablePassthrough
 
-# Set Environment Variables
+#-----Set Environment Variables-----#
 load_dotenv()
 
 # Load environment variables
@@ -33,11 +31,12 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 # Initialize OpenAI client after loading the environment variables
 openai.api_key = OPENAI_API_KEY
 
-# Load and split documents
+#-----Document Loading and Processing-----#
 loader = PyMuPDFLoader("/home/user/app/data/airbnb_q1_2024.pdf")
-#old file path is loader = PyMuPDFLoader("/Users/sampazar/AIE3-Midterm/data/airbnb_q1_2024.pdf")
 documents = loader.load()
 
+# Note: I changed the loader file path from one that only worked locally to one that works with Docker. The old file path was loader = PyMuPDFLoader("/Users/sampazar/AIE3-Midterm/data/airbnb_q1_2024.pdf")
+
 def tiktoken_len(text):
     tokens = tiktoken.encoding_for_model("gpt-4o").encode(text)
     return len(tokens)
@@ -50,6 +49,7 @@ text_splitter = RecursiveCharacterTextSplitter(
 
 split_chunks = text_splitter.split_documents(documents)
 
+#-----Embedding and Vector Store Setup-----#
 
 # Load OpenAI Embeddings Model
 embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
@@ -65,7 +65,8 @@ qdrant_vector_store = Qdrant.from_documents(
 # Create a Retriever
 retriever = qdrant_vector_store.as_retriever()
 
-# Create a prompt template
+#-----Prompt Template and Language Model Setup-----#
+# Define the prompt template
 template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':
 
 Context:
@@ -80,7 +81,12 @@ prompt = ChatPromptTemplate.from_template(template)
 # Define the primary LLM
 primary_llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
 
-# Creating a Retrieval Augmented Generation (RAG) Chain
+#-----Creating a Retrieval Augmented Generation (RAG) Chain-----#
+# The RAG chain:
+# (1) Takes the user question and retrieves relevant context,
+# (2) Passes the context through unchanged,
+# (3) Formats the prompt with context and question, then sends it to the LLM to generate a response
+
 retrieval_augmented_qa_chain = (
     # INVOKE CHAIN WITH: {"question" : "<>"}
     # "question" : populated by getting the value of the "question" key
@@ -95,8 +101,9 @@ retrieval_augmented_qa_chain = (
     | {"response": prompt | primary_llm, "context": itemgetter("context")}
 )
 
-# Chainlit integration for deployment
-@cl.on_chat_start # marks a function that will be executed at the start of a user session
+#-----Chainlit Integration-----#
+# Sets initial chat settings at the start of a user session
+@cl.on_chat_start
 async def start_chat():
     settings = {
         "model": "gpt-4o",
@@ -108,14 +115,17 @@ async def start_chat():
     }
     cl.user_session.set("settings", settings)
 
-@cl.on_message # marks a function that should be run each time the chatbot receives a message from a user
+# Processes incoming messages from the user and sends a response through a series of steps:
+# (1) Retrieves the user's settings
+# (2) Invokes the RAG chain with the user's message
+# (3) Extracts the content from the response and sends it back to the user
+
+@cl.on_message
 async def handle_message(message: cl.Message):
    settings = cl.user_session.get("settings")
 
     response = retrieval_augmented_qa_chain.invoke({"question": message.content})
-
-    #msg = cl.Message(content=response["response"])
-    #await msg.send()
+
 
     # Extracting and sending just the content
     content = response["response"].content
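
For readers following the new RAG-chain comments, the chain is a standard LCEL composition, but the diff only shows its first and last lines. The sketch below reconstructs a likely middle from the visible pieces and the imports: the RunnablePassthrough.assign step and the {context}/{question} prompt placeholders are assumptions, not confirmed by the commit, and retriever, prompt, and primary_llm stand for the objects built earlier in app.py.

from operator import itemgetter
from langchain.schema.runnable import RunnablePassthrough

# Assumed in scope, built earlier in app.py:
#   retriever   = qdrant_vector_store.as_retriever()
#   prompt      = ChatPromptTemplate.from_template(template)
#   primary_llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
retrieval_augmented_qa_chain = (
    # (1) Route the incoming "question" to the retriever to fetch relevant chunks as "context"
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
    # (2) Carry the retrieved context through unchanged so it can be returned alongside the answer
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # (3) Fill the prompt with context and question, send it to the LLM, and keep the context
    | {"response": prompt | primary_llm, "context": itemgetter("context")}
)

# Invoke with a dict containing the "question" key; the answer text lives on the AIMessage
result = retrieval_augmented_qa_chain.invoke({"question": "What is discussed in the filing?"})
print(result["response"].content)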
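
Likewise, the Chainlit section these comments document reduces to two decorated coroutines. A minimal sketch, assuming the chain above is in scope; the rest of the settings dict and the final send call are cut off by the diff, so the closing cl.Message(...).send() line is an assumption (it mirrors the commented-out lines this commit removes).

import chainlit as cl

@cl.on_chat_start
async def start_chat():
    # Stored once per user session; the diff elides the dict's remaining keys
    settings = {
        "model": "gpt-4o",
    }
    cl.user_session.set("settings", settings)

@cl.on_message
async def handle_message(message: cl.Message):
    settings = cl.user_session.get("settings")

    # The chain returns {"response": <AIMessage>, "context": [...]}
    response = retrieval_augmented_qa_chain.invoke({"question": message.content})

    # Extract and send just the answer text
    content = response["response"].content
    await cl.Message(content=content).send()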