Sam committed · f3a52d4 · Parent(s): 5f56729
Revised and cleaned up comments in app.py for clarity and completeness
app.py CHANGED
@@ -1,4 +1,4 @@
-
+#-----Import Required Libraries-----#
 import os
 from dotenv import load_dotenv
 
@@ -14,17 +14,15 @@ import tiktoken
 # Specific imports from the libraries
 from langchain.document_loaders import PyMuPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.embeddings import OpenAIEmbeddings
-#old import from langchain_openai import OpenAIEmbeddings
+from langchain.embeddings import OpenAIEmbeddings #Note: Old import was - from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Qdrant
 from langchain.prompts import ChatPromptTemplate
-from langchain.chat_models import ChatOpenAI
-#old import from langchain_openai import ChatOpenAI
+from langchain.chat_models import ChatOpenAI #Note: Old import was - from langchain_openai import ChatOpenAI
 from operator import itemgetter
 from langchain.schema.output_parser import StrOutputParser
 from langchain.schema.runnable import RunnablePassthrough
 
-
+#-----Set Environment Variables-----#
 load_dotenv()
 
 # Load environment variables
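
Note on the imports above: the new inline comments label the langchain_openai paths as the old ones, but in current LangChain releases the relationship is the reverse — langchain.embeddings.OpenAIEmbeddings and langchain.chat_models.ChatOpenAI are the deprecated re-exports, and the standalone langchain-openai package is the maintained home for both classes. A minimal sketch, assuming langchain-openai is installed:

from langchain_openai import OpenAIEmbeddings, ChatOpenAI  # intended as drop-in replacements for the deprecated paths
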
@@ -33,11 +31,12 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 # Initialize OpenAI client after loading the environment variables
 openai.api_key = OPENAI_API_KEY
 
-
+#-----Document Loading and Processing-----#
 loader = PyMuPDFLoader("/home/user/app/data/airbnb_q1_2024.pdf")
-#old file path is loader = PyMuPDFLoader("/Users/sampazar/AIE3-Midterm/data/airbnb_q1_2024.pdf")
 documents = loader.load()
 
+#Note: I changed the loader file path from one that worked only locally to one that works with Docker. The old file path is loader = PyMuPDFLoader("/Users/sampazar/AIE3-Midterm/data/airbnb_q1_2024.pdf")
+
 def tiktoken_len(text):
     tokens = tiktoken.encoding_for_model("gpt-4o").encode(text)
     return len(tokens)
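
Note on tiktoken_len: it makes the splitter below measure chunk length in model tokens rather than characters, keeping chunks aligned with the LLM's context accounting. tiktoken.encoding_for_model resolves the tokenizer by model name (recent tiktoken versions map "gpt-4o" to the o200k_base encoding). The committed version re-resolves the encoding on every call; a sketch that caches it once:

import tiktoken

# Resolve the gpt-4o tokenizer once, instead of on every call
encoding = tiktoken.encoding_for_model("gpt-4o")

def tiktoken_len(text):
    return len(encoding.encode(text))
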
@@ -50,6 +49,7 @@ text_splitter = RecursiveCharacterTextSplitter(
 
 split_chunks = text_splitter.split_documents(documents)
 
+#-----Embedding and Vector Store Setup-----#
 
 # Load OpenAI Embeddings Model
 embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
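
Note on the splitter: the RecursiveCharacterTextSplitter constructor arguments (old lines 44-49) are elided from this hunk. For context, a typical configuration consistent with tiktoken_len — the chunk_size and chunk_overlap values here are illustrative assumptions, not the committed ones:

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,            # maximum chunk length, measured in tokens via tiktoken_len
    chunk_overlap=50,          # tokens shared between consecutive chunks to preserve context
    length_function=tiktoken_len,
)
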
@@ -65,7 +65,8 @@ qdrant_vector_store = Qdrant.from_documents(
 # Create a Retriever
 retriever = qdrant_vector_store.as_retriever()
 
-
+#-----Prompt Template and Language Model Setup-----#
+# Define the prompt template
 template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':
 
 Context:
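
Note on the vector store: the Qdrant.from_documents(...) arguments are elided from this hunk. A sketch of the usual call — the location and collection_name values are illustrative assumptions:

qdrant_vector_store = Qdrant.from_documents(
    split_chunks,                       # the token-sized chunks produced above
    embeddings,                         # the OpenAI embeddings model defined above
    location=":memory:",                # illustrative: run Qdrant in-process, no separate server
    collection_name="airbnb_q1_2024",   # illustrative collection name
)
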
@@ -80,7 +81,12 @@ prompt = ChatPromptTemplate.from_template(template)
 # Define the primary LLM
 primary_llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
 
-
+#-----Creating a Retrieval Augmented Generation (RAG) Chain-----#
+# The RAG chain:
+# (1) Takes the user question and retrieves relevant context,
+# (2) Passes the context through unchanged,
+# (3) Formats the prompt with context and question, then sends it to the LLM to generate a response
+
 retrieval_augmented_qa_chain = (
     # INVOKE CHAIN WITH: {"question" : "<>"}
     # "question" : populated by getting the value of the "question" key
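
Note on the chain: only the opening and closing lines of the LCEL pipeline appear in these hunks. A sketch of the full pattern the new comments describe — the middle of the chain is elided from this diff, so this is an assumed reconstruction rather than the committed code:

retrieval_augmented_qa_chain = (
    # Fetch context for the question while passing the question through unchanged
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # Format the prompt with both keys, then generate the response with the LLM
    | {"response": prompt | primary_llm, "context": itemgetter("context")}
)
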
@@ -95,8 +101,9 @@ retrieval_augmented_qa_chain = (
     | {"response": prompt | primary_llm, "context": itemgetter("context")}
 )
 
-
-
+#-----Chainlit Integration-----#
+# Sets initial chat settings at the start of a user session
+@cl.on_chat_start
 async def start_chat():
     settings = {
         "model": "gpt-4o",
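
Note on the settings: only the "model" key is visible in these hunks (old lines 103-107 are elided). A sketch of a plausible settings block — every key except "model" is an illustrative assumption:

settings = {
    "model": "gpt-4o",
    "temperature": 0,     # illustrative: matches the deterministic primary_llm above
    "max_tokens": 500,    # illustrative
    "top_p": 1,           # illustrative
}
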
@@ -108,14 +115,17 @@ async def start_chat():
     }
     cl.user_session.set("settings", settings)
 
-
+# Processes incoming messages from the user and sends a response through a series of steps:
+# (1) Retrieves the user's settings
+# (2) Invokes the RAG chain with the user's message
+# (3) Extracts the content from the response and sends it back to the user
+
+@cl.on_message
 async def handle_message(message: cl.Message):
     settings = cl.user_session.get("settings")
 
     response = retrieval_augmented_qa_chain.invoke({"question": message.content})
-
-    #msg = cl.Message(content=response["response"])
-    #await msg.send()
+
 
     # Extracting and sending just the content
     content = response["response"].content
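
The diff is truncated after the content = response["response"].content line, so the end of the new handler is not shown. Based on the commented-out lines removed above and the standard Chainlit API, the remainder presumably wraps the extracted content in a message and sends it — a sketch of the function's closing lines:

    # Wrap the extracted content in a Chainlit message and send it back to the user
    msg = cl.Message(content=content)
    await msg.send()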