Sam committed on
Commit
20fd337
·
1 Parent(s): ce74f64

Renamed midterm_app.py to app.py and updated Dockerfile

Files changed (2)
  1. Dockerfile +1 -1
  2. app.py +124 -0
Dockerfile CHANGED
@@ -24,4 +24,4 @@ RUN pip install --no-cache-dir -r requirements.txt
COPY --chown=user . $HOME/app

# Run the application
- CMD ["chainlit", "run", "midterm_app.py", "--port", "7860"]
+ CMD ["chainlit", "run", "app.py", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,124 @@
+ # Import Required Libraries
+ import os
+ from dotenv import load_dotenv
+
+ import openai
+ import fitz  # PyMuPDF
+ import pandas as pd
+ from transformers import pipeline
+ from qdrant_client import QdrantClient
+ from qdrant_client.http import models as qdrant_models
+ import chainlit as cl
+ import tiktoken
+
+ # Specific imports from the libraries
+ from langchain.document_loaders import PyMuPDFLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.embeddings import OpenAIEmbeddings
+ # old import: from langchain_openai import OpenAIEmbeddings
+ from langchain_community.vectorstores import Qdrant
+ from langchain.prompts import ChatPromptTemplate
+ from langchain.chat_models import ChatOpenAI
+ # old import: from langchain_openai import ChatOpenAI
+ from operator import itemgetter
+ from langchain.schema.output_parser import StrOutputParser
+ from langchain.schema.runnable import RunnablePassthrough
+
+ # Set Environment Variables
+ load_dotenv()
+
+ # Load environment variables
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+
+ # Initialize OpenAI client after loading the environment variables
+ openai.api_key = OPENAI_API_KEY
+
+ # Load and split documents
+ loader = PyMuPDFLoader("/home/user/app/data/airbnb_q1_2024.pdf")
+ # old file path: loader = PyMuPDFLoader("/Users/sampazar/AIE3-Midterm/data/airbnb_q1_2024.pdf")
+ documents = loader.load()
+
+ def tiktoken_len(text):
+     tokens = tiktoken.encoding_for_model("gpt-4o").encode(text)
+     return len(tokens)
+
+ text_splitter = RecursiveCharacterTextSplitter(
+     chunk_size=150,
+     chunk_overlap=100,
+     length_function=tiktoken_len,
+ )
+
+ split_chunks = text_splitter.split_documents(documents)
+
+ # Load OpenAI Embeddings Model
+ embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
+
+ # Creating a Qdrant Vector Store
+ qdrant_vector_store = Qdrant.from_documents(
+     split_chunks,
+     embeddings,
+     location=":memory:",
+     collection_name="Airbnb_Q1_2024",
+ )
+
+ # Create a Retriever
+ retriever = qdrant_vector_store.as_retriever()
+
+ # Create a prompt template
+ template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':
+
+ Context:
+ {context}
+
+ Question:
+ {question}
+ """
+
+ prompt = ChatPromptTemplate.from_template(template)
+
+ # Define the primary LLM
+ primary_llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
+
+ # Creating a Retrieval Augmented Generation (RAG) Chain
+ retrieval_augmented_qa_chain = (
+     # INVOKE CHAIN WITH: {"question" : "<>"}
+     # "question" : populated by getting the value of the "question" key
+     # "context" : populated by getting the value of the "question" key and chaining it into the base_retriever
+     {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
+     # "context" : is assigned to a RunnablePassthrough object (will not be called or considered in the next step)
+     # by getting the value of the "context" key from the previous step
+     | RunnablePassthrough.assign(context=itemgetter("context"))
+     # "response" : the "context" and "question" values are used to format our prompt object and then piped
+     # into the LLM and stored in a key called "response"
+     # "context" : populated by getting the value of the "context" key from the previous step
+     | {"response": prompt | primary_llm, "context": itemgetter("context")}
+ )
+
+ # Chainlit integration for deployment
+ @cl.on_chat_start  # marks a function that will be executed at the start of a user session
+ async def start_chat():
+     settings = {
+         "model": "gpt-4o",
+         "temperature": 0,
+         "max_tokens": 500,
+         "top_p": 1,
+         "frequency_penalty": 0,
+         "presence_penalty": 0,
+     }
+     cl.user_session.set("settings", settings)
+
+ @cl.on_message  # marks a function that should be run each time the chatbot receives a message from a user
+ async def handle_message(message: cl.Message):
+     settings = cl.user_session.get("settings")
+
+     response = retrieval_augmented_qa_chain.invoke({"question": message.content})
+
+     # msg = cl.Message(content=response["response"])
+     # await msg.send()
+
+     # Extracting and sending just the content
+     content = response["response"].content
+     pretty_content = content.strip()  # Remove any leading/trailing whitespace
+
+     await cl.Message(content=pretty_content).send()
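For context (not part of the commit): because the RAG chain is built at module import time, it can be sanity-checked outside Chainlit by importing it directly. A minimal sketch, assuming OPENAI_API_KEY is set and the PDF exists at the path hard-coded in app.py; the question string is only an illustrative placeholder:

# Quick local test of the chain defined in app.py, without the Chainlit UI.
# Importing app runs its module-level setup (PDF load, chunking, Qdrant index).
from app import retrieval_augmented_qa_chain

# The chain expects a dict with a "question" key, per the comment in app.py.
result = retrieval_augmented_qa_chain.invoke(
    {"question": "What was Airbnb's revenue in Q1 2024?"}  # placeholder question
)

# "response" holds the LLM message; .content mirrors what handle_message sends.
print(result["response"].content)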