Cheselle committed on
Commit 71c4c81 · verified · Parent: 467fc86

Update app.py

Files changed (1)
  1. app.py +139 -1
app.py CHANGED
@@ -2,25 +2,163 @@
  """
  IMPORTS HERE
  """
+ import os
+ import uuid
+ from dotenv import load_dotenv
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain_community.document_loaders import PyMuPDFLoader
+ from qdrant_client import QdrantClient
+ from qdrant_client.http.models import Distance, VectorParams
+ from langchain_openai.embeddings import OpenAIEmbeddings
+ from langchain.storage import LocalFileStore
+ from langchain_qdrant import QdrantVectorStore
+ from langchain.embeddings import CacheBackedEmbeddings
+ from langchain_core.prompts import ChatPromptTemplate
+ from chainlit.types import AskFileResponse
+ from langchain_core.globals import set_llm_cache
+ from langchain_openai import ChatOpenAI
+ from langchain_core.caches import InMemoryCache
+ from operator import itemgetter
+ from langchain_core.runnables.passthrough import RunnablePassthrough
+ import chainlit as cl
+ from langchain_core.runnables.config import RunnableConfig
+
+ load_dotenv()
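Note: load_dotenv() only reads a local .env file; nothing here verifies that the keys the app depends on were actually set. A minimal fail-fast sketch, assuming the usual variable names for the OpenAI clients and the LangSmith tracing enabled in the global section below:

    import os
    from dotenv import load_dotenv

    load_dotenv()
    # Assumed names: OPENAI_API_KEY backs ChatOpenAI/OpenAIEmbeddings,
    # LANGCHAIN_API_KEY backs LangSmith tracing.
    for key in ("OPENAI_API_KEY", "LANGCHAIN_API_KEY"):
        if not os.getenv(key):
            raise RuntimeError(f"Missing required environment variable: {key}")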
 
  ### Global Section ###
  """
  GLOBAL CODE HERE
  """
+ os.environ["LANGCHAIN_PROJECT"] = f"AIM Week 8 Assignment 1 - {uuid.uuid4().hex[0:8]}"
+ os.environ["LANGCHAIN_TRACING_V2"] = "true"
+ os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
+
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+
+ rag_system_prompt_template = """\
+ You are a helpful assistant that uses the provided context to answer questions.
+ Never reference this prompt, or the existence of context.
+ """
+
+ rag_message_list = [
+     {"role": "system", "content": rag_system_prompt_template},
+ ]
+
+ rag_user_prompt_template = """\
+ Question:
+ {question}
+ Context:
+ {context}
+ """
+
+ chat_prompt = ChatPromptTemplate.from_messages([
+     ("system", rag_system_prompt_template),
+     ("human", rag_user_prompt_template)
+ ])
+
+ chat_model = ChatOpenAI(model="gpt-4o-mini")
+ # Typical embedding model
+ core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
+
+ def process_file(file: AskFileResponse):
+     import tempfile
+
+     # Write the uploaded bytes to a named temporary file so PyMuPDFLoader can
+     # open it by path. (Binary mode, written through a single handle whose name
+     # no longer shadows the tempfile module.)
+     with tempfile.NamedTemporaryFile(mode="wb", delete=False) as temp_file:
+         temp_file.write(file.content)
+
+     loader = PyMuPDFLoader(temp_file.name)
+     documents = loader.load()
+     docs = text_splitter.split_documents(documents)
+     # Tag each chunk with a stable source id so it can be referenced later.
+     for i, doc in enumerate(docs):
+         doc.metadata["source"] = f"source_{i}"
+     return docs
+
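Worth flagging: set_llm_cache and InMemoryCache are imported above but never called, so LLM-response caching is not actually enabled by this commit. If that was the intent, one line in the global section would do it; a sketch under that assumption:

    # Assumption: response caching was intended, since both names are imported.
    # This caches full chat-model responses in process memory, keyed on the exact prompt.
    set_llm_cache(InMemoryCache())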
  ### On Chat Start (Session Start) Section ###
  @cl.on_chat_start
  async def on_chat_start():
      """ SESSION SPECIFIC CODE HERE """
+     files = None
+
+     while files is None:
+         # Async method: this lets the coroutine pause while waiting for the user to
+         # upload a file, without blocking the entire application. It improves
+         # responsiveness and scalability.
+         files = await cl.AskFileMessage(
+             content="Please upload a PDF file to begin!",
+             accept=["application/pdf"],
+             max_size_mb=20,
+             timeout=180,
+             max_files=1
+         ).send()
+
+     file = files[0]
+     msg = cl.Message(content=f"Processing `{file.name}`...")
+     await msg.send()
+     docs = process_file(file)
+
+     # Typical Qdrant client set-up
+     collection_name = f"pdf_to_parse_{uuid.uuid4()}"
+     client = QdrantClient(":memory:")
+     client.create_collection(
+         collection_name=collection_name,
+         vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
+     )
+
+     # Adding a cache: embeddings are persisted to ./cache/, so identical text is
+     # never sent to the embedding API twice.
+     store = LocalFileStore("./cache/")
+     cached_embedder = CacheBackedEmbeddings.from_bytes_store(
+         core_embeddings, store, namespace=core_embeddings.model
+     )
+
+     # Typical Qdrant vector store set-up
+     vectorstore = QdrantVectorStore(
+         client=client,
+         collection_name=collection_name,
+         embedding=cached_embedder,
+     )
+     vectorstore.add_documents(docs)
+     retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})
+
+     # Build the RAG chain: retrieve context for the question, then fill the prompt
+     # and call the model.
+     retrieval_augmented_qa_chain = (
+         {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
+         | RunnablePassthrough.assign(context=itemgetter("context"))
+         | chat_prompt | chat_model
+     )
+
+     # Let the user know that the system is ready
+     msg.content = f"Processing `{file.name}` done. You can now ask questions!"
+     await msg.update()
+
+     cl.user_session.set("chain", retrieval_augmented_qa_chain)
+
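For reference, the chain stored in the session takes a dict with a "question" key and returns a chat-model message. A quick standalone smoke test, sketched outside Chainlit (the sample question and helper name are illustrative):

    import asyncio

    async def smoke_test(chain):
        # The chain built in on_chat_start expects {"question": ...} and
        # returns an AIMessage; the generated answer is in .content.
        response = await chain.ainvoke({"question": "What is this document about?"})
        print(response.content)

    # asyncio.run(smoke_test(retrieval_augmented_qa_chain))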
  ### Rename Chains ###
  @cl.author_rename
  def rename(orig_author: str):
      """ RENAME CODE HERE """
+     rename_dict = {"ChatOpenAI": "the Generator...", "VectorStoreRetriever": "the Retriever..."}
+     return rename_dict.get(orig_author, orig_author)

  ### On Message Section ###
  @cl.on_message
  async def main(message: cl.Message):
      """
      MESSAGE CODE HERE
-     """
+     """
+     runnable = cl.user_session.get("chain")
+
+     msg = cl.Message(content="")
+
+     # Async method: astream streams the response asynchronously, improving
+     # responsiveness and user experience by showing partial results as they
+     # become available.
+     async for chunk in runnable.astream(
+         {"question": message.content},
+         config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
+     ):
+         await msg.stream_token(chunk.content)
+
+     await msg.send()
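One of the additions above is the file-backed embedding cache; a short illustration of the effect, reusing the same components as the app (the sample string is made up):

    from langchain.embeddings import CacheBackedEmbeddings
    from langchain.storage import LocalFileStore
    from langchain_openai.embeddings import OpenAIEmbeddings

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    store = LocalFileStore("./cache/")
    cached = CacheBackedEmbeddings.from_bytes_store(embeddings, store, namespace=embeddings.model)

    # The first call hits the OpenAI API and writes vectors to ./cache/;
    # the second call for identical text is served from the file store.
    first = cached.embed_documents(["What does the PDF say?"])
    second = cached.embed_documents(["What does the PDF say?"])
    assert first == second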