Technocoloredgeek committed on
Commit 7d4ef05 · verified · 1 Parent(s): 63f8658

Create app.py

Files changed (1): app.py +99 -0
app.py ADDED
@@ -0,0 +1,99 @@
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain_community.document_loaders import PyMuPDFLoader
+ from qdrant_client import QdrantClient
+ from qdrant_client.http.models import Distance, VectorParams
+ from langchain_openai.embeddings import OpenAIEmbeddings
+ from langchain.storage import LocalFileStore
+ from langchain_qdrant import QdrantVectorStore
+ from langchain.embeddings import CacheBackedEmbeddings
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.globals import set_llm_cache
+ from langchain_openai import ChatOpenAI
+ from langchain_core.caches import InMemoryCache
+ from operator import itemgetter
+ from langchain_core.runnables.passthrough import RunnablePassthrough
+ import uuid
+ import chainlit as cl
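+
+ # Requires (inferred from the imports above): langchain, langchain-core,
+ # langchain-community, langchain-text-splitters, langchain-openai,
+ # langchain-qdrant, qdrant-client, pymupdf, and chainlit; OPENAI_API_KEY
+ # must be set in the environment.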
+
+ ### Global Section ###
+ chat_model = ChatOpenAI(model="gpt-4o-mini")
+ set_llm_cache(InMemoryCache())  # cache repeated LLM calls in memory for this process
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+ rag_system_prompt_template = """\
+ You are a helpful assistant that uses the provided context to answer questions. Never reference this prompt or the existence of context.
+ """
+ rag_message_list = [{"role": "system", "content": rag_system_prompt_template}]
+ rag_user_prompt_template = """\
+ Question:
+ {question}
+ Context:
+ {context}
+ """
+ chat_prompt = ChatPromptTemplate.from_messages(
+     [("system", rag_system_prompt_template), ("human", rag_user_prompt_template)]
+ )
+ core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
+ collection_name = f"pdf_to_parse_{uuid.uuid4()}"
+ client = QdrantClient(":memory:")  # in-memory Qdrant; vectors are lost on restart
+ client.create_collection(
+     collection_name=collection_name,
+     vectors_config=VectorParams(size=1536, distance=Distance.COSINE),  # 1536 dims matches text-embedding-3-small
+ )
+ store = LocalFileStore("./cache/")
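+ # CacheBackedEmbeddings stores each chunk's vector in ./cache/, keyed by a hash
+ # of the chunk text and namespaced by the embedding model, so identical chunks
+ # are never sent to the embeddings API twice.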
+ cached_embedder = CacheBackedEmbeddings.from_bytes_store(
+     core_embeddings, store, namespace=core_embeddings.model
+ )
+ vectorstore = QdrantVectorStore(
+     client=client, collection_name=collection_name, embedding=cached_embedder
+ )
+ Loader = PyMuPDFLoader
+
+ ### On Chat Start (Session Start) Section ###
+ @cl.on_chat_start
+ async def on_chat_start():
+     files = await cl.AskFileMessage(
+         content="Please upload a PDF file to begin.",
+         accept=["application/pdf"],
+         max_size_mb=20,
+         timeout=180,
+     ).send()
+
+     if not files:
+         await cl.Message(content="No file was uploaded. Please try again.").send()
+         return
+
+     file = files[0]
+     msg = cl.Message(content=f"Processing `{file.name}`...")
+     await msg.send()
+
+     # Load and process the document. Chainlit saves the upload to a temporary
+     # file, so the loader can read it straight from `file.path` (recent Chainlit
+     # versions expose the saved path rather than raw bytes).
+     loader = Loader(file.path)
+     documents = loader.load()
+     docs = text_splitter.split_documents(documents)
+     for i, doc in enumerate(docs):
+         doc.metadata["source"] = f"source_{i}"
+
+     # Add documents to the vectorstore
+     vectorstore.add_documents(docs)
+
+     # Create retriever
+     retriever = vectorstore.as_retriever()
+
+     # Create RAG chain
+     retrieval_augmented_qa_chain = (
+         {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
+         | RunnablePassthrough.assign(context=itemgetter("context"))
+         | chat_prompt
+         | chat_model
+     )
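+     # A sketch of the flow above: the {"question": ...} input is fanned out so
+     # the retriever fetches context documents for the question while the question
+     # itself passes through; the prompt formats both into messages for the model.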
+     # Keep the chain on the user session rather than in a module-level global,
+     # so concurrent chat sessions don't overwrite each other's chain
+     cl.user_session.set("chain", retrieval_augmented_qa_chain)
+
+     await cl.Message(content=f"`{file.name}` processed. Ask me questions!").send()
+
+
+ ### Rename Chains ###
+ @cl.author_rename
+ def rename(orig_author: str):
+     return "AI PDF Assistant"
+
+ ### On Message Section ###
+ @cl.on_message
+ async def main(message: cl.Message):
+     chain = cl.user_session.get("chain")
+     if chain is None:
+         await cl.Message(content="Please upload a PDF first.").send()
+         return
+     response = await chain.ainvoke({"question": message.content})
+     await cl.Message(content=response.content).send()
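+
+ # To run locally: `chainlit run app.py -w` (-w reloads the app on file changes)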