ahncs committed on
Commit
bc96a72
·
1 Parent(s): 77fe871
Files changed (5) hide show
  1. .chainlit/config.toml +84 -0
  2. Dockerfile +12 -0
  3. app.py +123 -0
  4. chainlit.md +2 -0
  5. requirements.txt +14 -0
.chainlit/config.toml ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ # Whether to enable telemetry (default: true). No personal data is collected.
3
+ enable_telemetry = true
4
+
5
+ # List of environment variables to be provided by each user to use the app.
6
+ user_env = []
7
+
8
+ # Duration (in seconds) during which the session is saved when the connection is lost
9
+ session_timeout = 3600
10
+
11
+ # Enable third-party caching (e.g. LangChain cache)
12
+ cache = false
13
+
14
+ # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
15
+ # follow_symlink = false
16
+
17
+ [features]
18
+ # Show the prompt playground
19
+ prompt_playground = true
20
+
21
+ # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
22
+ unsafe_allow_html = false
23
+
24
+ # Process and display mathematical expressions. This can clash with "$" characters in messages.
25
+ latex = false
26
+
27
+ # Authorize users to upload files with messages
28
+ multi_modal = true
29
+
30
+ # Allow users to use speech-to-text input
31
+ [features.speech_to_text]
32
+ enabled = false
33
+ # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
34
+ # language = "en-US"
35
+
36
+ [UI]
37
+ # Name of the app and chatbot.
38
+ name = "Chatbot"
39
+
40
+ # Show the readme while the conversation is empty.
41
+ show_readme_as_default = true
42
+
43
+ # Description of the app and chatbot. This is used for HTML tags.
44
+ # description = ""
45
+
46
+ # Large content is collapsed by default for a cleaner UI
47
+ default_collapse_content = true
48
+
49
+ # The default value for the expand messages settings.
50
+ default_expand_messages = false
51
+
52
+ # Hide the chain of thought details from the user in the UI.
53
+ hide_cot = false
54
+
55
+ # Link to your github repo. This will add a github button in the UI's header.
56
+ # github = ""
57
+
58
+ # Specify a CSS file that can be used to customize the user interface.
59
+ # The CSS file can be served from the public directory or via an external link.
60
+ # custom_css = "/public/test.css"
61
+
62
+ # Override default MUI light theme. (Check theme.ts)
63
+ [UI.theme.light]
64
+ #background = "#FAFAFA"
65
+ #paper = "#FFFFFF"
66
+
67
+ [UI.theme.light.primary]
68
+ #main = "#F80061"
69
+ #dark = "#980039"
70
+ #light = "#FFE7EB"
71
+
72
+ # Override default MUI dark theme. (Check theme.ts)
73
+ [UI.theme.dark]
74
+ #background = "#FAFAFA"
75
+ #paper = "#FFFFFF"
76
+
77
+ [UI.theme.dark.primary]
78
+ #main = "#F80061"
79
+ #dark = "#980039"
80
+ #light = "#FFE7EB"
81
+
82
+
83
+ [meta]
84
+ generated_by = "0.7.700"
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for serving the Chainlit app on port 7860 as a non-root user.
FROM python:3.11.9

# Create an unprivileged user (uid 1000) and run everything below as that user.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Created while already running as `user`, so no extra chown is needed.
RUN mkdir -p $HOME/app/data/vectorstore
WORKDIR $HOME/app

# Copy only the dependency list first so the pip layer is reused when just the
# application code changes. Docker does not expand `~` in COPY destinations,
# so the path is spelled out with $HOME.
COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application once, owned by the runtime user.
COPY --chown=user . $HOME/app

CMD ["chainlit", "run", "app.py", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import openai
3
+ import chainlit as cl
4
+ from langchain_community.document_loaders import PyMuPDFLoader
5
+ from langchain_openai import OpenAIEmbeddings
6
+ from langchain_openai import ChatOpenAI
7
+ from langchain_community.vectorstores import Qdrant
8
+ from langchain.prompts import ChatPromptTemplate
9
+
10
+ from dotenv import load_dotenv
11
+ from operator import itemgetter
12
+ from langchain_huggingface import HuggingFaceEndpoint
13
+ from langchain_community.document_loaders import TextLoader
14
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
15
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
16
+ from langchain_community.vectorstores import FAISS
17
+ from langchain_huggingface import HuggingFaceEndpointEmbeddings
18
+ from langchain_core.prompts import PromptTemplate
19
+ from langchain.schema.output_parser import StrOutputParser
20
+ from langchain.schema.runnable import RunnablePassthrough
21
+ from langchain.schema.runnable.config import RunnableConfig
22
+
23
# Read variables from a .env file into the process environment.
load_dotenv()
# Looking the key up with [] makes startup fail immediately (KeyError) when it is missing.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# Load the 10-K PDF, then split it into chunks that overlap slightly.
loader = PyMuPDFLoader("./data/AirBNB10kfilingsq12024.pdf")
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
documents = text_splitter.split_documents(documents)

# Embedding model: OpenAI's text-embedding-3-small.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Index the chunks in a Qdrant collection that lives in memory only.
qdrant_vector_store = Qdrant.from_documents(
    documents,
    embeddings,
    location=":memory:",
    collection_name="AirBNB10k",
)

# Retriever over the vector store, using the library's default search settings.
retriever = qdrant_vector_store.as_retriever()

# Prompt: answer strictly from the retrieved context, otherwise say "I don't know".
template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':

Context:
{context}

Question:
{question}
"""

prompt = ChatPromptTemplate.from_template(template)

# Chat model: gpt-4o with temperature 0.
primary_llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
71
+
72
+ #Set up Chainlit
73
@cl.author_rename
def rename(original_author: str) -> str:
    """
    Map a message author's name to the name that should be displayed.

    The 'Assistant' author is shown as 'Airbnb10kBot'; any other author
    name is returned unchanged.
    """
    display_names = {"Assistant": "Airbnb10kBot"}
    try:
        return display_names[original_author]
    except KeyError:
        # No override registered for this author.
        return original_author
84
+
85
@cl.on_chat_start
async def start_chat():
    """
    Build the LCEL RAG chain and store it in the user session.

    Registered with Chainlit as the chat-start handler. The chain is invoked
    with {"question": "<user question>"} and is saved in the user session
    under the "retrieval_augmented_chain" key.
    """
    pick_question = itemgetter("question")

    # "context"  : the question piped through the retriever
    # "question" : the question passed through as-is
    prompt_inputs = {
        "context": pick_question | retriever,
        "question": pick_question,
    }

    rag_chain = prompt_inputs | prompt | primary_llm

    cl.user_session.set("retrieval_augmented_chain", rag_chain)
103
+
104
@cl.on_message
async def main(message: cl.Message):
    """
    Answer one incoming user message with the session's RAG chain.

    Registered with Chainlit as the message handler. The chain is read from
    the user session under the "retrieval_augmented_chain" key, its output is
    streamed chunk by chunk into a single reply, and the reply is sent once
    the stream is exhausted.
    """
    rag_chain = cl.user_session.get("retrieval_augmented_chain")

    # Start from an empty reply and fill it as chunks arrive.
    reply = cl.Message(content="")
    payload = {"question": message.content}

    async for piece in rag_chain.astream(
        payload,
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        await reply.stream_token(piece.content)

    await reply.send()
chainlit.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Airbnb 10-K Bot
2
+ Ask me anything about the Airbnb 10-K filing!
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ chainlit==0.7.700
2
+ langchain==0.2.5
3
+ langchain_community==0.2.5
4
+ langchain_core==0.2.9
5
+ langchain_huggingface==0.0.3
6
+ langchain_text_splitters==0.2.1
7
+ python-dotenv==1.0.1
8
+ langchain-openai
9
+ langchainhub
10
+ openai
11
+ faiss-cpu
12
+ qdrant-client
13
+ pymupdf
14
+ pandas