JulsdL commited on
Commit
df7870c
Β·
unverified Β·
2 Parent(s): 6cc65a6 36df286

Merge pull request #2 from JulsdL/chainlit_application

Browse files

Introduction of a Chainlit Application for Interactive Chat-Based Query Handling

Files changed (6) hide show
  1. .chainlit/config.toml +84 -0
  2. CHANGELOG.md +9 -0
  3. Dockerfile +11 -0
  4. app.py +90 -0
  5. chainlit.md +14 -0
  6. requirements.txt +10 -0
.chainlit/config.toml ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ # Whether to enable telemetry (default: true). No personal data is collected.
3
+ enable_telemetry = true
4
+
5
+ # List of environment variables to be provided by each user to use the app.
6
+ user_env = []
7
+
8
+ # Duration (in seconds) during which the session is saved when the connection is lost
9
+ session_timeout = 3600
10
+
11
+ # Enable third-party caching (e.g. LangChain cache)
12
+ cache = false
13
+
14
+ # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
15
+ # follow_symlink = false
16
+
17
+ [features]
18
+ # Show the prompt playground
19
+ prompt_playground = true
20
+
21
+ # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
22
+ unsafe_allow_html = false
23
+
24
+ # Process and display mathematical expressions. This can clash with "$" characters in messages.
25
+ latex = false
26
+
27
+ # Authorize users to upload files with messages
28
+ multi_modal = true
29
+
30
+ # Allows user to use speech to text
31
+ [features.speech_to_text]
32
+ enabled = false
33
+ # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
34
+ # language = "en-US"
35
+
36
+ [UI]
37
+ # Name of the app and chatbot.
38
+ name = "Chatbot"
39
+
40
+ # Show the readme while the conversation is empty.
41
+ show_readme_as_default = true
42
+
43
+ # Description of the app and chatbot. This is used for HTML tags.
44
+ # description = ""
45
+
46
+ # Large size content are by default collapsed for a cleaner ui
47
+ default_collapse_content = true
48
+
49
+ # The default value for the expand messages settings.
50
+ default_expand_messages = false
51
+
52
+ # Hide the chain of thought details from the user in the UI.
53
+ hide_cot = false
54
+
55
+ # Link to your github repo. This will add a github button in the UI's header.
56
+ # github = ""
57
+
58
+ # Specify a CSS file that can be used to customize the user interface.
59
+ # The CSS file can be served from the public directory or via an external link.
60
+ # custom_css = "/public/test.css"
61
+
62
+ # Override default MUI light theme. (Check theme.ts)
63
+ [UI.theme.light]
64
+ #background = "#FAFAFA"
65
+ #paper = "#FFFFFF"
66
+
67
+ [UI.theme.light.primary]
68
+ #main = "#F80061"
69
+ #dark = "#980039"
70
+ #light = "#FFE7EB"
71
+
72
+ # Override default MUI dark theme. (Check theme.ts)
73
+ [UI.theme.dark]
74
+ #background = "#FAFAFA"
75
+ #paper = "#FFFFFF"
76
+
77
+ [UI.theme.dark.primary]
78
+ #main = "#F80061"
79
+ #dark = "#980039"
80
+ #light = "#FFE7EB"
81
+
82
+
83
+ [meta]
84
+ generated_by = "0.7.700"
CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
 
 
 
 
 
 
 
 
 
1
  ## v0.1.1 (2024-05-01)
2
 
3
  ### Added
 
1
+ ## v0.1.2 (2024-05-01)
2
+
3
+ ### Added
4
+
5
+ - Introduced a Chainlit application for interactive chat-based query handling using LangChain, OpenAI, and Qdrant technologies.
6
+ - Implemented document loading, tokenization, document splitting, embedding, and vector storage functionalities.
7
+ - Added Dockerfile for containerized deployment of the Chainlit application.
8
+ - Included a welcome guide in `chainlit.md` and updated `requirements.txt` with precise versioning for dependencies.
9
+
10
  ## v0.1.1 (2024-05-01)
11
 
12
  ### Added
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.9

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

# FIX: the original CMD was ["uvicorn", "app.main:app", ...], but this project
# is a Chainlit application defined in ./app.py — there is no app/ package and
# no ASGI object for uvicorn to import, so the container would exit on start.
# Chainlit apps are launched via the chainlit CLI.
CMD ["chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
from operator import itemgetter

import chainlit as cl
import tiktoken
from dotenv import load_dotenv
from langchain.retrievers import MultiQueryRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import Qdrant
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings

# FIX: this import was previously `from chainlit.playground.providers import ChatOpenAI`,
# which shadowed the LangChain ChatOpenAI class imported above — so the chat model
# below was constructed from the playground provider object instead of the LLM class.
# Aliased to keep the import available without the shadowing.
from chainlit.playground.providers import ChatOpenAI as ChatOpenAIProvider

# Load environment variables from a local .env file (e.g. OPENAI_API_KEY).
load_dotenv()

# Configuration for OpenAI.
# Fail fast with a KeyError if the API key is not configured.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# Deterministic (temperature=0) chat model, reused for answering and for
# multi-query expansion further down.
openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
22
+
# Load the source document (a Meta 10-K filing) directly from its public URL.
docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()

# PERF: hoist the tokenizer lookup out of the length function. The original
# called tiktoken.encoding_for_model() on every invocation, and the splitter
# calls the length function once per candidate chunk.
_GPT35_ENCODING = tiktoken.encoding_for_model("gpt-3.5-turbo")

# Tokenization function
def tiktoken_len(text):
    """Return the number of gpt-3.5-turbo tokens in *text*."""
    return len(_GPT35_ENCODING.encode(text))

# Split the document into ~200-token chunks with a 50-token overlap, measured
# with the model tokenizer so chunk sizes align with token limits.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50, length_function=tiktoken_len)

split_chunks = text_splitter.split_documents(docs)

# Initialize the embedding model used to vectorize each chunk.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

# Create an in-memory Qdrant vector store over the chunks.
# NOTE(review): collection name keeps the original spelling ("Fillings");
# renaming it would change the runtime collection identifier.
qdrant_vectorstore = Qdrant.from_documents(split_chunks, embedding_model, location=":memory:", collection_name="Meta 10-k Fillings")

# Plain similarity-search retriever over the vector store.
qdrant_retriever = qdrant_vectorstore.as_retriever()

# RAG prompt: answer from the retrieved context, refuse when unrelated.
RAG_PROMPT = """
CONTEXT:
{context}

QUERY:
{question}

Answer the query if the context is related to it; otherwise, answer: 'Sorry, the context is unrelated to the query, I can't answer.'
"""
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

# Wrap the retriever so the LLM generates several phrasings of each question,
# improving recall over a single similarity search.
multiquery_retriever = MultiQueryRetriever.from_llm(retriever=qdrant_retriever, llm=openai_chat_model)
59
+
# ChainLit setup for chat interaction
@cl.on_chat_start
async def start_chat():
    """Seed the user session with the default OpenAI generation settings."""
    generation_defaults = {
        "model": "gpt-3.5-turbo",
        "temperature": 0,
        "max_tokens": 500,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }
    cl.user_session.set("settings", generation_defaults)
72
+
@cl.on_message
async def main(message: cl.Message):
    """Handle an incoming chat message: run the RAG chain and send the answer."""
    answer = handle_query(message.content)  # LangChain pipeline defined below
    await cl.Message(content=answer).send()
81
+
# Define how the queries will be handled using LangChain
def handle_query(question):
    """Run the retrieval-augmented QA chain for *question*; return the answer text."""
    # Route the question through the multi-query retriever to gather context,
    # carry the question itself alongside, then feed both into prompt -> model.
    rag_chain = (
        {"context": itemgetter("question") | multiquery_retriever, "question": itemgetter("question")}
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
    )
    result = rag_chain.invoke({"question": question})
    return result["response"].content
chainlit.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Welcome to Chainlit! πŸš€πŸ€–
2
+
3
+ Hi there, Developer! πŸ‘‹ We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
4
+
5
+ ## Useful Links πŸ”—
6
+
7
+ - **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) πŸ“š
8
+ - **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! πŸ’¬
9
+
10
+ We can't wait to see what you create with Chainlit! Happy coding! πŸ’»πŸ˜Š
11
+
12
+ ## Welcome screen
13
+
14
+ To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain==0.1.17
2
+ langchain-core==0.1.48
3
+ langchain-community==0.0.36
4
+ langchain-openai==0.1.4
5
+ qdrant-client==1.9.0
6
+ tiktoken==0.6.0
7
+ pymupdf==1.24.2
8
+ python-dotenv==1.0.1
9
+ chainlit==0.7.700
10
+ openai==1.24.1