JulsdL commited on
Commit
df7870c
Β·
unverified Β·
2 Parent(s): 6cc65a6 36df286

Merge pull request #2 from JulsdL/chainlit_application

Browse files

Introduction of a Chainlit Application for Interactive Chat-Based Query Handling

Files changed (6) hide show
  1. .chainlit/config.toml +84 -0
  2. CHANGELOG.md +9 -0
  3. Dockerfile +11 -0
  4. app.py +90 -0
  5. chainlit.md +14 -0
  6. requirements.txt +10 -0
.chainlit/config.toml ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ # Whether to enable telemetry (default: true). No personal data is collected.
3
+ enable_telemetry = true
4
+
5
+ # List of environment variables to be provided by each user to use the app.
6
+ user_env = []
7
+
8
+ # Duration (in seconds) during which the session is saved when the connection is lost
9
+ session_timeout = 3600
10
+
11
+ # Enable third-party caching (e.g. LangChain cache)
12
+ cache = false
13
+
14
+ # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
15
+ # follow_symlink = false
16
+
17
+ [features]
18
+ # Show the prompt playground
19
+ prompt_playground = true
20
+
21
+ # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
22
+ unsafe_allow_html = false
23
+
24
+ # Process and display mathematical expressions. This can clash with "$" characters in messages.
25
+ latex = false
26
+
27
+ # Authorize users to upload files with messages
28
+ multi_modal = true
29
+
30
+ # Allows user to use speech to text
31
+ [features.speech_to_text]
32
+ enabled = false
33
+ # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
34
+ # language = "en-US"
35
+
36
+ [UI]
37
+ # Name of the app and chatbot.
38
+ name = "Chatbot"
39
+
40
+ # Show the readme while the conversation is empty.
41
+ show_readme_as_default = true
42
+
43
+ # Description of the app and chatbot. This is used for HTML tags.
44
+ # description = ""
45
+
46
+ # Large size content are by default collapsed for a cleaner ui
47
+ default_collapse_content = true
48
+
49
+ # The default value for the expand messages settings.
50
+ default_expand_messages = false
51
+
52
+ # Hide the chain of thought details from the user in the UI.
53
+ hide_cot = false
54
+
55
+ # Link to your github repo. This will add a github button in the UI's header.
56
+ # github = ""
57
+
58
+ # Specify a CSS file that can be used to customize the user interface.
59
+ # The CSS file can be served from the public directory or via an external link.
60
+ # custom_css = "/public/test.css"
61
+
62
+ # Override default MUI light theme. (Check theme.ts)
63
+ [UI.theme.light]
64
+ #background = "#FAFAFA"
65
+ #paper = "#FFFFFF"
66
+
67
+ [UI.theme.light.primary]
68
+ #main = "#F80061"
69
+ #dark = "#980039"
70
+ #light = "#FFE7EB"
71
+
72
+ # Override default MUI dark theme. (Check theme.ts)
73
+ [UI.theme.dark]
74
+ #background = "#FAFAFA"
75
+ #paper = "#FFFFFF"
76
+
77
+ [UI.theme.dark.primary]
78
+ #main = "#F80061"
79
+ #dark = "#980039"
80
+ #light = "#FFE7EB"
81
+
82
+
83
+ [meta]
84
+ generated_by = "0.7.700"
CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
 
 
 
 
 
 
 
 
 
1
  ## v0.1.1 (2024-05-01)
2
 
3
  ### Added
 
1
+ ## v0.1.2 (2024-05-01)
2
+
3
+ ### Added
4
+
5
+ - Introduced a Chainlit application for interactive chat-based query handling using LangChain, OpenAI, and Qdrant technologies.
6
+ - Implemented document loading, tokenization, document splitting, embedding, and vector storage functionalities.
7
+ - Added Dockerfile for containerized deployment of the Chainlit application.
8
+ - Included a welcome guide in `chainlit.md` and updated `requirements.txt` with precise versioning for dependencies.
9
+
10
  ## v0.1.1 (2024-05-01)
11
 
12
  ### Added
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.9

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

# FIX: the original CMD was ["uvicorn", "app.main:app", ...], but this project
# is a Chainlit application defined in ./app.py — there is no app/ package and
# no ASGI object for uvicorn to import, so the container would exit on start.
# Chainlit apps are launched via the chainlit CLI.
CMD ["chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
from operator import itemgetter

import chainlit as cl
import tiktoken
from dotenv import load_dotenv
from langchain.retrievers import MultiQueryRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import Qdrant
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings

# FIX: this import was previously `from chainlit.playground.providers import ChatOpenAI`,
# which shadowed the LangChain ChatOpenAI class imported above — so the chat model
# below was constructed from the playground provider object instead of the LLM class.
# Aliased to keep the import available without the shadowing.
from chainlit.playground.providers import ChatOpenAI as ChatOpenAIProvider

# Load environment variables from a local .env file (e.g. OPENAI_API_KEY).
load_dotenv()

# Configuration for OpenAI.
# Fail fast with a KeyError if the API key is not configured.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# Deterministic (temperature=0) chat model, reused for answering and for
# multi-query expansion further down.
openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
22
+
# Load the source document (a Meta 10-K filing) directly from its public URL.
docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()

# PERF: hoist the tokenizer lookup out of the length function. The original
# called tiktoken.encoding_for_model() on every invocation, and the splitter
# calls the length function once per candidate chunk.
_GPT35_ENCODING = tiktoken.encoding_for_model("gpt-3.5-turbo")

# Tokenization function
def tiktoken_len(text):
    """Return the number of gpt-3.5-turbo tokens in *text*."""
    return len(_GPT35_ENCODING.encode(text))

# Split the document into ~200-token chunks with a 50-token overlap, measured
# with the model tokenizer so chunk sizes align with token limits.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50, length_function=tiktoken_len)

split_chunks = text_splitter.split_documents(docs)

# Initialize the embedding model used to vectorize each chunk.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

# Create an in-memory Qdrant vector store over the chunks.
# NOTE(review): collection name keeps the original spelling ("Fillings");
# renaming it would change the runtime collection identifier.
qdrant_vectorstore = Qdrant.from_documents(split_chunks, embedding_model, location=":memory:", collection_name="Meta 10-k Fillings")

# Plain similarity-search retriever over the vector store.
qdrant_retriever = qdrant_vectorstore.as_retriever()

# RAG prompt: answer from the retrieved context, refuse when unrelated.
RAG_PROMPT = """
CONTEXT:
{context}

QUERY:
{question}

Answer the query if the context is related to it; otherwise, answer: 'Sorry, the context is unrelated to the query, I can't answer.'
"""
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

# Wrap the retriever so the LLM generates several phrasings of each question,
# improving recall over a single similarity search.
multiquery_retriever = MultiQueryRetriever.from_llm(retriever=qdrant_retriever, llm=openai_chat_model)
59
+
# ChainLit setup for chat interaction
@cl.on_chat_start
async def start_chat():
    """Seed the user session with the default OpenAI generation settings."""
    generation_defaults = {
        "model": "gpt-3.5-turbo",
        "temperature": 0,
        "max_tokens": 500,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }
    cl.user_session.set("settings", generation_defaults)
72
+
@cl.on_message
async def main(message: cl.Message):
    """Handle an incoming chat message: run the RAG chain and send the answer."""
    answer = handle_query(message.content)  # LangChain pipeline defined below
    await cl.Message(content=answer).send()
81
+
# Define how the queries will be handled using LangChain
def handle_query(question):
    """Run the retrieval-augmented QA chain for *question*; return the answer text."""
    # Route the question through the multi-query retriever to gather context,
    # carry the question itself alongside, then feed both into prompt -> model.
    rag_chain = (
        {"context": itemgetter("question") | multiquery_retriever, "question": itemgetter("question")}
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
    )
    result = rag_chain.invoke({"question": question})
    return result["response"].content
chainlit.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Welcome to Chainlit! πŸš€πŸ€–
2
+
3
+ Hi there, Developer! πŸ‘‹ We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
4
+
5
+ ## Useful Links πŸ”—
6
+
7
+ - **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) πŸ“š
8
+ - **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! πŸ’¬
9
+
10
+ We can't wait to see what you create with Chainlit! Happy coding! πŸ’»πŸ˜Š
11
+
12
+ ## Welcome screen
13
+
14
+ To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain==0.1.17
2
+ langchain-core==0.1.48
3
+ langchain-community==0.0.36
4
+ langchain-openai==0.1.4
5
+ qdrant-client==1.9.0
6
+ tiktoken==0.6.0
7
+ pymupdf==1.24.2
8
+ python-dotenv==1.0.1
9
+ chainlit==0.7.700
10
+ openai==1.24.1