Spaces:
Sleeping
Sleeping
tmlinhdinh
commited on
Commit
·
dc89a4d
1
Parent(s):
d6caf90
deploying HF
Browse files- .chainlit/config.toml +84 -0
- .gitignore +1 -0
- Dockerfile +11 -0
- README.md +118 -1
- app.py +84 -0
- chainlit.md +1 -0
- requirements.txt +9 -0
.chainlit/config.toml
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
# Whether to enable telemetry (default: true). No personal data is collected.
|
3 |
+
enable_telemetry = true
|
4 |
+
|
5 |
+
# List of environment variables to be provided by each user to use the app.
|
6 |
+
user_env = []
|
7 |
+
|
8 |
+
# Duration (in seconds) during which the session is saved when the connection is lost
|
9 |
+
session_timeout = 3600
|
10 |
+
|
11 |
+
# Enable third parties caching (e.g LangChain cache)
|
12 |
+
cache = false
|
13 |
+
|
14 |
+
# Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
|
15 |
+
follow_symlink = true
|
16 |
+
|
17 |
+
[features]
|
18 |
+
# Show the prompt playground
|
19 |
+
prompt_playground = true
|
20 |
+
|
21 |
+
# Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
|
22 |
+
unsafe_allow_html = false
|
23 |
+
|
24 |
+
# Process and display mathematical expressions. This can clash with "$" characters in messages.
|
25 |
+
latex = false
|
26 |
+
|
27 |
+
# Authorize users to upload files with messages
|
28 |
+
multi_modal = true
|
29 |
+
|
30 |
+
# Allows user to use speech to text
|
31 |
+
[features.speech_to_text]
|
32 |
+
enabled = false
|
33 |
+
# See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
|
34 |
+
# language = "en-US"
|
35 |
+
|
36 |
+
[UI]
|
37 |
+
# Name of the app and chatbot.
|
38 |
+
name = "Chatbot"
|
39 |
+
|
40 |
+
# Show the readme while the conversation is empty.
|
41 |
+
show_readme_as_default = true
|
42 |
+
|
43 |
+
# Description of the app and chatbot. This is used for HTML tags.
|
44 |
+
# description = ""
|
45 |
+
|
46 |
+
# Large size content are by default collapsed for a cleaner ui
|
47 |
+
default_collapse_content = true
|
48 |
+
|
49 |
+
# The default value for the expand messages settings.
|
50 |
+
default_expand_messages = false
|
51 |
+
|
52 |
+
# Hide the chain of thought details from the user in the UI.
|
53 |
+
hide_cot = false
|
54 |
+
|
55 |
+
# Link to your github repo. This will add a github button in the UI's header.
|
56 |
+
# github = ""
|
57 |
+
|
58 |
+
# Specify a CSS file that can be used to customize the user interface.
|
59 |
+
# The CSS file can be served from the public directory or via an external link.
|
60 |
+
# custom_css = "/public/test.css"
|
61 |
+
|
62 |
+
# Override default MUI light theme. (Check theme.ts)
|
63 |
+
[UI.theme.light]
|
64 |
+
#background = "#FAFAFA"
|
65 |
+
#paper = "#FFFFFF"
|
66 |
+
|
67 |
+
[UI.theme.light.primary]
|
68 |
+
#main = "#F80061"
|
69 |
+
#dark = "#980039"
|
70 |
+
#light = "#FFE7EB"
|
71 |
+
|
72 |
+
# Override default MUI dark theme. (Check theme.ts)
|
73 |
+
[UI.theme.dark]
|
74 |
+
#background = "#FAFAFA"
|
75 |
+
#paper = "#FFFFFF"
|
76 |
+
|
77 |
+
[UI.theme.dark.primary]
|
78 |
+
#main = "#F80061"
|
79 |
+
#dark = "#980039"
|
80 |
+
#light = "#FFE7EB"
|
81 |
+
|
82 |
+
|
83 |
+
[meta]
|
84 |
+
generated_by = "0.7.700"
|
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
__pycache__
|
Dockerfile
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FROM python:3.9

# Run as a non-root user (required for Hugging Face Spaces).
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
WORKDIR $HOME/app

# Copy requirements first so the dependency-install layer is cached
# across code-only rebuilds.
# BUG FIX: the original used `COPY ./requirements.txt ~/app/requirements.txt`;
# Dockerfile COPY does not expand `~`, so it created a literal "~" directory
# instead of placing the file in the working dir.
COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
RUN pip install -r requirements.txt

# Copy the application source (the original performed this copy twice —
# once with --chown and once without; a single chowned copy suffices).
COPY --chown=user . $HOME/app

# Port 7860 is the default port Hugging Face Spaces expects the app on.
CMD ["chainlit", "run", "app.py", "--port", "7860"]
README.md
CHANGED
@@ -1 +1,118 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: DeployPythonicRAG
|
3 |
+
emoji: 📉
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: purple
|
6 |
+
sdk: docker
|
7 |
+
pinned: false
|
8 |
+
license: apache-2.0
|
9 |
+
---
|
10 |
+
|
11 |
+
# Deploying Pythonic Chat With Your Text File Application
|
12 |
+
|
13 |
+
In today's breakout rooms, we will be following the process that you saw during the challenge - for reference, the instructions for that are available [here](https://github.com/AI-Maker-Space/Beyond-ChatGPT/tree/main).
|
14 |
+
|
15 |
+
Today, we will repeat the same process - but powered by our Pythonic RAG implementation we created last week.
|
16 |
+
|
17 |
+
You'll notice a few differences in the `app.py` logic - as well as a few changes to the `aimakerspace` package to get things working smoothly with Chainlit.
|
18 |
+
|
19 |
+
## Reference Diagram (It's Busy, but it works)
|
20 |
+
|
21 |
+

|
22 |
+
|
23 |
+
## Deploying the Application to Hugging Face Space
|
24 |
+
|
25 |
+
Due to the way the repository is created - it should be straightforward to deploy this to a Hugging Face Space!
|
26 |
+
|
27 |
+
> NOTE: If you wish to go through the local deployments using `chainlit run app.py` and Docker - please feel free to do so!
|
28 |
+
|
29 |
+
<details>
|
30 |
+
<summary>Creating a Hugging Face Space</summary>
|
31 |
+
|
32 |
+
1. Navigate to the `Spaces` tab.
|
33 |
+
|
34 |
+

|
35 |
+
|
36 |
+
2. Click on `Create new Space`
|
37 |
+
|
38 |
+

|
39 |
+
|
40 |
+
3. Create the Space by providing values in the form. Make sure you've selected "Docker" as your Space SDK.
|
41 |
+
|
42 |
+

|
43 |
+
|
44 |
+
</details>
|
45 |
+
|
46 |
+
<details>
|
47 |
+
<summary>Adding this Repository to the Newly Created Space</summary>
|
48 |
+
|
49 |
+
1. Collect the SSH address from the newly created Space.
|
50 |
+
|
51 |
+

|
52 |
+
|
53 |
+
> NOTE: The address is the component that starts with `[email protected]:spaces/`.
|
54 |
+
|
55 |
+
2. Use the command:
|
56 |
+
|
57 |
+
```bash
|
58 |
+
git remote add hf HF_SPACE_SSH_ADDRESS_HERE
|
59 |
+
```
|
60 |
+
|
61 |
+
3. Use the command:
|
62 |
+
|
63 |
+
```bash
|
64 |
+
git pull hf main --no-rebase --allow-unrelated-histories -X ours
|
65 |
+
```
|
66 |
+
|
67 |
+
4. Use the command:
|
68 |
+
|
69 |
+
```bash
|
70 |
+
git add .
|
71 |
+
```
|
72 |
+
|
73 |
+
5. Use the command:
|
74 |
+
|
75 |
+
```bash
|
76 |
+
git commit -m "Deploying Pythonic RAG"
|
77 |
+
```
|
78 |
+
|
79 |
+
6. Use the command:
|
80 |
+
|
81 |
+
```bash
|
82 |
+
git push hf main
|
83 |
+
```
|
84 |
+
|
85 |
+
7. The Space should automatically build as soon as the push is completed!
|
86 |
+
|
87 |
+
> NOTE: The build will fail before you complete the following steps!
|
88 |
+
|
89 |
+
</details>
|
90 |
+
|
91 |
+
<details>
|
92 |
+
<summary>Adding OpenAI Secrets to the Space</summary>
|
93 |
+
|
94 |
+
1. Navigate to your Space settings.
|
95 |
+
|
96 |
+

|
97 |
+
|
98 |
+
2. Navigate to `Variables and secrets` on the Settings page and click `New secret`:
|
99 |
+
|
100 |
+

|
101 |
+
|
102 |
+
3. In the `Name` field, input `OPENAI_API_KEY`; in the `Value (private)` field, put your OpenAI API Key.
|
103 |
+
|
104 |
+

|
105 |
+
|
106 |
+
4. The Space will begin rebuilding!
|
107 |
+
|
108 |
+
</details>
|
109 |
+
|
110 |
+
## 🎉
|
111 |
+
|
112 |
+
You just deployed Pythonic RAG!
|
113 |
+
|
114 |
+
Try uploading a text file and asking some questions!
|
115 |
+
|
116 |
+
## 🚧CHALLENGE MODE 🚧
|
117 |
+
|
118 |
+
For more of a challenge, please reference [Building a Chainlit App](./BuildingAChainlitApp.md)!
|
app.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import chainlit as cl
|
2 |
+
from operator import itemgetter
|
3 |
+
|
4 |
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
5 |
+
|
6 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
7 |
+
from langchain.prompts import ChatPromptTemplate
|
8 |
+
from langchain.schema import StrOutputParser
|
9 |
+
|
10 |
+
from langchain_community.document_loaders import PyMuPDFLoader
|
11 |
+
from langchain_community.vectorstores import Qdrant
|
12 |
+
|
13 |
+
|
14 |
+
# Constants (you can adjust these as per your environment)
|
15 |
+
# DATA LOADER
|
16 |
+
DATA_LINK1 = "https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf"
|
17 |
+
DATA_LINK2 = "https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf"
|
18 |
+
|
19 |
+
# CHUNKING CONFIGS
|
20 |
+
CHUNK_SIZE = 500
|
21 |
+
CHUNK_OVERLAP = 50
|
22 |
+
|
23 |
+
# RETRIEVER CONFIGS
|
24 |
+
COLLECTION_NAME = "AI Bill of Rights"
|
25 |
+
|
26 |
+
EMBEDDING_MODEL = "text-embedding-3-small"
|
27 |
+
|
28 |
+
# FINAL RAG CONFIGS
|
29 |
+
QA_MODEL = "gpt-4o"
|
30 |
+
|
31 |
+
RAG_PROMPT = """\
|
32 |
+
Given a provided context and question, you must answer the question based only on context.
|
33 |
+
|
34 |
+
If you cannot answer the question based on the context - you must say "I don't know".
|
35 |
+
|
36 |
+
Context: {context}
|
37 |
+
Question: {question}
|
38 |
+
"""
|
39 |
+
|
40 |
+
# Function to chunk documents
|
41 |
+
def chunk_documents(unchunked_documents, chunk_size, chunk_overlap):
|
42 |
+
text_splitter = RecursiveCharacterTextSplitter(
|
43 |
+
chunk_size=chunk_size,
|
44 |
+
chunk_overlap=chunk_overlap,
|
45 |
+
)
|
46 |
+
return text_splitter.split_documents(unchunked_documents)
|
47 |
+
|
48 |
+
# Function to build retriever
|
49 |
+
def build_retriever(chunked_documents, embeddings, collection_name):
|
50 |
+
vectorstore = Qdrant.from_documents(
|
51 |
+
documents=chunked_documents,
|
52 |
+
embedding=embeddings,
|
53 |
+
location=":memory:", # Storing in-memory for demonstration
|
54 |
+
collection_name=collection_name,
|
55 |
+
)
|
56 |
+
retriever = vectorstore.as_retriever()
|
57 |
+
return retriever
|
58 |
+
|
59 |
+
# Load documents and prepare retriever
|
60 |
+
rag_documents_1 = PyMuPDFLoader(file_path=DATA_LINK1).load()
|
61 |
+
rag_documents_2 = PyMuPDFLoader(file_path=DATA_LINK2).load()
|
62 |
+
|
63 |
+
chunked_rag_documents = chunk_documents(rag_documents_1, CHUNK_SIZE, CHUNK_OVERLAP) + \
|
64 |
+
chunk_documents(rag_documents_2, CHUNK_SIZE, CHUNK_OVERLAP)
|
65 |
+
|
66 |
+
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)
|
67 |
+
retriever = build_retriever(chunked_rag_documents, embeddings, COLLECTION_NAME)
|
68 |
+
|
69 |
+
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
|
70 |
+
qa_llm = ChatOpenAI(model=QA_MODEL)
|
71 |
+
|
72 |
+
rag_chain = (
|
73 |
+
{"context": itemgetter("question") | retriever, "question": itemgetter("question")}
|
74 |
+
| rag_prompt | llm | StrOutputParser()
|
75 |
+
)
|
76 |
+
|
77 |
+
# Chainlit app
|
78 |
+
@cl.on_message
|
79 |
+
async def main(message: str):
|
80 |
+
response = rag_chain.invoke({"question": message})
|
81 |
+
await cl.Message(
|
82 |
+
content=response["response"], # Extract the response from the chain
|
83 |
+
author="AI"
|
84 |
+
).send()
|
chainlit.md
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Welcome to our AI Industry Insights chatbot! As an AI Solutions Engineer, I've worked closely with stakeholders to tackle a major concern: the rapidly evolving impact of AI, particularly in the context of politics and ethical enterprise applications. With so much uncertainty, it's clear that many people could benefit from a tool that provides real-time, nuanced guidance on how AI is shaping industries and government policies—especially with the current election cycle underway. Our chatbot is here to help you stay informed and navigate the complexities of AI with confidence.
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
chainlit==0.7.700
|
2 |
+
langchain
|
3 |
+
langchain-core==0.2.38
|
4 |
+
langchain-community==0.2.16
|
5 |
+
langchain-text-splitters
|
6 |
+
langchain-openai
|
7 |
+
langchain-qdrant
|
8 |
+
qdrant-client
|
9 |
+
pymupdf
|