Spaces:
Running
Running
santiviquez
commited on
Commit
·
6271197
1
Parent(s):
9b6eb1c
first commit
Browse files- The-AI-Act.pdf +0 -0
- app.py +271 -0
- requirements.txt +8 -0
The-AI-Act.pdf
ADDED
The diff for this file is too large to render.
See raw diff
|
|
app.py
ADDED
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from PyPDF2 import PdfReader
|
2 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
3 |
+
from langchain.text_splitter import CharacterTextSplitter
|
4 |
+
from langchain.vectorstores import FAISS
|
5 |
+
from langchain.chains.question_answering import load_qa_chain
|
6 |
+
from langchain.llms import OpenAI
|
7 |
+
import streamlit as st
|
8 |
+
from streamlit_chat import message
|
9 |
+
import extra_streamlit_components as stx
|
10 |
+
import os
|
11 |
+
import datetime
|
12 |
+
import openai
|
13 |
+
import random
|
14 |
+
|
15 |
+
|
16 |
+
# Get your API keys from openai, you will need to create an account.
|
17 |
+
# Here is the link to get the keys: https://platform.openai.com/account/billing/overview
|
18 |
+
os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]
|
19 |
+
|
20 |
+
|
21 |
+
@st.cache(allow_output_mutation=True, suppress_st_warning=True)
def get_manager(key):
    """Create the cookie manager component identified by ``key``.

    The function is memoised with ``st.cache`` so that calls with the same
    ``key`` hand back the cached manager.

    Args:
        key: Component key forwarded to ``stx.CookieManager``.

    Returns:
        The ``stx.CookieManager`` built for ``key``.
    """
    manager = stx.CookieManager(key=key)
    return manager
|
24 |
+
|
25 |
+
|
26 |
+
cookie_manager = get_manager(key=0)

# Read every cookie once; ``user_limit_cookie`` holds the value of the
# "actchat" cookie, or None when no cookies came back or it is absent.
cookies = cookie_manager.get_all()
user_limit_cookie = (
    cookies["actchat"] if cookies and "actchat" in cookies else None
)
|
38 |
+
|
39 |
+
|
40 |
+
@st.cache_resource
def read_data():
    """Extract the text of ``The-AI-Act.pdf`` as a single string.

    Every page of the PDF is visited in order; pages for which
    ``extract_text()`` yields nothing are skipped.

    Returns:
        str: The page texts concatenated with no separator between pages.
    """
    # Location of the PDF file, relative to the working directory.
    reader = PdfReader("The-AI-Act.pdf")

    page_texts = (page.extract_text() for page in reader.pages)
    return "".join(text for text in page_texts if text)
|
52 |
+
|
53 |
+
|
54 |
+
# We need to split the text that we read into smaller chunks so that during
|
55 |
+
# information retrieval we don't hit the token size limits.
|
56 |
+
@st.cache_resource
def split_document(raw_text):
    """Split ``raw_text`` into overlapping chunks.

    The text is cut on newlines into pieces of up to 1000 characters
    (measured with ``len``), with 200 characters of overlap between
    neighbouring chunks.

    Args:
        raw_text: The full document text.

    Returns:
        The chunks produced by ``CharacterTextSplitter.split_text``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(raw_text)
|
66 |
+
|
67 |
+
|
68 |
+
# Download embeddings from OpenAI
|
69 |
+
@st.cache_resource
def load_openai_embeddings():
    """Return an ``OpenAIEmbeddings`` instance built with default settings."""
    return OpenAIEmbeddings()
|
73 |
+
|
74 |
+
|
75 |
+
@st.cache_resource
def init_docsearch(texts, _embeddings):
    """Build the FAISS vector store used for similarity search.

    Args:
        texts: Text chunks to index.
        _embeddings: Embedding object handed to ``FAISS.from_texts``.
            NOTE(review): the leading underscore presumably keeps the
            Streamlit cache from hashing this argument - confirm.

    Returns:
        The store returned by ``FAISS.from_texts``.
    """
    return FAISS.from_texts(texts, _embeddings)
|
79 |
+
|
80 |
+
|
81 |
+
@st.cache_resource
def init_qa_chain():
    """Create the question-answering chain.

    Returns:
        A "stuff"-type chain from ``load_qa_chain`` driven by an ``OpenAI``
        LLM configured with ``temperature=0.8``.
    """
    llm = OpenAI(temperature=0.8)
    return load_qa_chain(llm, chain_type="stuff")
|
85 |
+
|
86 |
+
|
87 |
+
# Build the retrieval pipeline: PDF text -> chunks -> vector index, plus the
# QA chain that answers from the retrieved chunks.
raw_text = read_data()
texts = split_document(raw_text)
embeddings = load_openai_embeddings()
docsearch = init_docsearch(texts, embeddings)
chain = init_qa_chain()

# Avatar style names offered to ``streamlit_chat.message``.
avatars = [
    "avataaars",
    "big-ears",
    "big-ears-neutral",
    "big-smile",
    "identicon",
    "initials",
    "lorelei",
    "lorelei-neutral",
    "micah",
    "miniavs",
    "open-peeps",
    "personas",
    "pixel-art",
    "pixel-art-neutral",
    "shapes",
    "thumbs",
]

# Pick one style uniformly at random for this script run.
user_avatar = random.choice(avatars)
|
113 |
+
|
114 |
+
# Static page header: title, one-paragraph summary and the introduction.
PAGE_TITLE = "EU AI ACT GPT🤖"
ACT_SUMMARY = """The AI Act is a proposed European law on artificial intelligence (AI) –
the first law on AI by a major regulator anywhere."""
APP_INTRO = """The EU AI Act is going into force on June 15th. We at [NannyML](https://github.com/NannyML/nannyml)
finetuned GPT-X with all the **107 pages** in the document so you can ask all
the necessary questions and be informed about it.

"""

st.title(PAGE_TITLE)
st.write(ACT_SUMMARY)
st.markdown(APP_INTRO)
|
126 |
+
|
127 |
+
# create state sessions
|
128 |
+
# Seed every session-state slot the app relies on, leaving existing values
# untouched. The dict is rebuilt on each run, so the lists are never shared.
session_defaults = {
    "text_input": "",
    "generated": [],
    "messages": [],
    "openaikey": [],
    "disabled": False,
}
for state_key, initial_value in session_defaults.items():
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

# Inputs are locked only when the limit cookie is present and the user has
# not supplied a key of their own.
has_own_key = len(st.session_state["openaikey"]) != 0
st.session_state["disabled"] = user_limit_cookie == "01234" and not has_own_key

if has_own_key:
    openai.api_key = st.session_state["openaikey"]


# Keep the avatar chosen on the first run for the rest of the session.
if "avatar" not in st.session_state:
    st.session_state["avatar"] = user_avatar
|
154 |
+
|
155 |
+
|
156 |
+
def disable():
    """Set the ``disabled`` session-state flag to True."""
    st.session_state.disabled = True
|
158 |
+
|
159 |
+
|
160 |
+
# Running transcript of the conversation; starts out empty.
if "history" not in st.session_state:
    st.session_state.history = ""
|
162 |
+
|
163 |
+
|
164 |
+
def submit():
    """Copy the ``text_area`` value into ``text_input`` and clear ``text_area``."""
    state = st.session_state
    # The right-hand side is evaluated first, so the old text is captured
    # before the field is blanked.
    state["text_input"], state["text_area"] = state["text_area"], ""
|
167 |
+
|
168 |
+
|
169 |
+
# Template prompt to establish the behaviour and the persona of the chatbot
|
170 |
+
def template(history, query):
    """Build the prompt that sets the chatbot's persona and carries context.

    Args:
        history: Conversation transcript inserted under the history heading.
        query: The user's question inserted after ``User question :``.

    Returns:
        str: The instruction block followed by the history, the question and
        a trailing ``Assistant :`` cue.
    """
    # The leading empty entry reproduces the newline that opens the prompt.
    prompt_lines = (
        "",
        "You are an assistant and expert in the EU AI Act. Based on your expertise,",
        "you need to assist and provide the answer to the business questions about the EU AI Act.",
        "Your answer has to be clear and easy to understand for the user.",
        "Your answer has to be detailed and provide only fact-checked infromations based on the act.",
        "Be sure to ask any additional information you may need, to provide an accurate answer.",
        "Refer to the coverstation history if necessary.",
        "Be friendly and polite to the user.",
        "",
        "Coversation history :",
        f"{history}",
        "",
        f"User question : {query}",
        "Assistant :",
    )
    return "\n".join(prompt_lines)
|
187 |
+
|
188 |
+
|
189 |
+
def generate_response(question, user_query=None):
    """Answer ``question`` from the indexed document and record the exchange.

    The most similar chunks are fetched from ``docsearch``, the QA chain is
    run over them, the answer is appended to ``st.session_state["generated"]``
    and the turn is appended to ``st.session_state["history"]``.

    Args:
        question: The prompt used for the similarity search and passed to
            the chain.
        user_query: The user's raw question. When given, it is what gets
            written to the history; when omitted, ``question`` is written
            (the previous behaviour).
    """
    docs = docsearch.similarity_search(question)
    response = chain.run(input_documents=docs, question=question)
    st.session_state["generated"].append({"role": "assistant", "content": response})

    # ``question`` may be a full prompt that already embeds the history.
    # Logging it would nest the whole transcript (and the instructions)
    # inside itself on every turn, so log the raw question when we have it.
    logged_question = question if user_query is None else user_query
    st.session_state["history"] += "User question : " + logged_question + " / "
    st.session_state["history"] += "Assistant : " + response + " / "


response_container = st.container()
prompt = st.text_area(
    "Enter your question here about the EU AI Act",
    disabled=st.session_state["disabled"],
    key="text_area",
    on_change=submit,
)
# ``submit`` moves the typed text into "text_input" and clears the box, so
# the question is read from there rather than from the widget's return value.
prompt = st.session_state["text_input"]
send_button = st.button("Send", disabled=st.session_state["disabled"])
if send_button and prompt:
    st.session_state["messages"].append({"role": "user", "content": prompt})
    question = template(st.session_state["history"], prompt)
    with st.spinner("Generating response..."):
        # Pass the raw question too, so only it ends up in the history.
        generate_response(question, user_query=prompt)
|
211 |
+
|
212 |
+
# try:
|
213 |
+
# generate_response(prompt)
|
214 |
+
# except:
|
215 |
+
# st.error("There is an error with your API key. Or you might ran out of quota.")
|
216 |
+
|
217 |
+
|
218 |
+
# Replay the conversation inside the container placed above the input box:
# one user bubble followed by the matching assistant bubble per answer.
if st.session_state["messages"]:
    with response_container:
        for turn, reply in enumerate(st.session_state["generated"]):
            asked = st.session_state["messages"][turn]
            message(
                asked["content"],
                is_user=True,
                key=f"{turn}_user",
                avatar_style=st.session_state["avatar"],
            )
            message(reply["content"], key=str(turn))
|
228 |
+
|
229 |
+
# Number of questions a visitor may send before being asked for their own key.
MAX_FREE_MESSAGES = 4
# Lifetime of the limit cookie. It is relative to "now" because a fixed
# calendar date eventually lies in the past, at which point the cookie
# expires immediately and the limit silently stops being enforced.
USER_LIMIT_COOKIE_TTL = datetime.timedelta(days=365)

if len(st.session_state["messages"]) > MAX_FREE_MESSAGES:
    cookie_manager.set(
        "actchat",
        val="01234",
        expires_at=datetime.datetime.now() + USER_LIMIT_COOKIE_TTL,
    )
|
233 |
+
|
234 |
+
# When the limit cookie is present and no personal key has been stored yet,
# ask the user for their own OpenAI API key.
if user_limit_cookie == "01234" and len(st.session_state["openaikey"]) == 0:
    st.markdown("##### Provide your own OpenAI API Key")
    st.write(
        """
Due to limitations in api request calls per user to continoue the
converstation, please provide your personal OpenAI API key.
For more info on how to get and API Key visit
[OpenAI docs](https://platform.openai.com/account/api-keys)
about it."""
    )
    # disable()
    # The key is a secret: mask it on screen, and drop the stray whitespace
    # that tends to come along when a key is pasted.
    openaikey = st.text_input("OPENAI_API_KEY:", type="password").strip()
    api_button = st.button("Add")

    if api_button:
        st.session_state["disabled"] = False
        st.session_state["openaikey"] = openaikey
        openai.api_key = openaikey
else:
    st.session_state["disabled"] = False
|
254 |
+
|
255 |
+
# Static footer: example questions followed by the credits line.
SAMPLE_QUESTIONS = """##### Sample questions to ask it
* What are the objectives of the EU AI Act?
* What are the potential fines that a company may face for failing to comply with the EU AI Act?
* Explain in simple words the different risk levels in the EU AI Act.
"""
CREDITS = """`Created by` [santiviquez](https://twitter.com/santiviquez) and
[maciejbalawejder](https://www.linkedin.com/in/maciej-balawejder-rt8015/)
from [NannyML](https://github.com/NannyML/nannyml) —
The open-source library to estimate model performance in production
*without ground truth*. With the help of santiviquez"""

st.markdown(SAMPLE_QUESTIONS)


st.text("")
st.markdown(CREDITS)
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pypdf2==3.0.1
|
2 |
+
langchain==0.0.170
|
3 |
+
streamlit==1.22.0
|
4 |
+
streamlit-chat==0.0.2.2
|
5 |
+
openai==0.27.6
|
6 |
+
tiktoken==0.4.0
|
7 |
+
faiss-cpu==1.7.4
|
8 |
+
extra_streamlit_components == 0.1.56
|