santiviquez committed on
Commit
6271197
·
1 Parent(s): 9b6eb1c

first commit

Browse files
Files changed (3) hide show
  1. The-AI-Act.pdf +0 -0
  2. app.py +271 -0
  3. requirements.txt +8 -0
The-AI-Act.pdf ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PyPDF2 import PdfReader
2
+ from langchain.embeddings.openai import OpenAIEmbeddings
3
+ from langchain.text_splitter import CharacterTextSplitter
4
+ from langchain.vectorstores import FAISS
5
+ from langchain.chains.question_answering import load_qa_chain
6
+ from langchain.llms import OpenAI
7
+ import streamlit as st
8
+ from streamlit_chat import message
9
+ import extra_streamlit_components as stx
10
+ import os
11
+ import datetime
12
+ import openai
13
+ import random
14
+
15
+
16
# An OpenAI account and API key are required to run this app.
# Here is the link to get the keys: https://platform.openai.com/account/billing/overview
# The key is read from Streamlit's secrets and exported as an environment variable.
os.environ.update(OPENAI_API_KEY=st.secrets["OPENAI_API_KEY"])
19
+
20
+
21
@st.cache(allow_output_mutation=True, suppress_st_warning=True)
def get_manager(key):
    """Create the cookie manager component identified by ``key``.

    The function is wrapped in ``st.cache``, so the ``CookieManager`` built
    for a given ``key`` is handed back by the cache on later calls.
    """
    manager = stx.CookieManager(key=key)
    return manager
24
+
25
+
26
cookie_manager = get_manager(key=0)

# Value of the "actchat" cookie, or None when no cookies were returned or
# that particular cookie is not among them.
cookies = cookie_manager.get_all()
user_limit_cookie = cookies.get("actchat") if cookies else None
38
+
39
+
40
@st.cache_resource
def read_data():
    """Extract the text of every page of ``The-AI-Act.pdf``.

    Returns:
        str: The text of all pages concatenated in page order. Pages for
        which ``extract_text()`` yields nothing are skipped.
    """
    # Location of the pdf file.
    reader = PdfReader("The-AI-Act.pdf")

    # Join once instead of growing a string with ``+=`` page by page.
    page_texts = (page.extract_text() for page in reader.pages)
    return "".join(text for text in page_texts if text)
52
+
53
+
54
# The text read from the PDF is cut into smaller chunks so that during
# information retrieval we don't hit the token size limits.
@st.cache_resource
def split_document(raw_text):
    """Split ``raw_text`` into chunks with ``CharacterTextSplitter``.

    The splitter is configured with a newline separator, a chunk size of
    1000 and an overlap of 200, both measured with ``len``.

    Returns:
        The chunks produced by ``split_text``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(raw_text)
66
+
67
+
68
# Embeddings backend provided by OpenAI
@st.cache_resource
def load_openai_embeddings():
    """Return an ``OpenAIEmbeddings`` instance created with default arguments."""
    return OpenAIEmbeddings()
73
+
74
+
75
@st.cache_resource
def init_docsearch(texts, _embeddings):
    """Build a FAISS vector store from ``texts`` using ``_embeddings``.

    NOTE(review): the leading underscore on ``_embeddings`` presumably tells
    Streamlit's cache not to hash that argument; confirm against the
    ``st.cache_resource`` documentation.
    """
    return FAISS.from_texts(texts, _embeddings)
79
+
80
+
81
@st.cache_resource
def init_qa_chain():
    """Create the "stuff" question-answering chain on an OpenAI LLM (temperature 0.8)."""
    llm = OpenAI(temperature=0.8)
    return load_qa_chain(llm, chain_type="stuff")
85
+
86
+
87
# Build everything the chat needs. Each factory is decorated with
# st.cache_resource, so results come from the cache once they exist.
chain = init_qa_chain()
embeddings = load_openai_embeddings()

# Document side: PDF text -> chunks -> vector index.
raw_text = read_data()
texts = split_document(raw_text)
docsearch = init_docsearch(texts, embeddings)
92
+
93
# Avatar style names the user's chat bubble can be drawn with.
avatars = [
    "avataaars",
    "big-ears",
    "big-ears-neutral",
    "big-smile",
    "identicon",
    "initials",
    "lorelei",
    "lorelei-neutral",
    "micah",
    "miniavs",
    "open-peeps",
    "personas",
    "pixel-art",
    "pixel-art-neutral",
    "shapes",
    "thumbs",
]

# Candidate avatar for this script run, picked uniformly at random.
user_avatar = random.choice(avatars)
113
+
114
# Static header copy shown at the top of the page.
APP_TITLE = "EU AI ACT GPT🤖"
ACT_SUMMARY = """The AI Act is a proposed European law on artificial intelligence (AI) –
the first law on AI by a major regulator anywhere."""
APP_PITCH = """The EU AI Act is going into force on June 15th. We at [NannyML](https://github.com/NannyML/nannyml)
finetuned GPT-X with all the **107 pages** in the document so you can ask all
the necessary questions and be informed about it.

"""

st.title(APP_TITLE)
st.write(ACT_SUMMARY)
st.markdown(APP_PITCH)
126
+
127
# Seed the session state with defaults for any key that is not set yet.
session_defaults = {
    "text_input": "",
    "generated": [],
    "messages": [],
    "openaikey": [],
    "disabled": False,
}
for state_key, default_value in session_defaults.items():
    if state_key not in st.session_state:
        st.session_state[state_key] = default_value

# True once the user has supplied a personal OpenAI key in this session.
has_user_key = len(st.session_state["openaikey"]) != 0

# Inputs are disabled when the limit cookie is present and no personal key
# has been provided; otherwise they are enabled.
st.session_state["disabled"] = user_limit_cookie == "01234" and not has_user_key

if has_user_key:
    openai.api_key = st.session_state["openaikey"]

# Keep the first avatar picked for this session.
if "avatar" not in st.session_state:
    st.session_state["avatar"] = user_avatar
154
+
155
+
156
def disable():
    """Set the ``disabled`` flag in the session state to ``True``."""
    st.session_state.disabled = True
158
+
159
+
160
# Conversation transcript kept as one string; empty until the first answer.
if "history" not in st.session_state:
    st.session_state.history = ""
162
+
163
+
164
def submit():
    """Copy the ``text_area`` value into ``text_input`` and blank ``text_area``.

    Used as the ``on_change`` callback of the question text area.
    """
    state = st.session_state
    state["text_input"] = state["text_area"]
    state["text_area"] = ""
167
+
168
+
169
# Template prompt to establish the behaviour and the persona of the chatbot
def template(history, query):
    """Build the full prompt sent to the model.

    Args:
        history: Transcript of the conversation so far, inserted verbatim.
        query: The user's current question, inserted verbatim.

    Returns:
        str: The persona instructions followed by the conversation history,
        the user question and a trailing ``Assistant :`` cue.
    """
    return f"""
You are an assistant and expert in the EU AI Act. Based on your expertise,
you need to assist and provide the answer to the business questions about the EU AI Act.
Your answer has to be clear and easy to understand for the user.
Your answer has to be detailed and provide only fact-checked information based on the act.
Be sure to ask any additional information you may need, to provide an accurate answer.
Refer to the conversation history if necessary.
Be friendly and polite to the user.

Conversation history :
{history}

User question : {query}
Assistant :"""
187
+
188
+
189
def _raw_user_question(prompt_text):
    """Return the user's own question from a prompt built by ``template``.

    The text after the last ``User question : `` marker is taken and the
    trailing ``Assistant :`` cue is removed. A string without the marker is
    returned unchanged.
    """
    _, marker, tail = prompt_text.rpartition("User question : ")
    if not marker:
        return prompt_text
    tail = tail.rstrip()
    if tail.endswith("Assistant :"):
        tail = tail[: -len("Assistant :")].rstrip()
    return tail


def generate_response(question):
    """Answer ``question`` and record the exchange in the session state.

    Args:
        question: The prompt passed to the similarity search and the QA chain
            (the caller passes the output of ``template``).

    Side effects:
        Appends the answer to ``st.session_state["generated"]`` and extends
        ``st.session_state["history"]`` with the question/answer pair.
    """
    # NOTE(review): the search runs on the full templated prompt, not just
    # the user's question - confirm this is the intended retrieval query.
    docs = docsearch.similarity_search(question)
    response = chain.run(input_documents=docs, question=question)
    st.session_state["generated"].append({"role": "assistant", "content": response})

    # Store only the user's question. The templated prompt already embeds the
    # previous history, so appending it whole would roughly double the
    # history on every turn.
    user_question = _raw_user_question(question)
    st.session_state["history"] += "User question : " + user_question + " / "
    st.session_state["history"] += "Assistant : " + response + " / "
195
+
196
+
197
# Container declared first so the conversation renders above the input box.
response_container = st.container()

# The widget's value is consumed through the ``submit`` callback, which moves
# it into ``text_input``; the return value is therefore not needed here.
st.text_area(
    "Enter your question here about the EU AI Act",
    disabled=st.session_state["disabled"],
    key="text_area",
    on_change=submit,
)
prompt = st.session_state["text_input"]
send_button = st.button("Send", disabled=st.session_state["disabled"])
if send_button and prompt:
    st.session_state["messages"].append({"role": "user", "content": prompt})
    question = template(st.session_state["history"], prompt)
    try:
        with st.spinner("Generating response..."):
            generate_response(question)
    except openai.error.OpenAIError:
        # Drop the unanswered question so "messages" and "generated" stay
        # index-aligned for rendering.
        st.session_state["messages"].pop()
        st.error(
            "There is an error with your API key, or you might have run out of quota."
        )
216
+
217
+
218
# Draw every answered exchange: the user's bubble first, then the reply.
if st.session_state["messages"]:
    with response_container:
        exchanges = zip(st.session_state["messages"], st.session_state["generated"])
        for turn, (user_msg, bot_msg) in enumerate(exchanges):
            message(
                user_msg["content"],
                is_user=True,
                key=f"{turn}_user",
                avatar_style=st.session_state["avatar"],
            )
            message(bot_msg["content"], key=str(turn))
228
+
229
# Once more than four questions have been asked, set the "actchat" cookie
# that the startup code checks. The expiry is relative to now; a fixed
# calendar date stops working once that date has passed.
if len(st.session_state["messages"]) > 4:
    cookie_manager.set(
        "actchat",
        val="01234",
        expires_at=datetime.datetime.now() + datetime.timedelta(days=365),
    )
233
+
234
# When the limit cookie is present and no personal key has been supplied,
# ask the user for their own OpenAI API key.
if user_limit_cookie == "01234" and len(st.session_state["openaikey"]) == 0:
    st.markdown("##### Provide your own OpenAI API Key")
    st.write(
        """
Due to limitations in api request calls per user to continue the
conversation, please provide your personal OpenAI API key.
For more info on how to get an API Key visit
[OpenAI docs](https://platform.openai.com/account/api-keys)
about it."""
    )
    # Mask the key while typing and ignore surrounding whitespace.
    openaikey = st.text_input("OPENAI_API_KEY:", type="password").strip()
    api_button = st.button("Add")

    if api_button and openaikey:
        st.session_state["disabled"] = False
        st.session_state["openaikey"] = openaikey
        openai.api_key = openaikey
    elif api_button:
        # A blank key must not be stored or pushed into the OpenAI client.
        st.warning("Please enter a non-empty API key.")
else:
    st.session_state["disabled"] = False
254
+
255
# Static copy for the bottom of the page.
SAMPLE_QUESTIONS_MD = """##### Sample questions to ask it
* What are the objectives of the EU AI Act?
* What are the potential fines that a company may face for failing to comply with the EU AI Act?
* Explain in simple words the different risk levels in the EU AI Act.
"""
CREDITS_MD = """`Created by` [santiviquez](https://twitter.com/santiviquez) and
[maciejbalawejder](https://www.linkedin.com/in/maciej-balawejder-rt8015/)
from [NannyML](https://github.com/NannyML/nannyml) —
The open-source library to estimate model performance in production
*without ground truth*. With the help of santiviquez"""

st.markdown(SAMPLE_QUESTIONS_MD)

# Empty text element placed between the sample questions and the credits.
st.text("")
st.markdown(CREDITS_MD)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ pypdf2==3.0.1
2
+ langchain==0.0.170
3
+ streamlit==1.22.0
4
+ streamlit-chat==0.0.2.2
5
+ openai==0.27.6
6
+ tiktoken==0.4.0
7
+ faiss-cpu==1.7.4
8
+ extra_streamlit_components == 0.1.56