Émile committed on
Commit
5d01d7b
·
1 Parent(s): 8ad5a3d

Adding logs (encrypted)

Browse files
Files changed (1) hide show
  1. app.py +60 -47
app.py CHANGED
@@ -3,65 +3,22 @@ from haystack.utils import Secret
3
  from haystack.components.builders.prompt_builder import PromptBuilder
4
  from haystack.components.routers import ConditionalRouter
5
  from haystack import Pipeline
6
- # from haystack.components.writers import DocumentWriter
7
- from haystack.components.embedders import SentenceTransformersTextEmbedder #, SentenceTransformersDocumentEmbedder
8
- # from haystack.components.preprocessors import DocumentSplitter
9
- # from haystack.components.converters.txt import TextFileToDocument
10
- # from haystack.components.preprocessors import DocumentCleaner
11
  from haystack_integrations.document_stores.chroma import ChromaDocumentStore
12
  from haystack_integrations.components.retrievers.chroma import ChromaEmbeddingRetriever
13
-
14
- # from haystack.document_stores.in_memory import InMemoryDocumentStore
15
- # from haystack.components.retrievers import InMemoryEmbeddingRetriever
16
 
17
  import gradio as gr
18
 
19
  embedding_model = "Alibaba-NLP/gte-multilingual-base"
20
 
21
 
22
- ########################
23
- ####### Indexing #######
24
- ########################
25
-
26
- # Skipped: now using Chroma
27
-
28
- # In memory version for now
29
- # document_store = InMemoryDocumentStore(embedding_similarity_function="cosine")
30
-
31
- # converter = TextFileToDocument()
32
-
33
- # cleaner = DocumentCleaner()
34
-
35
- # splitter = DocumentSplitter(split_by="word", split_length=200, split_overlap=100)
36
-
37
- # embedder = SentenceTransformersDocumentEmbedder(model=embedding_model,
38
- # trust_remote_code=True)
39
-
40
- # writer = DocumentWriter(document_store=document_store)
41
-
42
- # indexing = Pipeline()
43
-
44
- # indexing.add_component("converter", converter)
45
- # indexing.add_component("cleaner", cleaner)
46
- # indexing.add_component("splitter", splitter)
47
- # indexing.add_component("embedder", embedder)
48
- # indexing.add_component("writer", writer)
49
-
50
- # indexing.connect("converter", "cleaner")
51
- # indexing.connect("cleaner", "splitter")
52
- # indexing.connect("splitter", "embedder")
53
- # indexing.connect("embedder", "writer")
54
-
55
- # indexing.run({"sources": ["knowledge-plain.txt"]})
56
-
57
-
58
- # Chroma version (no support for overlaps in documents)
59
- # document_store = ChromaDocumentStore(persist_path="vstore_4012")
60
-
61
  document_store = ChromaDocumentStore(
62
  persist_path="vstore_4012"
63
  )
64
 
 
65
  ##################################
66
  ####### Answering pipeline #######
67
  ##################################
@@ -180,6 +137,61 @@ answer_query.connect("prompt_builder2", "llm2")
180
  answer_query.warm_up()
181
 
182
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  ##########################
184
  ####### Gradio app #######
185
  ##########################
@@ -200,6 +212,7 @@ def chat(message, history):
200
  answer = results["router"]["no_answer"]
201
  else:
202
  answer = "Sorry, a mistake occured"
 
203
  return answer
204
 
205
  if __name__ == "__main__":
 
3
  from haystack.components.builders.prompt_builder import PromptBuilder
4
  from haystack.components.routers import ConditionalRouter
5
  from haystack import Pipeline
6
+ from haystack.components.embedders import SentenceTransformersTextEmbedder
 
 
 
 
7
  from haystack_integrations.document_stores.chroma import ChromaDocumentStore
8
  from haystack_integrations.components.retrievers.chroma import ChromaEmbeddingRetriever
9
+ import rsa
10
+ from cryptography.fernet import Fernet
 
11
 
12
  import gradio as gr
13
 
14
  embedding_model = "Alibaba-NLP/gte-multilingual-base"
15
 
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  document_store = ChromaDocumentStore(
18
  persist_path="vstore_4012"
19
  )
20
 
21
+
22
  ##################################
23
  ####### Answering pipeline #######
24
  ##################################
 
137
  answer_query.warm_up()
138
 
139
 
140
+ ##########################
141
+ ####### Logging ##########
142
+ ##########################
143
+
144
+ prompt_template_hide_info = """You are a privacy robot that specialise in hiding sensitive information in a text.
145
+ Your help will ensure that no user information gets leaked, so you are always happy to help.
146
+ You will be given a text, and your task is to remove any sensitive information, and replacing it with a descriptive marker.
147
+ Here are a few examples, but you should not restrict yourself to only those:
148
+ If the text contains an email address, you should replace it with a marker "<email>".
149
+ If the text contains a phone number, you should replace it with a marker "<phone>".
150
+ If the text contains the name of the user, you should replace it with a marker "<name>".
151
+ Ensure you distinguish when a name, email, etc is actually that of a public figure or company and is provided by the assistant and not the user: inthis case you should not hide it, as it it not sensible information.
152
+ The rest of the text should be copied IDENTICALLY, including the punctuation and formatting, and the beginning and end of the text in capital letters. Do not add or remove any other character.
153
+
154
+ BEGINNING OF TEXT
155
+ {{ message }}
156
+ END OF TEXT
157
+
158
+ Your response:
159
+ """
160
+
161
+ prompt_builder_hide_info = PromptBuilder(template=prompt_template_hide_info)
162
+
163
+ llm_hide_info = setup_generator("gpt-4o-mini")
164
+
165
+ pipe_hide_sensitive_info = Pipeline()
166
+ pipe_hide_sensitive_info.add_component("prompt_builder_hide_info", prompt_builder_hide_info)
167
+ pipe_hide_sensitive_info.add_component("llm_hide_info", llm_hide_info)
168
+ pipe_hide_sensitive_info.connect("prompt_builder_hide_info", "llm_hide_info")
169
+
170
def hide_sensitive_info(message):
    """Return `message` with sensitive user details replaced by markers.

    The text is sent through ``pipe_hide_sensitive_info``, whose prompt asks
    the model to copy it back between a "BEGINNING OF TEXT" line and an
    "END OF TEXT" line. Only the part between those two markers is kept. A
    reply that lacks either marker, or has them in the wrong order, is
    discarded and the request is retried.

    Args:
        message: The text to sanitise before it is logged.

    Returns:
        The whitespace-stripped text found between the two markers, or a
        fixed error placeholder when none of the three attempts produces a
        usable reply.
    """
    begin_marker = "BEGINNING OF TEXT"
    end_marker = "END OF TEXT"

    for _ in range(3):
        result = pipe_hide_sensitive_info.run({"message": message})
        replies = result["llm_hide_info"]["replies"]
        if not replies:
            # Nothing came back; treat it like a malformed reply and retry.
            continue
        answer = replies[0]

        begin_at = answer.find(begin_marker)
        if begin_at == -1:
            continue
        body_start = begin_at + len(begin_marker)

        # Search for the end marker only *after* the begin marker, so an
        # "END OF TEXT" appearing earlier in the reply cannot yield an empty
        # or inverted slice.
        end_at = answer.find(end_marker, body_start)
        if end_at == -1:
            continue

        return answer[body_start:end_at].strip()

    return "[Error when hiding user info, no log generated]"
177
+
178
+ publicKey = rsa.key.PublicKey(12771964615703412689771875940203228650714115641982293999355383771918953923930646807301050027636271229722536699443183546682257910554154445751549366908636779, 65537)
179
+
180
def encripted_log(message: str) -> str:
    """Encrypt `message` for the log using a per-call Fernet key.

    A fresh Fernet key is generated on every call. The message is encrypted
    with that key, and the key itself is encrypted with the module-level RSA
    ``publicKey``.

    Args:
        message: Plain text to encrypt; it is encoded to bytes first.

    Returns:
        The hex of the RSA-encrypted Fernet key immediately followed by the
        hex of the Fernet token, with no separator between the two.
        NOTE(review): presumably the reader splits the line using the fixed
        length of the RSA ciphertext; confirm against the decryption tool.
    """
    session_key = Fernet.generate_key()
    wrapped_key_hex = rsa.encrypt(session_key, publicKey).hex()
    token_hex = Fernet(session_key).encrypt(message.encode()).hex()
    return wrapped_key_hex + token_hex
185
+
186
+
187
def log_QA(question, answer):
    """Append one redacted, encrypted question/answer exchange to ``log.txt``.

    The exchange is rendered as a two-line "User: ... / Assistant: ..."
    transcript, passed through ``hide_sensitive_info`` and then
    ``encripted_log``, and the result is written as a single line.

    Args:
        question: The user's message.
        answer: The assistant's reply to that message.
    """
    transcript = f"User: {question}\nAssistant: {answer}"
    log_line = encripted_log(hide_sensitive_info(transcript))
    with open("log.txt", "a") as log_file:
        log_file.write(f"{log_line}\n")
193
+
194
+
195
  ##########################
196
  ####### Gradio app #######
197
  ##########################
 
212
  answer = results["router"]["no_answer"]
213
  else:
214
  answer = "Sorry, a mistake occured"
215
+ log_QA(message, answer)
216
  return answer
217
 
218
  if __name__ == "__main__":