Émile
committed on
Commit
·
5d01d7b
1
Parent(s):
8ad5a3d
Adding logs (encrypted)
Browse files
app.py
CHANGED
@@ -3,65 +3,22 @@ from haystack.utils import Secret
|
|
3 |
from haystack.components.builders.prompt_builder import PromptBuilder
|
4 |
from haystack.components.routers import ConditionalRouter
|
5 |
from haystack import Pipeline
|
6 |
-
|
7 |
-
from haystack.components.embedders import SentenceTransformersTextEmbedder #, SentenceTransformersDocumentEmbedder
|
8 |
-
# from haystack.components.preprocessors import DocumentSplitter
|
9 |
-
# from haystack.components.converters.txt import TextFileToDocument
|
10 |
-
# from haystack.components.preprocessors import DocumentCleaner
|
11 |
from haystack_integrations.document_stores.chroma import ChromaDocumentStore
|
12 |
from haystack_integrations.components.retrievers.chroma import ChromaEmbeddingRetriever
|
13 |
-
|
14 |
-
|
15 |
-
# from haystack.components.retrievers import InMemoryEmbeddingRetriever
|
16 |
|
17 |
import gradio as gr
|
18 |
|
19 |
embedding_model = "Alibaba-NLP/gte-multilingual-base"
|
20 |
|
21 |
|
22 |
-
########################
|
23 |
-
####### Indexing #######
|
24 |
-
########################
|
25 |
-
|
26 |
-
# Skipped: now using Chroma
|
27 |
-
|
28 |
-
# In memory version for now
|
29 |
-
# document_store = InMemoryDocumentStore(embedding_similarity_function="cosine")
|
30 |
-
|
31 |
-
# converter = TextFileToDocument()
|
32 |
-
|
33 |
-
# cleaner = DocumentCleaner()
|
34 |
-
|
35 |
-
# splitter = DocumentSplitter(split_by="word", split_length=200, split_overlap=100)
|
36 |
-
|
37 |
-
# embedder = SentenceTransformersDocumentEmbedder(model=embedding_model,
|
38 |
-
# trust_remote_code=True)
|
39 |
-
|
40 |
-
# writer = DocumentWriter(document_store=document_store)
|
41 |
-
|
42 |
-
# indexing = Pipeline()
|
43 |
-
|
44 |
-
# indexing.add_component("converter", converter)
|
45 |
-
# indexing.add_component("cleaner", cleaner)
|
46 |
-
# indexing.add_component("splitter", splitter)
|
47 |
-
# indexing.add_component("embedder", embedder)
|
48 |
-
# indexing.add_component("writer", writer)
|
49 |
-
|
50 |
-
# indexing.connect("converter", "cleaner")
|
51 |
-
# indexing.connect("cleaner", "splitter")
|
52 |
-
# indexing.connect("splitter", "embedder")
|
53 |
-
# indexing.connect("embedder", "writer")
|
54 |
-
|
55 |
-
# indexing.run({"sources": ["knowledge-plain.txt"]})
|
56 |
-
|
57 |
-
|
58 |
-
# Chroma version (no support for overlaps in documents)
|
59 |
-
# document_store = ChromaDocumentStore(persist_path="vstore_4012")
|
60 |
-
|
61 |
document_store = ChromaDocumentStore(
|
62 |
persist_path="vstore_4012"
|
63 |
)
|
64 |
|
|
|
65 |
##################################
|
66 |
####### Answering pipeline #######
|
67 |
##################################
|
@@ -180,6 +137,61 @@ answer_query.connect("prompt_builder2", "llm2")
|
|
180 |
answer_query.warm_up()
|
181 |
|
182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
##########################
|
184 |
####### Gradio app #######
|
185 |
##########################
|
@@ -200,6 +212,7 @@ def chat(message, history):
|
|
200 |
answer = results["router"]["no_answer"]
|
201 |
else:
|
202 |
answer = "Sorry, a mistake occured"
|
|
|
203 |
return answer
|
204 |
|
205 |
if __name__ == "__main__":
|
|
|
3 |
from haystack.components.builders.prompt_builder import PromptBuilder
|
4 |
from haystack.components.routers import ConditionalRouter
|
5 |
from haystack import Pipeline
|
6 |
+
from haystack.components.embedders import SentenceTransformersTextEmbedder
|
|
|
|
|
|
|
|
|
7 |
from haystack_integrations.document_stores.chroma import ChromaDocumentStore
|
8 |
from haystack_integrations.components.retrievers.chroma import ChromaEmbeddingRetriever
|
9 |
+
import rsa
|
10 |
+
from cryptography.fernet import Fernet
|
|
|
11 |
|
12 |
import gradio as gr
|
13 |
|
14 |
embedding_model = "Alibaba-NLP/gte-multilingual-base"
|
15 |
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
# Persistent Chroma vector store, stored on disk at ./vstore_4012.
# NOTE(review): presumably populated by a separate indexing run — the
# indexing pipeline in this file is commented out; confirm the store exists
# on disk before deployment.
document_store = ChromaDocumentStore(
    persist_path="vstore_4012"
)
|
20 |
|
21 |
+
|
22 |
##################################
|
23 |
####### Answering pipeline #######
|
24 |
##################################
|
|
|
137 |
answer_query.warm_up()
|
138 |
|
139 |
|
140 |
+
##########################
####### Logging ##########
##########################

# Prompt for a privacy-scrubbing LLM pass: the model is asked to rewrite the
# transcript with sensitive fields (email, phone, user name, ...) replaced by
# descriptive markers, while echoing the BEGINNING/END OF TEXT sentinels so
# the sanitised portion can be extracted reliably afterwards (see
# hide_sensitive_info below).
prompt_template_hide_info = """You are a privacy robot that specialise in hiding sensitive information in a text.
Your help will ensure that no user information gets leaked, so you are always happy to help.
You will be given a text, and your task is to remove any sensitive information, and replacing it with a descriptive marker.
Here are a few examples, but you should not restrict yourself to only those:
If the text contains an email address, you should replace it with a marker "<email>".
If the text contains a phone number, you should replace it with a marker "<phone>".
If the text contains the name of the user, you should replace it with a marker "<name>".
Ensure you distinguish when a name, email, etc is actually that of a public figure or company and is provided by the assistant and not the user: inthis case you should not hide it, as it it not sensible information.
The rest of the text should be copied IDENTICALLY, including the punctuation and formatting, and the beginning and end of the text in capital letters. Do not add or remove any other character.

BEGINNING OF TEXT
{{ message }}
END OF TEXT

Your response:
"""

# Renders the template above from a single `message` template variable.
prompt_builder_hide_info = PromptBuilder(template=prompt_template_hide_info)

# setup_generator is defined elsewhere in this file — presumably returns an
# LLM generator component for the given model name; TODO confirm.
llm_hide_info = setup_generator("gpt-4o-mini")

# Two-component pipeline: render the scrubbing prompt, then run the LLM on it.
pipe_hide_sensitive_info = Pipeline()
pipe_hide_sensitive_info.add_component("prompt_builder_hide_info", prompt_builder_hide_info)
pipe_hide_sensitive_info.add_component("llm_hide_info", llm_hide_info)
pipe_hide_sensitive_info.connect("prompt_builder_hide_info", "llm_hide_info")
|
169 |
+
|
170 |
+
def hide_sensitive_info(message):
    """Scrub sensitive information from *message* via the LLM pipeline.

    The model is instructed to echo the transcript between the sentinels
    "BEGINNING OF TEXT" / "END OF TEXT" with PII replaced by markers.
    Up to three attempts are made; the text between the first occurrence of
    each sentinel is returned, stripped of surrounding whitespace. If no
    reply ever contains both sentinels, a placeholder error string is
    returned so nothing un-scrubbed gets logged.
    """
    start_tag = "BEGINNING OF TEXT"
    end_tag = "END OF TEXT"
    for _attempt in range(3):
        reply = pipe_hide_sensitive_info.run({"message": message})["llm_hide_info"]["replies"][0]
        if start_tag in reply and end_tag in reply:
            begin = reply.find(start_tag) + len(start_tag)
            return reply[begin:reply.find(end_tag)].strip()
    return "[Error when hiding user info, no log generated]"
|
177 |
+
|
178 |
+
# RSA public key used to wrap the per-log Fernet keys in encripted_log().
# NOTE(review): the modulus appears to be only ~512 bits, which is factorable
# with modest resources today — these "encrypted" logs should not be treated
# as confidential. Consider regenerating with a >= 2048-bit key.
publicKey = rsa.key.PublicKey(12771964615703412689771875940203228650714115641982293999355383771918953923930646807301050027636271229722536699443183546682257910554154445751549366908636779, 65537)
|
179 |
+
|
180 |
+
def encripted_log(message: str):
    """Hybrid-encrypt *message* for append-only logging.

    A fresh symmetric Fernet key is generated per call, wrapped with the
    module-level RSA public key, and both ciphertexts are returned as a
    single hex string: RSA-wrapped key first, Fernet ciphertext second.
    Only the holder of the RSA private key can recover the session key and
    hence the log entry.
    """
    session_key = Fernet.generate_key()
    wrapped_key = rsa.encrypt(session_key, publicKey)
    payload = Fernet(session_key).encrypt(message.encode())
    return wrapped_key.hex() + payload.hex()
|
185 |
+
|
186 |
+
|
187 |
+
def log_QA(question, answer):
    """Append one encrypted, PII-scrubbed Q/A exchange to log.txt.

    The transcript is first passed through hide_sensitive_info() so personal
    data never reaches disk, then hybrid-encrypted via encripted_log().
    One hex-encoded line is appended per call.
    """
    transcript = f"User: {question}\nAssistant: {answer}"
    scrubbed = hide_sensitive_info(transcript)
    with open("log.txt", "a") as log_file:
        log_file.write(encripted_log(scrubbed) + "\n")
|
193 |
+
|
194 |
+
|
195 |
##########################
|
196 |
####### Gradio app #######
|
197 |
##########################
|
|
|
212 |
answer = results["router"]["no_answer"]
|
213 |
else:
|
214 |
answer = "Sorry, a mistake occured"
|
215 |
+
log_QA(message, answer)
|
216 |
return answer
|
217 |
|
218 |
if __name__ == "__main__":
|