Spaces:

VicidiLochi
/

SIA_Partners_Hackathon

Sleeping

App Files Files Community

VicidiLochi committed on Jan 22

Commit

9d3239d

1 Parent(s): e154ed7

Add anonymization app

Browse files

Files changed (2) hide show

app.py +71 -0
requirements.txt +2 -0

app.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import gradio as gr
+from transformers import pipeline
+# Load a lightweight text2text model once at module load; anonymize_text reuses this shared pipeline.
+# NOTE(review): t5-small is presumably not instruction-tuned, so it may not emit the JSON structure the prompt asks for — confirm, or swap in an instruction-following model.
+generator = pipeline("text2text-generation", model="t5-small", tokenizer="t5-small")
+# Build the instruction prompt that is sent to the model
+def construct_prompt(input_text):
+    """Return the anonymization instructions with ``input_text`` embedded in double quotes.
+    The prompt asks for a JSON object with the keys ``anonymized_text`` and ``name_mapping``.
+    Leading and trailing whitespace of the assembled prompt is stripped before returning.
+    """
+    return f"""
+    You are a text anonymizer. Your task is to process a given text, identify all personal names, replace them with unique and realistic pseudonyms, and return both the anonymized text and a dictionary linking the original names to the replacement names.
+    The input text will be provided, and your output should follow this JSON structure:
+    {{
+      "anonymized_text": "The text with all names replaced by pseudonyms.",
+      "name_mapping": {{
+        "OriginalName1": "ReplacementName1",
+        "OriginalName2": "ReplacementName2"
+      }}
+    }}
+    ### Guidelines:
+    1. Only replace personal names (e.g., first names, last names, or full names).
+    2. Ensure the pseudonyms are realistic and appropriate for the type of name being replaced.
+    3. Keep all other parts of the text unchanged.
+    4. If no names are found, return the original text with an empty dictionary.
+    Here is the input text:
+    "{input_text}"
+    Please process the input and return your output in the specified JSON format.
+    """.strip()
+# Function to process the input text and generate the anonymized output
+def anonymize_text(input_text):
+    """Anonymize personal names in ``input_text`` with the module-level ``generator`` pipeline.
+    Args:
+        input_text: The raw text entered by the user.
+    Returns:
+        A ``(anonymized_text, name_mapping)`` tuple. When the model output cannot be
+        parsed into a dictionary, the original text and an empty mapping are returned.
+    """
+    # Imported locally so this function does not depend on edits to the file's import block.
+    import ast
+    import json
+    # Construct the instruction prompt
+    prompt = construct_prompt(input_text)
+    # Generate the output using the model
+    response = generator(prompt, max_length=512, num_return_sequences=1)
+    generated_text = response[0]['generated_text']
+    # SECURITY: the model output is shaped by user input, so it must never be passed to eval().
+    # Parse it as JSON first; fall back to a Python literal (e.g. single-quoted dict), which
+    # ast.literal_eval evaluates without executing code.
+    try:
+        result = json.loads(generated_text)
+    except ValueError:
+        try:
+            result = ast.literal_eval(generated_text)
+        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
+            result = None
+    # Anything other than a dictionary is treated as an unparseable response.
+    if not isinstance(result, dict):
+        return input_text, {}
+    anonymized_text = result.get("anonymized_text", input_text)
+    name_mapping = result.get("name_mapping", {})
+    # Guard against well-formed output whose fields have the wrong type.
+    if not isinstance(anonymized_text, str):
+        anonymized_text = input_text
+    if not isinstance(name_mapping, dict):
+        name_mapping = {}
+    return anonymized_text, name_mapping
+# Gradio interface
+def process_text(input_text):
+    """Click handler: pass the textbox contents to ``anonymize_text`` and return its result."""
+    return anonymize_text(input_text)
+with gr.Blocks() as demo:
+    gr.Markdown("## Text Anonymizer")
+    gr.Markdown("Enter text containing personal names, and the model will anonymize it by replacing the names with pseudonyms. The app will also return a dictionary linking original names to their replacements.")
+    input_text = gr.Textbox(placeholder="Enter text here...", label="Input Text")
+    anonymized_text = gr.Textbox(interactive=False, label="Anonymized Text")
+    name_mapping = gr.JSON(label="Name Mapping")
+    submit_button = gr.Button("Anonymize")
+    # One input component, two output components matching the handler's returned pair
+    submit_button.click(
+        fn=process_text,
+        inputs=[input_text],
+        outputs=[anonymized_text, name_mapping],
+    )
+# Launch the app
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ transformers==4.33.0
2	+ gradio==3.36.0