VicidiLochi committed on
Commit
9d3239d
·
1 Parent(s): e154ed7

Add anonymization app

Browse files
Files changed (2) hide show
  1. app.py +71 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+
4
# Load a lightweight text2text model once, at import time, so that every
# request handled by the app reuses the same pipeline object.
generator = pipeline(
    task="text2text-generation",
    model="t5-small",
    tokenizer="t5-small",
)
6
+
7
+ # Function to construct the anonymization prompt
8
def construct_prompt(input_text):
    """Build the instruction prompt sent to the model for one input text.

    The prompt states the anonymization task, shows the JSON structure the
    reply should follow, lists the guidelines, and embeds ``input_text``
    verbatim between double quotes.

    Args:
        input_text: The raw text whose personal names should be replaced.

    Returns:
        The complete prompt string, without leading or trailing whitespace.
    """
    task_description = (
        "You are a text anonymizer. Your task is to process a given text, "
        "identify all personal names, replace them with unique and realistic "
        "pseudonyms, and return both the anonymized text and a dictionary "
        "linking the original names to the replacement names."
    )
    prompt_lines = (
        task_description,
        "",
        "The input text will be provided, and your output should follow this JSON structure:",
        "{",
        '"anonymized_text": "The text with all names replaced by pseudonyms.",',
        '"name_mapping": {',
        '"OriginalName1": "ReplacementName1",',
        '"OriginalName2": "ReplacementName2"',
        "}",
        "}",
        "",
        "### Guidelines:",
        "1. Only replace personal names (e.g., first names, last names, or full names).",
        "2. Ensure the pseudonyms are realistic and appropriate for the type of name being replaced.",
        "3. Keep all other parts of the text unchanged.",
        "4. If no names are found, return the original text with an empty dictionary.",
        "",
        "Here is the input text:",
        "",
        f'"{input_text}"',
        "",
        "Please process the input and return your output in the specified JSON format.",
    )
    # The first and last lines are fixed, non-blank text, so the joined
    # result already has no surrounding whitespace to strip.
    return "\n".join(prompt_lines)
34
+
35
+ # Function to process the input text and generate the anonymized output
36
def anonymize_text(input_text):
    """Anonymize personal names in ``input_text`` using the text2text model.

    Builds the instruction prompt with ``construct_prompt``, runs the
    module-level ``generator`` pipeline on it, and tries to parse the
    generated text as a mapping with the keys ``"anonymized_text"`` and
    ``"name_mapping"``.

    Args:
        input_text: The text entered by the user.

    Returns:
        A ``(anonymized_text, name_mapping)`` tuple. When the input is empty
        or whitespace-only, or when the model output cannot be parsed into
        the expected structure, the original ``input_text`` and an empty
        dict are returned instead.
    """
    # Function-scope imports keep this block self-contained; the file's
    # top-level imports only bring in gradio and transformers.
    import ast
    import json

    # Nothing to anonymize: skip the (comparatively expensive) model call.
    if not input_text or not input_text.strip():
        return input_text, {}

    prompt = construct_prompt(input_text)

    # Generate the output using the model.
    response = generator(prompt, max_length=512, num_return_sequences=1)
    generated_text = response[0]["generated_text"]

    # SECURITY: this previously called eval() on the model output. That
    # output is derived from untrusted user text, so eval() allowed arbitrary
    # code execution. Parse it as data only: strict JSON first, then a Python
    # literal (e.g. a single-quoted dict) via ast.literal_eval, which never
    # executes code.
    parse_errors = (ValueError, SyntaxError, TypeError, MemoryError, RecursionError)
    try:
        result = json.loads(generated_text)
    except parse_errors:
        try:
            result = ast.literal_eval(generated_text)
        except parse_errors:
            result = None

    # Anything other than a mapping cannot carry the expected keys.
    if not isinstance(result, dict):
        return input_text, {}

    anonymized_text = result.get("anonymized_text", input_text)
    if not isinstance(anonymized_text, str):
        anonymized_text = input_text

    name_mapping = result.get("name_mapping", {})
    if not isinstance(name_mapping, dict):
        name_mapping = {}

    return anonymized_text, name_mapping
54
+
55
+ # Gradio interface
56
with gr.Blocks() as demo:
    # Page heading and a short description of what the app does.
    gr.Markdown(value="## Text Anonymizer")
    gr.Markdown(
        value=(
            "Enter text containing personal names, and the model will "
            "anonymize it by replacing the names with pseudonyms. The app "
            "will also return a dictionary linking original names to their "
            "replacements."
        )
    )

    # One editable input box, one read-only output box, and a JSON viewer
    # for the original-name -> pseudonym mapping.
    input_text = gr.Textbox(
        label="Input Text",
        placeholder="Enter text here...",
    )
    anonymized_text = gr.Textbox(
        label="Anonymized Text",
        interactive=False,
    )
    name_mapping = gr.JSON(label="Name Mapping")

    def process_text(input_text):
        """Click handler: forward the textbox content to ``anonymize_text``."""
        return anonymize_text(input_text)

    # Clicking the button fills both output components from the handler's
    # two return values.
    submit_button = gr.Button(value="Anonymize")
    submit_button.click(
        fn=process_text,
        inputs=[input_text],
        outputs=[anonymized_text, name_mapping],
    )

# Launch the app
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ transformers==4.33.0
2
+ gradio==3.36.0