Spaces:
Sleeping
Sleeping
Commit
·
9d3239d
1
Parent(s):
e154ed7
Add anonymization app
Browse files- app.py +71 -0
- requirements.txt +2 -0
app.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import pipeline
|
3 |
+
|
4 |
+
# Load a lightweight model
|
5 |
+
# Text2text-generation pipeline, built once when the module is imported and
# reused by every call to anonymize_text(). The same checkpoint name is used
# for both the model weights and the tokenizer.
# NOTE(review): the prompt asks for structured JSON output; confirm that this
# checkpoint can actually follow such instructions.
_MODEL_ID = "t5-small"
generator = pipeline(
    "text2text-generation",
    model=_MODEL_ID,
    tokenizer=_MODEL_ID,
)
|
6 |
+
|
7 |
+
# Function to construct the anonymization prompt
|
8 |
+
def construct_prompt(input_text: str) -> str:
    """Build the instruction prompt that asks the model to anonymize a text.

    The prompt describes the task, shows the JSON structure the answer must
    follow (``anonymized_text`` plus a ``name_mapping`` dictionary), lists the
    guidelines, and finally embeds *input_text* between double quotes.

    Args:
        input_text: The raw text whose personal names should be replaced.

    Returns:
        The complete prompt with leading and trailing whitespace removed.
    """
    # Doubled braces ({{ and }}) are f-string escapes for literal braces in
    # the JSON example; only {input_text} is interpolated. Interpolated text
    # is inserted verbatim, so braces inside the user's text are left alone.
    # NOTE(review): template lines are written flush-left, as they appear in
    # the visible source; confirm no leading indentation was intended.
    prompt = f"""
You are a text anonymizer. Your task is to process a given text, identify all personal names, replace them with unique and realistic pseudonyms, and return both the anonymized text and a dictionary linking the original names to the replacement names.

The input text will be provided, and your output should follow this JSON structure:
{{
"anonymized_text": "The text with all names replaced by pseudonyms.",
"name_mapping": {{
"OriginalName1": "ReplacementName1",
"OriginalName2": "ReplacementName2"
}}
}}

### Guidelines:
1. Only replace personal names (e.g., first names, last names, or full names).
2. Ensure the pseudonyms are realistic and appropriate for the type of name being replaced.
3. Keep all other parts of the text unchanged.
4. If no names are found, return the original text with an empty dictionary.

Here is the input text:

"{input_text}"

Please process the input and return your output in the specified JSON format.
"""
    return prompt.strip()
|
34 |
+
|
35 |
+
# Function to process the input text and generate the anonymized output
|
36 |
+
def anonymize_text(input_text: str) -> tuple:
    """Run the model on *input_text* and extract the anonymization result.

    The text is wrapped in the instruction prompt, passed to the module-level
    ``generator`` pipeline, and the generated string is parsed as a mapping
    with the keys ``anonymized_text`` and ``name_mapping``.

    Args:
        input_text: The raw text to anonymize.

    Returns:
        A ``(anonymized_text, name_mapping)`` pair. If the model output cannot
        be parsed into a dictionary, or a field has an unexpected type, the
        function falls back to the original ``input_text`` and/or an empty
        mapping instead of raising.
    """
    # Function-scope imports keep this block self-contained.
    import ast
    import json

    # Construct the instruction prompt
    prompt = construct_prompt(input_text)

    # Generate the output using the model
    response = generator(prompt, max_length=512, num_return_sequences=1)
    generated_text = response[0]["generated_text"]

    # SECURITY: this previously called eval() on the generated text. The
    # model output is derived from untrusted user input, so eval() allowed
    # arbitrary code execution. Parse it as data only: strict JSON first
    # (the format the prompt requests), then a Python literal as a fallback
    # for dict-style output such as single-quoted keys.
    try:
        result = json.loads(generated_text)
    except (TypeError, ValueError):
        try:
            result = ast.literal_eval(generated_text)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            result = None

    # Anything that is not a mapping (list, number, bare string, ...) is
    # treated the same as unparseable output.
    if not isinstance(result, dict):
        return input_text, {}

    anonymized_text = result.get("anonymized_text", input_text)
    if not isinstance(anonymized_text, str):
        anonymized_text = input_text

    name_mapping = result.get("name_mapping", {})
    if not isinstance(name_mapping, dict):
        name_mapping = {}

    return anonymized_text, name_mapping
|
54 |
+
|
55 |
+
# Gradio interface
|
56 |
+
with gr.Blocks() as demo:
    # Page heading and a short description of what the app does.
    gr.Markdown("## Text Anonymizer")
    gr.Markdown(
        "Enter text containing personal names, and the model will anonymize it by replacing the names with pseudonyms. The app will also return a dictionary linking original names to their replacements."
    )

    # One editable input box, plus a read-only text box and a JSON viewer
    # for the two values returned by the callback.
    input_text = gr.Textbox(
        label="Input Text",
        placeholder="Enter text here...",
    )
    anonymized_text = gr.Textbox(
        label="Anonymized Text",
        interactive=False,
    )
    name_mapping = gr.JSON(label="Name Mapping")

    def process_text(input_text):
        """Button callback: forward the textbox content to anonymize_text()."""
        return anonymize_text(input_text)

    # Clicking the button runs the callback and fills both output widgets.
    submit_button = gr.Button("Anonymize")
    submit_button.click(
        fn=process_text,
        inputs=[input_text],
        outputs=[anonymized_text, name_mapping],
    )

# Start the Gradio server for the interface defined above.
demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
transformers==4.33.0
|
2 |
+
gradio==3.36.0
|