# PIISA_Demo / app.py
# shamikbose89's picture
# Update app.py
# 969e6e1
import gradio as gr
import os
from pii_transform.api.e2e import PiiTextProcessor
from pii_extract.defs import FMT_CONFIG_PLUGIN
def load_examples(path):
    """Read example texts from *path*, one example per line.

    Surrounding whitespace (including the line terminator) is stripped from
    every line and blank lines are skipped, so each returned entry is a
    non-empty string.

    Args:
        path: Location of a UTF-8 text file holding one example per line.

    Returns:
        A list of example strings; an empty list when the file does not exist.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            return [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        # The examples only populate an optional dropdown, so a missing file
        # is reported and the app continues without them.
        print(f"Examples file not found: {path}")
        return []


def truncate_example(example, max_length=50):
    """Return a short label for *example*.

    The label is the first *max_length* characters of the example, followed
    by "..." only when something was actually cut off.
    """
    if len(example) <= max_length:
        return example
    return example[:max_length] + "..."


# Full example texts, and the shortened labels listed in the examples dropdown
# (both lists share the same indices).
examples = load_examples("examples.txt")
examples_truncated = [truncate_example(example) for example in examples]
# Language names offered in the language dropdown, mapped to the language
# codes handed to the PII processor. Insertion order is the display order.
language_choices = dict(
    [
        ("English", "en"),
        ("Italian", "it"),
        ("Spanish", "es"),
        ("Portuguese", "pt"),
        ("German", "de"),
        ("French", "fr"),
    ]
)
# Language code used by process(); replaced by change_language() whenever
# another language is selected.
language_code = "en"

# Directory prepared at startup. Nothing else in this file reads cache_dir --
# presumably it is referenced by config.json or the hosting environment
# (TODO confirm).
cache_dir = "/home/user/app/cache"
try:
    os.makedirs(cache_dir, exist_ok=True)
except OSError as err:
    # makedirs raises instead of failing silently, so the failure has to be
    # caught here for the error message to be reachable. This runs at import
    # time, before any UI event exists, so the outcome is reported on stdout
    # rather than through in-browser notifications.
    print(f"Cache directory creation error: {err}")
else:
    print("Cache directory created at " + cache_dir)
# Markdown rendered inside the "Help Panel" accordion.
# The backslashes in front of "<" and ">" are doubled so the literal contains
# a real backslash without relying on Python's invalid-escape fallback
# ("\<" triggers a SyntaxWarning on recent interpreters).
policy_help_string = """
Policies are defined as follows:
1. **Annotate** - replace the PII instance by a \\<TYPE:VALUE\\> string, i.e. include both the PII type and its value
2. **Redact** - all PII instances are replaced by a \\<PII\\> generic string
3. **Placeholder** - replace with a prototypical value
4. **Synthetic** - substitute with synthetic data
For more information on the transformation policies, please refer to the guide [here](https://github.com/piisa/pii-transform/blob/main/doc/policies.md#pii-transformation-policies)"""
# Markdown rendered at the top of the page: project introduction and links.
# The hugging-face emoji is written as the real character; it had been
# replaced by a mis-decoded byte sequence.
header_string = """
## [PIISA](https://privacyprotection.substack.com/p/towards-a-common-privacy-api-introducing)
**PIISA** (Personally Identifiable Information Standard Architecture) is a set of tools to detect and remediate
PII within large scale language data. It uses best of breed tools like [🤗 transformers](https://huggingface.co/docs/transformers/index) libraries,
[spaCy](https://spacy.io/), regular expressions, [Faker](https://faker.readthedocs.io/en/master/) and [Presidio](https://microsoft.github.io/presidio/)
to leverage best practices for effectively managing data privacy in accordance with your privacy policies.
Important links:
1. [PIISA API docs](https://github.com/piisa/piisa)
2. [Blog](https://privacyprotection.substack.com/)
3. [LinkedIn](https://www.linkedin.com/company/piisa/)
This demo uses the multi-lingual [wikineural model](https://huggingface.co/Babelscape/wikineural-multilingual-ner) from [Babelscape](https://huggingface.co/Babelscape).
### &triangle; We're looking for any feedback and/or suggestions, so please open a new thread in the Discussions tab &triangle;
"""
def change_language(language_selection):
    """Switch the language used for subsequent processing.

    Looks up the code for *language_selection* in ``language_choices``, stores
    it in the module-level ``language_code`` and shows an info notification.

    Args:
        language_selection: A key of ``language_choices`` (a language name).

    Raises:
        KeyError: If *language_selection* is not a key of ``language_choices``.
    """
    # NOTE(review): language_code is a single module-level value, so the
    # selection is presumably shared by every session served by this process
    # -- confirm whether per-session state is wanted.
    global language_code
    selected_code = language_choices[language_selection]
    language_code = selected_code
    gr.Info("{} selected".format(language_selection))
def process(text, policy):
    """Transform the PII found in *text* according to *policy*.

    Args:
        text: The text to process.
        policy: Name of the transformation policy; it is lower-cased before
            being passed on as the processor's default policy.

    Returns:
        The transformed text, or an empty string when *text* is empty.
    """
    policy_name = policy.lower()

    # Nothing to transform: log it, notify the user and return an empty result.
    if text == "":
        print("Empty text field")
        gr.Warning("No text present")
        return ""

    # The processor is built for the currently selected language and the
    # requested policy. config.json is a custom config that prevents loading
    # of the Presidio plugin.
    processor_options = {
        "lang": language_code,
        "default_policy": policy_name,
        "config": "config.json",
    }
    transformer = PiiTextProcessor(**processor_options)

    # Run the text buffer through the processor and return the transformed buffer.
    return transformer(text)
def get_full_example(idx):
    """Return the complete example text stored at position *idx* of ``examples``."""
    full_text = examples[idx]
    return full_text
# Page layout and event wiring for the demo.
with gr.Blocks() as demo:
    # Header row: introduction text, an empty spacer column and the logo.
    with gr.Row():
        with gr.Column():
            gr.Markdown(value=header_string)
        with gr.Column(scale=0, min_width=100):
            pass
        with gr.Column(scale=0, min_width=100):
            logo = gr.Image(
                "image.jpeg",
                height=100,
                width=100,
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                mask_opacity=1.0,
            )

    # Main row: input text | language and policy controls | transformed text,
    # separated by narrow empty spacer columns.
    with gr.Row():
        with gr.Column(scale=2, min_width=400):
            text_original = gr.Textbox(
                label="Original Text",
                lines=13,
                placeholder="Enter the text you would like to analyze, or select from one of the examples below",
            )
        with gr.Column(scale=0, min_width=25):
            pass
        with gr.Column(scale=0, min_width=150):
            gr.Markdown(value="""<p style="text-align: center;">Select Language</p>""")
            language_names = list(language_choices.keys())
            lang_picker = gr.Dropdown(
                choices=language_names,
                label="",
                value=language_names[0],
                type="value",
                container=False,
            )
            lang_picker.select(change_language, inputs=lang_picker, outputs=None)
            gr.Markdown(value="""<p style="text-align: center;">Select Policy</p>""")
            annotate_btn = gr.Button(value="Annotate", variant="primary", size="sm")
            redact_btn = gr.Button(value="Redact", variant="primary", size="sm")
            anonymize_btn = gr.Button(value="Synthetic", variant="primary", size="sm")
            placeholder_btn = gr.Button(
                value="Placeholder", variant="primary", size="sm"
            )
        with gr.Column(scale=0, min_width=25):
            pass
        with gr.Column(
            scale=2,
            min_width=400,
        ):
            text_modified = gr.TextArea(
                label="Transformed Text",
                lines=13,
                show_copy_button=True,
                interactive=False,
            )

    # Every policy button calls process() with the input text and an explicit
    # policy name carried by a hidden text component. The policy name is
    # spelled out for all four buttons so that none of them depends on its
    # display label.
    policy_buttons = (
        (annotate_btn, "annotate"),
        (redact_btn, "redact"),
        (anonymize_btn, "synthetic"),
        (placeholder_btn, "placeholder"),
    )
    for button, policy_name in policy_buttons:
        button.click(
            fn=process,
            inputs=[
                text_original,
                gr.Text(value=policy_name, visible=False),
            ],
            outputs=text_modified,
        )

    # Example picker: the dropdown lists the shortened labels and reports the
    # selected index, which get_full_example() turns into the full text.
    with gr.Row():
        example_selector = gr.Dropdown(
            examples_truncated, type="index", label="Examples"
        )
        example_selector.select(
            get_full_example, inputs=example_selector, outputs=[text_original]
        )

    # Collapsed panel explaining the available policies.
    with gr.Accordion(label="Help Panel", open=False):
        gr.Markdown(value=policy_help_string)
# Call queue() on the Blocks app, then launch() it to start serving the demo.
demo.queue().launch()