Spaces:

PIISA
/

PIISA_Demo

Running

App Files Files Community

PIISA_Demo / app.py

Paulo

fix/library-versions (#5)

84f36c1 verified 15 days ago

raw

history blame contribute delete

6.59 kB

	import os
	from pathlib import Path

	# Hugging Face cache location used by the Transformers plugin: ~/app/cache.
	# The variable is exported before the pii_* imports further down,
	# presumably so the plugin sees it when it is loaded -- confirm.
	cache_dir = Path.home().joinpath("app", "cache")
	cache_dir.mkdir(parents=True, exist_ok=True)
	os.environ.update(HUGGINGFACE_HUB_CACHE=str(cache_dir))

	from pii_process.api import PiiTextProcessor
	from pii_extract.defs import FMT_CONFIG_PLUGIN
	import gradio as gr

	# Report whether the cache directory is present on disk.
	# NOTE(review): this runs at import time, outside any Gradio event handler;
	# confirm that gr.Info / gr.Warning surface somewhere useful in that context.
	if not cache_dir.is_dir():
	    gr.Warning("Cache directory creation error")
	else:
	    cache_message = "Cache directory created at " + str(cache_dir)
	    gr.Info(cache_message)

	# Read examples: one example per line of examples.txt.
	# The file is decoded explicitly as UTF-8 so the result does not depend on
	# the platform's default encoding. Line terminators are dropped and blank
	# lines are skipped, so neither ends up in the dropdown or the text box.
	with open("examples.txt", "r", encoding="utf-8") as f:
	    examples = [line.rstrip("\r\n") for line in f if line.strip()]

	# Dropdown labels: the first 50 characters of each example, with an ellipsis
	# only when something was actually cut off. Both lists share the same
	# indices, which get_full_example relies on to map a label to its full text.
	examples_truncated = [
	    example if len(example) <= 50 else example[:50] + "..."
	    for example in examples
	]
	# Languages offered in the UI: display name -> language code passed to the
	# PII processor. Insertion order is the order shown in the language dropdown.
	language_choices = dict(
	    English="en",
	    Italian="it",
	    Spanish="es",
	    Portuguese="pt",
	    German="de",
	    French="fr",
	)

	# Currently selected language code; rebound by change_language.
	language_code = "en"

	# Markdown shown inside the "Help Panel" accordion.
	# The angle-bracket placeholders are wrapped in backticks so that Markdown
	# renders them as literal text instead of parsing them as an autolink
	# (<TYPE:VALUE>) or a raw HTML tag (<PII>), which would hide them.
	policy_help_string = """
	Policies are defined as follows:

	1. Annotate - replace the PII instance by a `<TYPE:VALUE>` string, i.e. include both the PII type and its value
	2. Redact - all PII instances are replaced by a `<PII>` generic string
	3. Placeholder - replace with a prototypical value
	4. Synthetic - substitute with synthetic data

	For more information on the transformation policies, please refer to the guide [here](https://github.com/piisa/pii-transform/blob/main/doc/policies.md#pii-transformation-policies)"""

	# Markdown for the page header: project description, important links, the
	# model used by the demo and a request for feedback. It is rendered by the
	# gr.Markdown call in the first row of the Blocks layout.
	header_string = """
	## [PIISA](https://privacyprotection.substack.com/p/towards-a-common-privacy-api-introducing)
	PIISA (Personally Identifiable Information Standard Architecture) is a set of tools to detect and remediate
	PII within large scale language data. It uses best of breed tools like [🤗 transformers](https://huggingface.co/docs/transformers/index) libraries,
	[spaCy](https://spacy.io/), regular expressions, [Faker](https://faker.readthedocs.io/en/master/) and [Presidio](https://microsoft.github.io/presidio/)
	to leverage best practices for effectively managing data privacy in accordance with your privacy policies.
	Important links:
	1. [PIISA API docs](https://github.com/piisa/piisa)
	2. [Blog](https://privacyprotection.substack.com/)
	3. [LinkedIn](https://www.linkedin.com/company/piisa/)

	This demo uses the multi-lingual [wikineural model](https://huggingface.co/Babelscape/wikineural-multilingual-ner) from [Babelscape](https://huggingface.co/Babelscape).

	### &triangle; We're looking for any feedback and/or suggestions, so please open a new thread in the Discussions tab &triangle;
	"""


	def change_language(language_selection):
	    """Make the language picked in the dropdown the active one.

	    Looks up ``language_selection`` (a display name) in ``language_choices``,
	    stores the resulting code in the module-level ``language_code`` and then
	    emits a ``gr.Info`` message naming the selection.

	    NOTE(review): ``language_code`` is a single module-level value, so it
	    looks like all sessions of the demo share it -- confirm this is intended.

	    Raises:
	        KeyError: if ``language_selection`` is not a key of
	            ``language_choices``.
	    """
	    global language_code
	    selected_code = language_choices[language_selection]
	    language_code = selected_code
	    gr.Info(f"{language_selection} selected")


	def process(text, policy):
	    """Apply a PII transformation policy to ``text`` and return the result.

	    Args:
	        text: the text to process.
	        policy: name of the policy; it is lower-cased before being handed to
	            the processor as its ``default_policy``.

	    Returns:
	        The output of the ``PiiTextProcessor`` call, or ``""`` (after a
	        warning) when ``text`` is the empty string.
	    """
	    policy_name = policy.lower()

	    # Nothing to transform: log it, warn the user and return early
	    if text == "":
	        print("Empty text field")
	        gr.Warning("No text present")
	        return ""

	    # A processor is created on every call, for the currently selected
	    # language and the requested policy. The custom config is there to
	    # prevent loading of the Presidio plugin; further customization is
	    # possible through that config.
	    processor = PiiTextProcessor(
	        lang=language_code,
	        default_policy=policy_name,
	        config="config.json",
	    )

	    # Process the text buffer and hand back the transformed buffer
	    return processor(text)


	def get_full_example(idx):
	    """Return the full example text stored at position ``idx`` of ``examples``."""
	    full_text = examples[idx]
	    return full_text


	# User interface: header row, main row (input | controls | output), example
	# selector and a collapsible help panel.
	with gr.Blocks() as demo:
	    # Header row: description text, an empty spacer column and the logo
	    with gr.Row():
	        with gr.Column():
	            gr.Markdown(value=header_string)
	        with gr.Column(scale=0, min_width=100):
	            pass
	        with gr.Column(scale=0, min_width=100):
	            logo = gr.Image(
	                "image.jpeg",
	                height=100,
	                width=100,
	                show_label=False,
	                show_download_button=False,
	                show_share_button=False,
	            )

	    # Main row: original text | language and policy controls | transformed text
	    with gr.Row():
	        with gr.Column(scale=2, min_width=400):
	            text_original = gr.Textbox(
	                label="Original Text",
	                lines=13,
	                placeholder="Enter the text you would like to analyze, or select from one of the examples below",
	            )
	        with gr.Column(scale=0, min_width=25):
	            pass
	        with gr.Column(scale=0, min_width=150):
	            gr.Markdown(value="""<p style="text-align: center;">Select Language</p>""")
	            lang_picker = gr.Dropdown(
	                choices=list(language_choices),
	                label="",
	                # The first language in language_choices is preselected
	                value=next(iter(language_choices)),
	                type="value",
	                container=False,
	            )
	            lang_picker.select(change_language, inputs=lang_picker, outputs=None)
	            gr.Markdown(value="""<p style="text-align: center;">Select Policy</p>""")
	            annotate_btn = gr.Button(value="Annotate", variant="primary", size="sm")
	            redact_btn = gr.Button(value="Redact", variant="primary", size="sm")
	            anonymize_btn = gr.Button(value="Synthetic", variant="primary", size="sm")
	            placeholder_btn = gr.Button(
	                value="Placeholder", variant="primary", size="sm"
	            )
	        with gr.Column(scale=0, min_width=25):
	            pass
	        with gr.Column(scale=2, min_width=400):
	            text_modified = gr.TextArea(
	                label="Transformed Text",
	                lines=13,
	                show_copy_button=True,
	                interactive=False,
	            )

	    # Wire every policy button the same way: the button itself is the second
	    # input, so process() receives the button label and lower-cases it into
	    # the policy name. This replaces the hidden gr.Text components that
	    # three of the four buttons previously used to carry a constant.
	    for policy_btn in (annotate_btn, redact_btn, anonymize_btn, placeholder_btn):
	        policy_btn.click(
	            fn=process,
	            inputs=[text_original, policy_btn],
	            outputs=text_modified,
	        )

	    # Example selector: the dropdown reports the index of the chosen label
	    # and get_full_example maps it to the full text for the input box
	    with gr.Row():
	        example_selector = gr.Dropdown(
	            examples_truncated, type="index", label="Examples"
	        )
	        example_selector.select(
	            get_full_example, inputs=example_selector, outputs=[text_original]
	        )

	    with gr.Accordion(label="Help Panel", open=False):
	        gr.Markdown(value=policy_help_string)

	demo.queue().launch()