import json

# PII tags whose character spans are scored by this module.
TAGS = ['EMAIL', 'IP_ADDRESS', 'KEY']


def load_json(sample):
    """Parse a JSON-encoded annotation string, returning [] when it is malformed."""
    try:
        return json.loads(sample)
    except ValueError:
        # json.JSONDecodeError subclasses ValueError, so bad JSON lands here.
        return []
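
# Hypothetical round-trip for load_json: well-formed annotations parse into a
# list of dicts, while malformed strings degrade to an empty annotation list:
#   load_json('[{"tag": "EMAIL", "start": 0, "end": 5}]')
#       -> [{'tag': 'EMAIL', 'start': 0, 'end': 5}]
#   load_json('not json') -> []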


def overlapped(a, b, alpha=0.8, beta=0.8):
    """Return True if intervals a and b overlap for more than the given
    fractions of their lengths: the overlap must exceed alpha of the
    reference length (b) and beta of the prediction length (a).
    """
    size_overlap = max(0, min(a[1], b[1]) - max(a[0], b[0]))
    ref_overlap = size_overlap / (b[1] - b[0])
    pred_overlap = size_overlap / (a[1] - a[0])
    return ref_overlap > alpha and pred_overlap > beta
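
# Worked example (hypothetical spans): a = (0, 10) and b = (2, 12) share
# min(10, 12) - max(0, 2) = 8 characters, so both ratios equal 8/10 = 0.8,
# which is not strictly greater than the default thresholds: no match.
# With b = (1, 10) the overlap is 9, the ratios are 9/10 = 0.9 and
# 9/9 = 1.0, and overlapped((0, 10), (1, 10)) returns True.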


def compare_intervals(references, predictions, alpha=0.8, beta=0.8):
    """Compare two lists of intervals and return the number
    of true positives, false positives and false negatives.

    >>> compare_intervals([(0, 7), (10, 20)], [(1, 8), (99, 119)], 0, 0)[0]
    {'TP': 1, 'FN': 1, 'FP': 1}
    """
    ref_intervals = sorted(references, key=lambda x: x[0])
    pred_intervals = sorted(predictions, key=lambda x: x[0])
    scores = {"TP": 0, "FN": 0, "FP": 0}
    detected_secrets = []
    matched_refs = set()
    for interval in pred_intervals:
        for i, target in enumerate(ref_intervals):
            if i not in matched_refs and overlapped(interval, target, alpha, beta):
                # The prediction sufficiently covers a still-unmatched reference.
                scores["TP"] += 1
                matched_refs.add(i)
                detected_secrets.append(interval)
                break
        else:
            # The prediction overlaps no available reference: false positive.
            scores["FP"] += 1
    # References no prediction claimed are false negatives; tracking matched
    # references keeps FN non-negative even when several predictions overlap
    # the same reference.
    scores["FN"] = len(ref_intervals) - len(matched_refs)
    return scores, detected_secrets
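
# Matching is greedy and one-to-one (hypothetical spans): with one reference
# (0, 10) and predictions [(0, 10), (1, 10)], the first prediction claims the
# reference and the second finds no unmatched reference left, so it counts as
# a false positive:
#   compare_intervals([(0, 10)], [(0, 10), (1, 10)])[0]
#       -> {'TP': 1, 'FN': 0, 'FP': 1}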


def recall_precision(metrics_dict):
    """Compute recall and precision for each tag."""
    metrics = {}
    for tag in TAGS:
        tp = metrics_dict[tag]['TP']
        fn = metrics_dict[tag]['FN']
        fp = metrics_dict[tag]['FP']
        if not (tp + fn + fp):
            # Nothing to detect and nothing predicted: perfect by convention.
            metrics[tag] = {'recall': 1.0, 'precision': 1.0}
        elif not (tp + fn) or not (tp + fp):
            # Recall or precision has a zero denominator: report 0 for both.
            metrics[tag] = {'recall': 0, 'precision': 0}
        else:
            metrics[tag] = {
                'recall': tp / (tp + fn),
                'precision': tp / (tp + fp),
            }
    return metrics
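
# Worked numbers (hypothetical counts): with TP = 8, FN = 2, FP = 4 for a tag,
# recall = 8 / (8 + 2) = 0.8 and precision = 8 / (8 + 4) ≈ 0.667; a tag with
# all-zero counts reports 1.0 / 1.0 under the convention above.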


def recall_precision_all_tags(metrics_dict):
    """Compute recall and precision pooled over all tags (micro-average)."""
    TP = sum(metrics_dict[tag]['TP'] for tag in TAGS)
    FN = sum(metrics_dict[tag]['FN'] for tag in TAGS)
    FP = sum(metrics_dict[tag]['FP'] for tag in TAGS)
    if not (TP + FN + FP):
        # Mirror the per-tag convention: no references and no predictions
        # count as a perfect score.
        return {'recall': 1.0, 'precision': 1.0}
    if not (TP + FN) or not (TP + FP):
        return {'recall': 0, 'precision': 0}
    return {'recall': TP / (TP + FN), 'precision': TP / (TP + FP)}
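
# Pooling counts before dividing yields a micro-average, so tags with more
# entities weigh more (hypothetical counts): EMAIL at TP = 9, FN = 1 and KEY
# at TP = 0, FN = 1 pool to recall = 9 / 11 ≈ 0.82, whereas the mean of the
# per-tag recalls (0.9 and 0.0) would be 0.45.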


def evaluate_pii(references, predictions, alpha=0.8, beta=0.8):
    """Evaluate predictions of PII against references, tag by tag."""
    metrics_dict = {}
    for tag in TAGS:
        ref_intervals = [(e['start'], e['end']) for e in references if e['tag'] == tag]
        pred_intervals = [(e['start'], e['end']) for e in predictions if e['tag'] == tag]
        metrics, _ = compare_intervals(ref_intervals, pred_intervals, alpha, beta)
        metrics_dict[tag] = metrics
    return metrics_dict
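
# Expected entity format (hypothetical values): each annotation is a dict with
# a 'tag' and character offsets 'start'/'end', e.g.
#   refs  = [{'tag': 'EMAIL', 'start': 10, 'end': 28}]
#   preds = [{'tag': 'EMAIL', 'start': 11, 'end': 28}]
#   evaluate_pii(refs, preds)['EMAIL'] -> {'TP': 1, 'FN': 0, 'FP': 0}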


def evaluate_pii_ds(dataset, pred_column='pii', ref_column='secrets', overall_score=False, alpha=0.8, beta=0.8):
    """Evaluate predictions of PII against references over a whole dataset."""
    metrics_dict = {tag: {'TP': 0, 'FN': 0, 'FP': 0} for tag in TAGS}
    for sample in dataset:
        ref_list = load_json(sample[ref_column])
        pred_list = load_json(sample[pred_column])
        sample_metrics = evaluate_pii(ref_list, pred_list, alpha, beta)
        for tag in TAGS:
            for metric in metrics_dict[tag]:
                metrics_dict[tag][metric] += sample_metrics[tag][metric]
    if overall_score:
        return recall_precision_all_tags(metrics_dict), metrics_dict
    return recall_precision(metrics_dict), metrics_dict
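

# Minimal smoke test, assuming only that the dataset is an iterable of samples
# indexable by column name (a plain list of dicts stands in here for e.g. a
# datasets.Dataset; the column names are the defaults above).
if __name__ == "__main__":
    toy_dataset = [
        {
            'secrets': json.dumps([{'tag': 'EMAIL', 'start': 10, 'end': 28}]),
            'pii': json.dumps([
                {'tag': 'EMAIL', 'start': 11, 'end': 28},
                {'tag': 'KEY', 'start': 40, 'end': 72},
            ]),
        }
    ]
    per_tag, raw_counts = evaluate_pii_ds(toy_dataset)
    print(per_tag)     # EMAIL is detected; the spurious KEY shows up as an FP
    print(raw_counts)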