---
# OpenAPI 3.0 description of the Presidio REST API (analyzer and anonymizer
# services).
openapi: 3.0.0

# General metadata shown at the top of the rendered API documentation.
info:
  description: Context aware, pluggable and customizable PII anonymization service for text and images.
  version: "2.0"
  title: Presidio
  contact:
    name: Presidio support
    # NOTE(review): the previous value was the obfuscation placeholder
    # "[email protected]", which YAML parses as a list rather than a string.
    # Presumed to be the project's public support address - confirm.
    email: presidio@microsoft.com
    url: https://github.com/microsoft/presidio
  # Vendor extension (x-*): logo picked up by documentation renderers that
  # support it.
  x-logo:
    url: "https://upload.wikimedia.org/wikipedia/commons/4/44/Microsoft_logo.svg"
  license:
    name: MIT
    url: "https://github.com/microsoft/presidio/blob/main/LICENSE"

# Pointer to the full, hosted documentation site.
externalDocs:
  description: Presidio documentation.
  url: "https://microsoft.github.io/presidio/"

# Tags used to group the operations below by service.
tags:
  # Operations served by the analyzer service.
  - name: Analyzer
    description: Detecting PII entities in text
  # Operations served by the anonymizer service.
  - name: Anonymizer
    description: Anonymizing detected PII text entities with desired values.

# Operations. Each operation declares its own `servers` entry because the
# analyzer and the anonymizer are exposed on different hosts.
# Response status codes are quoted: OpenAPI 3.0 requires string keys so that
# the YAML and JSON forms of the document stay equivalent.
paths:
  # Detect PII entities in a text.
  /analyze:
    post:
      servers:
        - url: https://presidio-analyzer-prod.azurewebsites.net
      tags:
        - Analyzer
      summary: "Analyze Text"
      description: "Recognizes PII entities in a given text and returns their types, locations and score"
      requestBody:
        $ref: "#/components/requestBodies/AnalyzeRequest"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                description: "A list of analysis results"
                type: array
                items:
                  $ref: "#/components/schemas/RecognizerResultWithAnaysisExplanation"
              examples:
                # Response shape when `analysis_explanation` is populated.
                Enhanced response:
                  value:
                    [
                      {
                        "entity_type": "PERSON", "start": 0, "end": 10, "score": 0.85,
                        "analysis_explanation": {
                          "recognizer": "SpacyRecognizer", "pattern_name": null, "pattern": null, "original_score": 0.85,
                          "score": 0.85, "textual_explanation": "Identified as PERSON by Spacy's Named Entity Recognition",
                          "score_context_improvement": 0, "supportive_context_word": "", "validation_result": null
                        },
                        "recognition_metadata": {
                          "recognizer_name": "SpacyRecognizer"
                        }
                      },
                      {
                        "entity_type": "US_DRIVER_LICENSE", "start": 30, "end": 38, "score": 0.6499999999999999,
                        "analysis_explanation": {
                          "recognizer": "UsLicenseRecognizer", "pattern_name": "Driver License - Alphanumeric (weak)",
                          "pattern": "\\\\b([A-Z][0-9]{3,6}|[A-Z][0-9]{5,9}|[A-Z][0-9]{6,8}|[A-Z][0-9]{4,8}|[A-Z][0-9]{9,11}|[A-Z]{1,2}[0-9]{5,6}|H[0-9]{8}|V[0-9]{6}|X[0-9]{8}|A-Z]{2}[0-9]{2,5}|[A-Z]{2}[0-9]{3,7}|[0-9]{2}[A-Z]{3}[0-9]{5,6}|[A-Z][0-9]{13,14}|[A-Z][0-9]{18}|[A-Z][0-9]{6}R|[A-Z][0-9]{9}|[A-Z][0-9]{1,12}|[0-9]{9}[A-Z]|[A-Z]{2}[0-9]{6}[A-Z]|[0-9]{8}[A-Z]{2}|[0-9]{3}[A-Z]{2}[0-9]{4}|[A-Z][0-9][A-Z][0-9][A-Z]|[0-9]{7,8}[A-Z])\\\\b",
                          "original_score": 0.3, "score": 0.6499999999999999, "textual_explanation": null,
                          "score_context_improvement": 0.3499999999999999, "supportive_context_word": "driver",
                          "validation_result": null
                        },
                        "recognition_metadata": {
                          "recognizer_name": "UsLicenseRecognizer"
                        }
                      }
                    ]
                # Response shape when `analysis_explanation` is null.
                Lean response:
                  value:
                    [
                      {
                        "analysis_explanation": null,
                        "end": 38,
                        "entity_type": "US_DRIVER_LICENSE",
                        "score": 0.6499999999999999,
                        "start": 30,
                        "recognition_metadata": {
                          "recognizer_name": "UsLicenseRecognizer"
                        }
                      }
                    ]

  # List the recognizers available for a language.
  /recognizers:
    get:
      servers:
        - url: https://presidio-analyzer-prod.azurewebsites.net
      tags:
        - Analyzer
      summary: "Get Recognizers"
      description: "Get the available PII recognizers for a given language"
      parameters:
        - in: query
          name: language
          schema:
            type: string
            example: en
          description: "Two characters for the desired language in ISO_639-1 format"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                description: "A list of supported recognizers"
                type: array
                items:
                  type: string
                  description: "Recognizer name"
                example:
                  [
                    "CryptoRecognizer", "CreditCardRecognizer", "IbanRecognizer", "UsPhoneRecognizer",
                    "EmailRecognizer", "UsPassportRecognizer", "NhsRecognizer", "IpRecognizer",
                    "SpacyRecognizer", "SgFinRecognizer", "UsSsnRecognizer", "UsBankRecognizer",
                    "DomainRecognizer", "UsLicenseRecognizer", "UsItinRecognizer"
                  ]

  # List the entity types the analyzer can detect for a language.
  /supportedentities:
    get:
      servers:
        - url: https://presidio-analyzer-prod.azurewebsites.net
      tags:
        - Analyzer
      summary: "Get supported entities"
      description: "Get the list of PII entities Presidio-Analyzer is capable of detecting"
      parameters:
        - in: query
          name: language
          schema:
            type: string
            example: en
          description: "Two characters for the desired language in ISO_639-1 format"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                description: "A list of supported entities"
                type: array
                items:
                  $ref: "#/components/schemas/EntityTypes"
                example:
                  [
                    "PHONE_NUMBER", "US_DRIVER_LICENSE", "US_PASSPORT", "LOCATION", "CREDIT_CARD", "CRYPTO",
                    "UK_NHS", "US_SSN", "US_BANK_NUMBER", "EMAIL_ADDRESS", "DATE_TIME", "IP_ADDRESS", "PERSON", "IBAN_CODE",
                    "NRP", "US_ITIN", "MEDICAL_LICENSE", "URL"
                  ]

  # Anonymize previously detected PII entities in a text.
  /anonymize:
    post:
      servers:
        - url: https://presidio-anonymizer-prod.azurewebsites.net
      tags:
        - Anonymizer
      summary: "Anonymize Text"
      requestBody:
        $ref: "#/components/requestBodies/AnonymizeRequest"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/AnonymizeResponse"
              # The person entity is reported as PERSON, the entity type name
              # used by the schemas and request examples of this document.
              examples:
                Replace and Redact Anonymizers:
                  value:
                    {
                      "text": "hello world, my name is ANONYMIZED. My number is: ",
                      "items": [
                        { "operator": "redact", "entity_type": "PHONE_NUMBER", "start": 50, "end": 50, "text": "" },
                        { "operator": "replace", "entity_type": "PERSON", "start": 24, "end": 34, "text": "ANONYMIZED" }
                      ]
                    }
                Replace as default Anonymizer:
                  value:
                    {
                      "text": "hello world, my name is ANONYMIZED. My number is: ANONYMIZED",
                      "items": [
                        { "operator": "replace", "entity_type": "PHONE_NUMBER", "start": 50, "end": 60, "text": "ANONYMIZED" },
                        { "operator": "replace", "entity_type": "PERSON", "start": 24, "end": 34, "text": "ANONYMIZED" }
                      ]
                    }
        "400":
          $ref: "#/components/responses/400BadRequest"
        "422":
          $ref: "#/components/responses/422UnprocessableEntity"

  # List the built-in anonymization operators.
  /anonymizers:
    get:
      servers:
        - url: https://presidio-anonymizer-prod.azurewebsites.net
      tags:
        - Anonymizer
      summary: "Get supported anonymizers"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                description: "A list of all built-in supported anonymizers"
                type: array
                items:
                  description: "The Anonymizer name"
                  type: string
                  example: "mask"
                example:
                  [ "hash", "mask", "redact", "replace", "encrypt" ]

  # Reverse a previous anonymization (decrypt encrypted entities).
  /deanonymize:
    post:
      servers:
        - url: https://presidio-anonymizer-prod.azurewebsites.net
      tags:
        - Anonymizer
      summary: "Deanonymize Text"
      requestBody:
        $ref: "#/components/requestBodies/DeanonymizeRequest"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/DeanonymizeResponse"
              examples:
                Decrypt Single PII:
                  value:
                    {
                      "text": "text_for_encryption",
                      "items": [
                        { "start": 0, "end": 19, "operator": "decrypt", "text": "text_for_encryption", "entity_type": "NUMBER" }
                      ]
                    }
        "400":
          $ref: "#/components/responses/400BadRequest"
        "422":
          $ref: "#/components/responses/422UnprocessableEntity"

  # List the built-in deanonymization operators.
  /deanonymizers:
    get:
      servers:
        - url: https://presidio-anonymizer-prod.azurewebsites.net
      tags:
        - Anonymizer
      summary: "Get supported deanonymizers"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                description: "A list of all built-in supported deanonymizers"
                type: array
                items:
                  description: "The Deanonymizer name"
                  type: string
                  example: "decrypt"
                example:
                  [ "decrypt" ]

  # Liveness probe. Tagged for both services; only the anonymizer host is
  # listed as a server here.
  /health:
    get:
      servers:
        - url: https://presidio-anonymizer-prod.azurewebsites.net
      tags:
        - Anonymizer
        - Analyzer
      summary: "Healthcheck"
      responses:
        "200":
          description: OK
          content:
            text/plain:
              schema:
                type: string
                example: Presidio Anonymizer service is up

# Reusable request bodies, schemas and error responses referenced from the
# operations via $ref.
components:
  requestBodies:
    # Body of POST /analyze.
    AnalyzeRequest:
      required: true
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/AnalyzeRequest"
          examples:
            Minimal Request:
              value:
                {
                  "text": "John Smith drivers license is AC432223",
                  "language": "en"
                }
            Enhanced Request:
              value:
                {
                  "text": "John Smith drivers license is AC432223 and the zip code is 12345",
                  "language": "en",
                  "return_decision_process": false,
                  "correlation_id": "123e4567-e89b-12d3-a456-426614174000",
                  "score_threshold": 0.6,
                  "entities": ["US_DRIVER_LICENSE", "ZIP"],
                  "trace": false,
                  "ad_hoc_recognizers": [
                    {
                      "name": "Zip code Recognizer",
                      "supported_language": "en",
                      "patterns": [
                        {
                          "name": "zip code (weak)",
                          "regex": "(\\b\\d{5}(?:\\-\\d{4})?\\b)",
                          "score": 0.01
                        }
                      ],
                      "context": ["zip", "code"],
                      "supported_entity": "ZIP"
                    }
                  ]
                }

    # Body of POST /anonymize.
    # Each example is written so that it produces the response example of the
    # same name documented for /anonymize (same text and same offsets).
    AnonymizeRequest:
      required: true
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/AnonymizeRequest"
          examples:
            # PERSON is replaced (offsets 24-34 in the result) and the phone
            # number is redacted (empty span 50-50 in the result).
            Replace and Redact Anonymizers:
              value:
                {
                  "text": "hello world, my name is Jane Doe. My number is: 034453334",
                  "anonymizers": {
                    "PERSON": { "type": "replace", "new_value": "ANONYMIZED" },
                    "PHONE_NUMBER": { "type": "redact" }
                  },
                  "analyzer_results": [
                    { "start": 24, "end": 32, "score": 0.8, "entity_type": "PERSON" },
                    { "start": 48, "end": 57, "score": 0.95, "entity_type": "PHONE_NUMBER" }
                  ]
                }
            # A single DEFAULT operator applied to every detected entity.
            Replace as default Anonymizer:
              value:
                {
                  "text": "hello world, my name is Jane Doe. My number is: 034453334",
                  "anonymizers": {
                    "DEFAULT": { "type": "replace", "new_value": "ANONYMIZED" }
                  },
                  "analyzer_results": [
                    { "start": 24, "end": 32, "score": 0.8, "entity_type": "PERSON" },
                    { "start": 48, "end": 57, "score": 0.95, "entity_type": "PHONE_NUMBER" }
                  ]
                }

    # Body of POST /deanonymize.
    DeanonymizeRequest:
      required: true
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/DeanonymizeRequest"
          examples:
            Deanonymize text:
              value:
                {
                  "text": "My name is S184CMt9Drj7QaKQ21JTrpYzghnboTF9pn/neN8JME0=",
                  "deanonymizers": {
                    "PERSON": {
                      "type": "decrypt",
                      "key": "WmZq4t7w!z%C&F)J"
                    }
                  },
                  "anonymizer_results": [
                    {
                      "start": 11,
                      "end": 55,
                      "entity_type": "PERSON"
                    }
                  ]
                }

  schemas:
    # ---- Request payloads ------------------------------------------------
    AnalyzeRequest:
      type: object
      required:
        - text
        - language
      properties:
        text:
          type: string
          description: "The text to analyze"
          example: "hello world, my name is Jane Doe. My number is: 034453334"
        language:
          type: string
          description: "Two characters for the desired language in ISO_639-1 format"
          example: "en"
        correlation_id:
          type: string
          description: "A correlation id to append to headers and traces"
        score_threshold:
          type: number
          format: double
          description: "The minimal detection score threshold"
        entities:
          type: array
          items:
            $ref: "#/components/schemas/EntityTypes"
          description: "A list of entities to analyze"
        return_decision_process:
          type: boolean
          description: "Whether to include analysis explanation in the response"
        ad_hoc_recognizers:
          type: array
          description: "list of recognizers to be used in the context of this request only (ad-hoc)."
          items:
            $ref: "#/components/schemas/PatternRecognizer"
        context:
          type: array
          description: "list of context words which may help to raise recognized entities confidence"
          items:
            description: "The context word"
            type: string
            example: "address"

    AnonymizeRequest:
      type: object
      required:
        - text
        - analyzer_results
      properties:
        text:
          type: string
          description: "The text to anonymize"
          example: "hello world, my name is Jane Doe. My number is: 034453334"
        anonymizers:
          description: "Object where the key is DEFAULT or the ENTITY_TYPE and the value is the anonymizer definition"
          type: object
          additionalProperties:
            anyOf:
              - $ref: "#/components/schemas/Replace"
              - $ref: "#/components/schemas/Redact"
              - $ref: "#/components/schemas/Mask"
              - $ref: "#/components/schemas/Hash"
              - $ref: "#/components/schemas/Encrypt"
          default:
            { "DEFAULT": { "type": "replace", "new_value": "<ENTITY_TYPE>" } }
        analyzer_results:
          type: array
          description: "Array of analyzer detections"
          items:
            $ref: "#/components/schemas/RecognizerResult"

    DeanonymizeRequest:
      type: object
      required:
        - text
        - anonymizer_results
        - deanonymizers
      properties:
        text:
          type: string
          description: "The anonymized text"
          example: "My name is S184CMt9Drj7QaKQ21JTrpYzghnboTF9pn/neN8JME0="
        deanonymizers:
          description: "Object where the key is DEFAULT or the ENTITY_TYPE and the value is decrypt since it is the only one supported"
          type: object
          additionalProperties:
            anyOf:
              - $ref: "#/components/schemas/Decrypt"
          default:
            { "DEFAULT": { "type": "decrypt", "key": "3t6w9z$C&F)J@NcR" } }
        anonymizer_results:
          type: array
          description: "Array of anonymized PIIs"
          items:
            $ref: "#/components/schemas/OperatorResult"

    # ---- Analyzer results ------------------------------------------------
    RecognizerResult:
      type: object
      required:
        - start
        - end
        - score
        - entity_type
      properties:
        start:
          type: integer
          description: "Where the PII starts"
          example: 24
        end:
          type: integer
          description: "Where the PII ends"
          example: 32
        score:
          type: number
          format: double
          description: "The PII detection score"
          example: 0.8
        entity_type:
          $ref: "#/components/schemas/EntityTypes"
        # No sibling keys next to $ref: OpenAPI 3.0 ignores them, and the
        # referenced schema already declares `type: object`.
        recognition_metadata:
          $ref: "#/components/schemas/RecognizedMetadata"

    RecognizedMetadata:
      type: object
      properties:
        recognizer_name:
          type: string
          description: "Name of recognizer that made the decision"

    # NOTE(review): "Anaysis" is a misspelling of "Analysis". The name is kept
    # because it is referenced by $ref from the /analyze operation.
    RecognizerResultWithAnaysisExplanation:
      allOf:
        - $ref: "#/components/schemas/RecognizerResult"
        - type: object
          properties:
            analysis_explanation:
              $ref: "#/components/schemas/AnalysisExplanation"

    # Fields marked nullable are the ones shown as null in the /analyze
    # response examples of this document.
    AnalysisExplanation:
      type: object
      properties:
        recognizer:
          type: string
          description: "Name of recognizer that made the decision"
        pattern_name:
          type: string
          nullable: true
          description: "name of pattern (if decision was made by a PatternRecognizer)"
        pattern:
          type: string
          nullable: true
          description: "Regex pattern that was applied (if PatternRecognizer)"
        original_score:
          type: number
          format: double
          description: "Recognizer's confidence in result"
        score:
          type: number
          format: double
          description: "The PII detection score"
        textual_explanation:
          type: string
          nullable: true
          description: "Free text for describing a decision of a logic or model"
        score_context_improvement:
          type: number
          format: double
          description: "Difference from the original score"
        supportive_context_word:
          type: string
          description: "The context word which helped increase the score"
        validation_result:
          type: number
          format: double
          nullable: true
          description: "Result of a validation (e.g. checksum)"

    # ---- Ad-hoc recognizer definitions -----------------------------------
    Pattern:
      type: object
      properties:
        name:
          type: string
          description: "Name of regular expression pattern"
        regex:
          type: string
          description: "Regex pattern string"
        score:
          type: number
          format: double
          description: "Detection confidence of this pattern (0.01 if very noisy, 0.6-1.0 if very specific)"

    PatternRecognizer:
      type: object
      description: "A regular expressions or deny-list based recognizer"
      properties:
        name:
          type: string
          description: "Name of recognizer"
        supported_language:
          type: string
          description: "Language code supported by this recognizer"
        patterns:
          description: "List of type Pattern containing regex expressions with additional metadata."
          type: array
          items:
            $ref: "#/components/schemas/Pattern"
        deny_list:
          type: array
          description: "List of words to be returned as PII if found."
          items:
            type: string
        context:
          description: "List of words to be used to increase confidence if found in the vicinity of detected entities."
          type: array
          items:
            type: string
        supported_entity:
          type: string
          description: "The name of entity this ad hoc recognizer detects"

    EntityTypes:
      description: "The supported PII entity types."
      type: string
      example: PERSON

    # ---- Anonymizer / deanonymizer operator definitions ------------------
    # Each operator is selected by the value of its `type` property.
    Replace:
      title: Replace
      description: "Replace with a given value"
      type: object
      required:
        - type
        - new_value
      properties:
        type:
          type: string
          description: "replace"
          example: replace
        new_value:
          type: string
          description: "The string to replace with"
          example: VALUE

    Redact:
      title: Redact
      description: "Replace with an empty string"
      type: object
      required:
        - type
      properties:
        type:
          type: string
          description: "redact"
          example: redact

    Mask:
      title: Mask
      description: "Replace with a given character"
      type: object
      required:
        - type
        - masking_char
        - chars_to_mask
      properties:
        type:
          type: string
          description: "mask"
          example: mask
        masking_char:
          type: string
          description: "The replacement character"
          example: "*"
        chars_to_mask:
          type: integer
          description: "The amount of characters that should be replaced"
          example: 4
        from_end:
          type: boolean
          description: "Whether to mask the PII from its end"
          example: true
          default: false

    Hash:
      title: Hash
      description: "Replace with hashed value"
      type: object
      required:
        - type
      properties:
        type:
          type: string
          description: "hash"
          example: hash
        hash_type:
          type: string
          description: "The hashing algorithm"
          enum:
            - md5
            - sha256
            - sha512
          example: md5
          default: md5

    Encrypt:
      title: Encrypt
      description: "Replace with an encrypted value"
      type: object
      required:
        - type
        - key
      properties:
        type:
          type: string
          description: "encrypt"
          example: encrypt
        key:
          type: string
          description: "Cryptographic key of length 128, 192 or 256 bits, in a string format"
          example: "3t6w9z$C&F)J@NcR"

    Decrypt:
      title: Decrypt
      description: "Replace encrypted PII with decrypted text"
      type: object
      required:
        - type
        - key
      properties:
        type:
          type: string
          description: "decrypt"
          example: decrypt
        key:
          type: string
          description: "Cryptographic key of length 128, 192 or 256 bits, in a string format"
          example: "3t6w9z$C&F)J@NcR"

    # ---- Anonymizer responses --------------------------------------------
    AnonymizeResponse:
      type: object
      properties:
        text:
          type: string
        items:
          type: array
          description: "Array of anonymized entities"
          items:
            $ref: "#/components/schemas/OperatorResult"

    OperatorResult:
      required:
        - start
        - end
        - entity_type
      type: object
      properties:
        operator:
          type: string
          description: "Name of the used operator"
        entity_type:
          type: string
          description: "Type of the PII entity"
        start:
          type: integer
          description: "Start index of the changed text"
        end:
          type: integer
          description: "End index in the changed text"
        text:
          type: string
          description: "The new text returned"

    DeanonymizeResponse:
      type: object
      properties:
        text:
          type: string
        items:
          type: array
          description: "Array of deanonymized entities"
          items:
            $ref: "#/components/schemas/OperatorResult"

  # Shared error responses.
  responses:
    400BadRequest:
      description: Bad request
      content:
        application/json:
          schema:
            type: object
            properties:
              error:
                type: string
                example: "Invalid request json"

    422UnprocessableEntity:
      description: Unprocessable Entity
      content:
        application/json:
          schema:
            type: object
            properties:
              error:
                type: string
                example: "Invalid input, text can not be empty"