# OpenAPI 3.0 description of the Presidio REST API (analyzer and anonymizer
# operations are both declared in this document).
openapi: 3.0.0
info:
  description: Context aware, pluggable and customizable PII anonymization service for text and images.
  # Quoted so the version stays the string "2.0" instead of the float 2.0.
  version: "2.0"
  title: Presidio
  contact:
    name: Presidio support
    # NOTE(review): the previous value was the scraping placeholder
    # `[email protected]`, which YAML reads as a flow sequence rather than an
    # e-mail string. Restored to the project's support address - confirm.
    email: presidio@microsoft.com
    url: https://github.com/microsoft/presidio
  # `x-` prefixed keys are OpenAPI specification extensions; this one carries
  # the URL of a logo image.
  x-logo:
    url: "https://upload.wikimedia.org/wikipedia/commons/4/44/Microsoft_logo.svg"
  license:
    name: MIT
    url: "https://github.com/microsoft/presidio/blob/main/LICENSE"
# Link to the long-form documentation that complements this API description.
externalDocs:
  description: Presidio documentation.
  url: "https://microsoft.github.io/presidio/"
# Tag definitions; the operations under `paths` refer to these names to group
# themselves by service.
tags:
  - name: Analyzer
    description: Detecting PII entities in text
  - name: Anonymizer
    description: Anonymizing detected PII text entities with desired values.
# REST operations. Every operation declares its own `servers` entry because
# the analyzer and the anonymizer operations point at different base URLs.
# Response status codes are written as quoted strings, as OpenAPI requires for
# JSON/YAML compatibility.
paths:
  /analyze:
    post:
      servers:
        - url: https://presidio-analyzer-prod.azurewebsites.net
      tags:
        - Analyzer
      summary: "Analyze Text"
      description: "Recognizes PII entities in a given text and returns their types, locations and score"
      requestBody:
        $ref: "#/components/requestBodies/AnalyzeRequest"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                description: "A list of analysis results"
                type: array
                items:
                  # NOTE(review): the schema name is spelled "Anaysis"; it
                  # matches the definition under components/schemas, so it is
                  # kept as-is to keep the reference resolvable.
                  $ref: "#/components/schemas/RecognizerResultWithAnaysisExplanation"
              examples:
                # Response that includes the `analysis_explanation` details.
                # NOTE(review): the US_DRIVER_LICENSE pattern string contains
                # `|A-Z]{2}` (no opening bracket); it is reproduced verbatim -
                # confirm against the recognizer's actual regex.
                Enhanced response:
                  value:
                    [
                      {
                        "entity_type": "PERSON", "start": 0, "end": 10, "score": 0.85,
                        "analysis_explanation": {
                          "recognizer": "SpacyRecognizer", "pattern_name": null, "pattern": null, "original_score": 0.85,
                          "score": 0.85, "textual_explanation": "Identified as PERSON by Spacy's Named Entity Recognition",
                          "score_context_improvement": 0, "supportive_context_word": "", "validation_result": null
                        },
                        "recognition_metadata": {
                          "recognizer_name": "SpacyRecognizer"
                        }
                      },
                      {
                        "entity_type": "US_DRIVER_LICENSE", "start": 30, "end": 38, "score": 0.6499999999999999,
                        "analysis_explanation": {
                          "recognizer": "UsLicenseRecognizer", "pattern_name": "Driver License - Alphanumeric (weak)",
                          "pattern": "\\\\b([A-Z][0-9]{3,6}|[A-Z][0-9]{5,9}|[A-Z][0-9]{6,8}|[A-Z][0-9]{4,8}|[A-Z][0-9]{9,11}|[A-Z]{1,2}[0-9]{5,6}|H[0-9]{8}|V[0-9]{6}|X[0-9]{8}|A-Z]{2}[0-9]{2,5}|[A-Z]{2}[0-9]{3,7}|[0-9]{2}[A-Z]{3}[0-9]{5,6}|[A-Z][0-9]{13,14}|[A-Z][0-9]{18}|[A-Z][0-9]{6}R|[A-Z][0-9]{9}|[A-Z][0-9]{1,12}|[0-9]{9}[A-Z]|[A-Z]{2}[0-9]{6}[A-Z]|[0-9]{8}[A-Z]{2}|[0-9]{3}[A-Z]{2}[0-9]{4}|[A-Z][0-9][A-Z][0-9][A-Z]|[0-9]{7,8}[A-Z])\\\\b",
                          "original_score": 0.3, "score": 0.6499999999999999, "textual_explanation": null,
                          "score_context_improvement": 0.3499999999999999, "supportive_context_word": "driver",
                          "validation_result": null
                        },
                        "recognition_metadata": {
                          "recognizer_name": "UsLicenseRecognizer"
                        }
                      }
                    ]
                # Response in which `analysis_explanation` is null.
                Lean response:
                  value:
                    [
                      {
                        "analysis_explanation": null,
                        "end": 38,
                        "entity_type": "US_DRIVER_LICENSE",
                        "score": 0.6499999999999999,
                        "start": 30,
                        "recognition_metadata": {
                          "recognizer_name": "UsLicenseRecognizer"
                        }
                      }
                    ]
  /recognizers:
    get:
      servers:
        - url: https://presidio-analyzer-prod.azurewebsites.net
      tags:
        - Analyzer
      summary: "Get Recognizers"
      description: "Get the available PII recognizers for a given language"
      parameters:
        - in: query
          name: language
          schema:
            type: string
          example: en
          description: "Two characters for the desired language in ISO_639-1 format"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                description: "A list of supported recognizers"
                type: array
                items:
                  type: string
                  description: "Recognizer name"
                example:
                  [
                    "CryptoRecognizer", "CreditCardRecognizer", "IbanRecognizer", "UsPhoneRecognizer",
                    "EmailRecognizer", "UsPassportRecognizer", "NhsRecognizer", "IpRecognizer",
                    "SpacyRecognizer", "SgFinRecognizer", "UsSsnRecognizer", "UsBankRecognizer",
                    "DomainRecognizer", "UsLicenseRecognizer", "UsItinRecognizer"
                  ]
  /supportedentities:
    get:
      servers:
        - url: https://presidio-analyzer-prod.azurewebsites.net
      tags:
        - Analyzer
      summary: "Get supported entities"
      description: "Get the list of PII entities Presidio-Analyzer is capable of detecting"
      parameters:
        - in: query
          name: language
          schema:
            type: string
          example: en
          description: "Two characters for the desired language in ISO_639-1 format"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                description: "A list of supported entities"
                type: array
                items:
                  $ref: "#/components/schemas/EntityTypes"
                # The example key used to be empty (which resolves to null).
                # It now lists entity names that already appear in this
                # document's other examples; it is illustrative, not the
                # complete set.
                example: ["PERSON", "PHONE_NUMBER", "US_DRIVER_LICENSE"]
  /anonymize:
    post:
      servers:
        - url: https://presidio-anonymizer-prod.azurewebsites.net
      tags:
        - Anonymizer
      summary: "Anonymize Text"
      requestBody:
        $ref: "#/components/requestBodies/AnonymizeRequest"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/AnonymizeResponse"
              # Each example is the result of the request example that has the
              # same name under components/requestBodies/AnonymizeRequest, so
              # entity types, operators and offsets line up with that input.
              examples:
                # PERSON (24-32) is redacted to "", which shifts the replaced
                # PHONE_NUMBER to 40-50 in the output text.
                Replace and Redact Anonymizers:
                  value:
                    {
                      "text": "hello world, my name is . My number is: ANONYMIZED",
                      "items": [
                        { "operator": "replace", "entity_type": "PHONE_NUMBER", "start": 40, "end": 50, "text": "ANONYMIZED" },
                        { "operator": "redact", "entity_type": "PERSON", "start": 24, "end": 24, "text": "" }
                      ]
                    }
                Replace as default Anonymizer:
                  value:
                    {
                      "text": "hello world, my name is ANONYMIZED.",
                      "items": [
                        { "operator": "replace", "entity_type": "PERSON", "start": 24, "end": 34, "text": "ANONYMIZED" }
                      ]
                    }
        "400":
          $ref: "#/components/responses/400BadRequest"
        "422":
          $ref: "#/components/responses/422UnprocessableEntity"
  /anonymizers:
    get:
      servers:
        - url: https://presidio-anonymizer-prod.azurewebsites.net
      tags:
        - Anonymizer
      summary: "Get supported anonymizers"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                description: "A list of all built-in supported anonymizers"
                type: array
                items:
                  description: "The Anonymizer name"
                  type: string
                  example: "mask"
                example: ["hash", "mask", "redact", "replace", "encrypt"]
  /deanonymize:
    post:
      servers:
        - url: https://presidio-anonymizer-prod.azurewebsites.net
      tags:
        - Anonymizer
      summary: "Deanonymize Text"
      requestBody:
        $ref: "#/components/requestBodies/DeanonymizeRequest"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/DeanonymizeResponse"
              examples:
                # NOTE(review): this output does not correspond to the
                # "Deanonymize text" request example (different text and
                # entity type) - confirm whether the two should be aligned.
                Decrypt Single PII:
                  value:
                    {
                      "text": "text_for_encryption",
                      "items": [
                        { "start": 0, "end": 19, "operator": "decrypt", "text": "text_for_encryption", "entity_type": "NUMBER" }
                      ]
                    }
        "400":
          $ref: "#/components/responses/400BadRequest"
        "422":
          $ref: "#/components/responses/422UnprocessableEntity"
  /deanonymizers:
    get:
      servers:
        - url: https://presidio-anonymizer-prod.azurewebsites.net
      tags:
        - Anonymizer
      summary: "Get supported deanonymizers"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                description: "A list of all built-in supported deanonymizers"
                type: array
                items:
                  description: "The Deanonymizer name"
                  type: string
                  example: "decrypt"
                example: ["decrypt"]
  /health:
    get:
      # NOTE(review): the operation is tagged for both services but only the
      # anonymizer server is listed, and the example text names the
      # anonymizer - confirm whether the analyzer server belongs here too.
      servers:
        - url: https://presidio-anonymizer-prod.azurewebsites.net
      tags:
        - Anonymizer
        - Analyzer
      summary: "Healthcheck"
      responses:
        "200":
          description: OK
          content:
            text/plain:
              schema:
                type: string
                example: Presidio Anonymizer service is up
# Reusable definitions referenced from `paths` through `$ref`: request bodies
# (with their examples), payload schemas and shared error responses.
components:
  requestBodies:
    AnalyzeRequest:
      required: true
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/AnalyzeRequest"
          examples:
            # Only the two required fields of the AnalyzeRequest schema.
            Minimal Request:
              value:
                {
                  "text": "John Smith drivers license is AC432223",
                  "language": "en"
                }
            # Adds optional fields, including an ad-hoc pattern recognizer.
            Enhanced Request:
              value:
                {
                  "text": "John Smith drivers license is AC432223 and the zip code is 12345",
                  "language": "en",
                  "return_decision_process": false,
                  "correlation_id": "123e4567-e89b-12d3-a456-426614174000",
                  "score_threshold": 0.6,
                  "entities": ["US_DRIVER_LICENSE", "ZIP"],
                  "trace": false,
                  "ad_hoc_recognizers": [
                    {
                      "name": "Zip code Recognizer",
                      "supported_language": "en",
                      "patterns": [
                        {
                          "name": "zip code (weak)",
                          "regex": "(\\b\\d{5}(?:\\-\\d{4})?\\b)",
                          "score": 0.01
                        }
                      ],
                      "context": ["zip", "code"],
                      "supported_entity": "ZIP"
                    }
                  ]
                }
    AnonymizeRequest:
      required: true
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/AnonymizeRequest"
          # The example bodies carry no trailing commas so they remain valid
          # JSON when copied out of this file.
          examples:
            Replace and Redact Anonymizers:
              value:
                {
                  "text": "hello world, my name is Jane Doe. My number is: 034453334",
                  "anonymizers": {
                    "PERSON": { "type": "redact" },
                    "PHONE_NUMBER": { "type": "replace", "new_value": "ANONYMIZED" }
                  },
                  "analyzer_results": [
                    { "start": 24, "end": 32, "score": 0.8, "entity_type": "PERSON" },
                    { "start": 48, "end": 57, "score": 0.95, "entity_type": "PHONE_NUMBER" }
                  ]
                }
            Replace as default Anonymizer:
              value:
                {
                  "text": "hello world, my name is Jane Doe.",
                  "anonymizers": {
                    "DEFAULT": { "type": "replace", "new_value": "ANONYMIZED" }
                  },
                  "analyzer_results": [
                    { "start": 24, "end": 32, "score": 0.8, "entity_type": "PERSON" }
                  ]
                }
    DeanonymizeRequest:
      required: true
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/DeanonymizeRequest"
          examples:
            Deanonymize text:
              value:
                {
                  "text": "My name is S184CMt9Drj7QaKQ21JTrpYzghnboTF9pn/neN8JME0=",
                  "deanonymizers": {
                    "PERSON": {
                      "type": "decrypt",
                      "key": "WmZq4t7w!z%C&F)J"
                    }
                  },
                  "anonymizer_results": [
                    {
                      "start": 11,
                      "end": 55,
                      "entity_type": "PERSON"
                    }
                  ]
                }
  schemas:
    # Body of POST /analyze.
    AnalyzeRequest:
      type: object
      required:
        - text
        - language
      properties:
        text:
          type: string
          description: "The text to analyze"
          example: "hello world, my name is Jane Doe. My number is: 034453334"
        language:
          type: string
          description: "Two characters for the desired language in ISO_639-1 format"
          example: "en"
        correlation_id:
          type: string
          description: "A correlation id to append to headers and traces"
        score_threshold:
          type: number
          format: double
          description: "The minimal detection score threshold"
        entities:
          type: array
          items:
            $ref: "#/components/schemas/EntityTypes"
          description: "A list of entities to analyze"
        return_decision_process:
          type: boolean
          description: "Whether to include analysis explanation in the response"
        ad_hoc_recognizers:
          type: array
          description: "list of recognizers to be used in the context of this request only (ad-hoc)."
          items:
            $ref: "#/components/schemas/PatternRecognizer"
        context:
          type: array
          description: "list of context words which may help to raise recognized entities confidence"
          items:
            description: "The context word"
            type: string
            example: "address"
    # Body of POST /anonymize.
    AnonymizeRequest:
      type: object
      required:
        - text
        - analyzer_results
      properties:
        text:
          type: string
          description: "The text to anonymize"
          example: "hello world, my name is Jane Doe. My number is: 034453334"
        anonymizers:
          description: "Object where the key is DEFAULT or the ENTITY_TYPE and the value is the anonymizer definition"
          type: object
          # Each value must match one of the anonymizer schemas defined below.
          additionalProperties:
            anyOf:
              - $ref: "#/components/schemas/Replace"
              - $ref: "#/components/schemas/Redact"
              - $ref: "#/components/schemas/Mask"
              - $ref: "#/components/schemas/Hash"
              - $ref: "#/components/schemas/Encrypt"
          default:
            { "DEFAULT": { "type": "replace", "new_value": "<ENTITY_TYPE>" } }
        analyzer_results:
          type: array
          description: "Array of analyzer detections"
          items:
            $ref: "#/components/schemas/RecognizerResult"
    # Body of POST /deanonymize.
    DeanonymizeRequest:
      type: object
      required:
        - text
        - anonymizer_results
        - deanonymizers
      properties:
        text:
          type: string
          description: "The anonymized text"
          example: "My name is S184CMt9Drj7QaKQ21JTrpYzghnboTF9pn/neN8JME0="
        deanonymizers:
          description: "Object where the key is DEFAULT or the ENTITY_TYPE and the value is decrypt since it is the only one supported"
          type: object
          additionalProperties:
            anyOf:
              - $ref: "#/components/schemas/Decrypt"
          default:
            { "DEFAULT": { "type": "decrypt", "key": "3t6w9z$C&F)J@NcR" } }
        anonymizer_results:
          type: array
          description: "Array of anonymized PIIs"
          items:
            $ref: "#/components/schemas/OperatorResult"
    # A single detection: span offsets, score and entity type.
    RecognizerResult:
      type: object
      required:
        - start
        - end
        - score
        - entity_type
      properties:
        start:
          type: integer
          description: "Where the PII starts"
          example: 24
        end:
          type: integer
          description: "Where the PII ends"
          example: 32
        score:
          type: number
          format: double
          description: "The PII detection score"
          example: 0.8
        entity_type:
          $ref: "#/components/schemas/EntityTypes"
        recognition_metadata:
          # No sibling keywords next to `$ref`: OpenAPI 3.0 ignores them, and
          # the referenced schema already declares `type: object`.
          $ref: "#/components/schemas/RecognizedMetadata"
    RecognizedMetadata:
      type: object
      properties:
        recognizer_name:
          type: string
          description: "Name of recognizer that made the decision"
    # RecognizerResult extended with the optional decision explanation.
    # The name (including its spelling) is referenced from /analyze and is
    # therefore kept unchanged.
    RecognizerResultWithAnaysisExplanation:
      allOf:
        - $ref: "#/components/schemas/RecognizerResult"
        - type: object
          properties:
            analysis_explanation:
              $ref: "#/components/schemas/AnalysisExplanation"
    AnalysisExplanation:
      type: object
      # The /analyze "Lean response" example returns null for the whole
      # explanation, and the "Enhanced response" example returns null for
      # several fields, hence the `nullable: true` markers.
      nullable: true
      properties:
        recognizer:
          type: string
          description: "Name of recognizer that made the decision"
        pattern_name:
          type: string
          nullable: true
          description: "name of pattern (if decision was made by a PatternRecognizer)"
        pattern:
          type: string
          nullable: true
          description: "Regex pattern that was applied (if PatternRecognizer)"
        original_score:
          type: number
          format: double
          description: "Recognizer's confidence in result"
        score:
          type: number
          format: double
          description: "The PII detection score"
        textual_explanation:
          type: string
          nullable: true
          description: "Free text for describing a decision of a logic or model"
        score_context_improvement:
          type: number
          format: double
          description: "Difference from the original score"
        supportive_context_word:
          type: string
          description: "The context word which helped increase the score"
        validation_result:
          type: number
          format: double
          nullable: true
          description: "Result of a validation (e.g. checksum)"
    # One regular expression of a PatternRecognizer, with its confidence.
    Pattern:
      type: object
      properties:
        name:
          type: string
          description: "Name of regular expression pattern"
        regex:
          type: string
          description: "Regex pattern string"
        score:
          type: number
          format: double
          description: "Detection confidence of this pattern (0.01 if very noisy, 0.6-1.0 if very specific)"
    PatternRecognizer:
      type: object
      description: "A regular expressions or deny-list based recognizer"
      properties:
        name:
          type: string
          description: "Name of recognizer"
        supported_language:
          type: string
          description: "Language code supported by this recognizer"
        patterns:
          description: "List of type Pattern containing regex expressions with additional metadata."
          type: array
          items:
            $ref: "#/components/schemas/Pattern"
        deny_list:
          type: array
          description: "List of words to be returned as PII if found."
          items:
            type: string
        context:
          description: "List of words to be used to increase confidence if found in the vicinity of detected entities."
          type: array
          items:
            type: string
        supported_entity:
          type: string
          description: "The name of entity this ad hoc recognizer detects"
    EntityTypes:
      description: "The supported PII entity types."
      type: string
      example: PERSON
    # Anonymizer definitions: the alternatives accepted as values of
    # AnonymizeRequest.anonymizers.
    Replace:
      title: Replace
      description: "Replace with a given value"
      type: object
      required:
        - type
        - new_value
      properties:
        type:
          type: string
          description: "replace"
          example: replace
        new_value:
          type: string
          description: "The string to replace with"
          example: VALUE
    Redact:
      title: Redact
      description: "Replace with an empty string"
      type: object
      required:
        - type
      properties:
        type:
          type: string
          description: "redact"
          example: redact
    Mask:
      title: Mask
      description: "Replace with a given character"
      type: object
      required:
        - type
        - masking_char
        - chars_to_mask
      properties:
        type:
          type: string
          description: "mask"
          example: mask
        masking_char:
          type: string
          description: "The replacement character"
          # Quoted: a bare * would be read as a YAML alias indicator.
          example: "*"
        chars_to_mask:
          type: integer
          description: "The amount of characters that should be replaced"
          example: 4
        from_end:
          type: boolean
          description: "Whether to mask the PII from its end"
          example: true
          default: false
    Hash:
      title: Hash
      description: "Replace with hashed value"
      type: object
      required:
        - type
      properties:
        type:
          type: string
          description: "hash"
          example: hash
        hash_type:
          type: string
          description: "The hashing algorithm"
          enum:
            - md5
            - sha256
            - sha512
          example: md5
          default: md5
    Encrypt:
      title: Encrypt
      description: "Replace with an encrypted value"
      type: object
      required:
        - type
        - key
      properties:
        type:
          type: string
          description: "encrypt"
          example: encrypt
        key:
          type: string
          description: "Cryptographic key of length 128, 192 or 256 bits, in a string format"
          example: "3t6w9z$C&F)J@NcR"
    # The only deanonymizer accepted by DeanonymizeRequest.deanonymizers.
    Decrypt:
      title: Decrypt
      description: "Replace encrypted PII with decrypted text"
      type: object
      required:
        - type
        - key
      properties:
        type:
          type: string
          description: "decrypt"
          example: decrypt
        key:
          type: string
          description: "Cryptographic key of length 128, 192 or 256 bits, in a string format"
          example: "3t6w9z$C&F)J@NcR"
    AnonymizeResponse:
      type: object
      properties:
        text:
          type: string
        items:
          type: array
          description: "Array of anonymized entities"
          items:
            $ref: "#/components/schemas/OperatorResult"
    # One applied operator; used in the anonymize and deanonymize responses
    # and as the input items of DeanonymizeRequest.anonymizer_results.
    OperatorResult:
      required:
        - start
        - end
        - entity_type
      type: object
      properties:
        operator:
          type: string
          description: "Name of the used operator"
        entity_type:
          type: string
          description: "Type of the PII entity"
        start:
          type: integer
          description: "Start index of the changed text"
        end:
          type: integer
          description: "End index in the changed text"
        text:
          type: string
          description: "The new text returned"
    DeanonymizeResponse:
      type: object
      properties:
        text:
          type: string
        items:
          type: array
          description: "Array of deanonymized entities"
          items:
            $ref: "#/components/schemas/OperatorResult"
  # Error responses shared by POST /anonymize and POST /deanonymize.
  responses:
    400BadRequest:
      description: Bad request
      content:
        application/json:
          schema:
            type: object
            properties:
              error:
                type: string
                example: "Invalid request json"
    422UnprocessableEntity:
      description: Unprocessable Entity
      content:
        application/json:
          schema:
            type: object
            properties:
              error:
                type: string
                example: "Invalid input, text can not be empty"