|
""" |
|
Pruthi2019: Combating Adversarial Misspellings with Robust Word Recognition

===========================================================================
|
|
|
""" |
|
from textattack import Attack |
|
from textattack.constraints.overlap import MaxWordsPerturbed |
|
from textattack.constraints.pre_transformation import ( |
|
MinWordLength, |
|
RepeatModification, |
|
StopwordModification, |
|
) |
|
from textattack.goal_functions import UntargetedClassification |
|
from textattack.search_methods import GreedySearch |
|
from textattack.transformations import ( |
|
CompositeTransformation, |
|
WordSwapNeighboringCharacterSwap, |
|
WordSwapQWERTY, |
|
WordSwapRandomCharacterDeletion, |
|
WordSwapRandomCharacterInsertion, |
|
) |
|
|
|
from .attack_recipe import AttackRecipe |
|
|
|
|
|
class Pruthi2019(AttackRecipe):
    """Character-level typo attack recipe after Pruthi et al., 2019.

    Based on the attack from "Combating Adversarial Misspellings with
    Robust Word Recognition" (https://arxiv.org/abs/1905.11268).

    Perturbations are drawn from four character-level word swaps that
    imitate common typing mistakes:

    - swapping two neighboring characters,
    - deleting a character,
    - inserting a character,
    - replacing a character with an adjacent key on a QWERTY keyboard.

    :param model_wrapper: Model wrapper handed to the goal function.
    :param max_num_word_swaps: Upper bound forwarded to
        ``MaxWordsPerturbed`` as ``max_num_words``.
    """

    @staticmethod
    def build(model_wrapper, max_num_word_swaps=1):
        """Assemble and return the ``Attack`` for this recipe.

        :param model_wrapper: Model wrapper passed to
            ``UntargetedClassification``.
        :param max_num_word_swaps: Value used as ``max_num_words`` for the
            ``MaxWordsPerturbed`` constraint (defaults to 1).
        :return: An ``Attack`` built from the goal function, constraints,
            composite transformation and greedy search created here.
        """
        # All four swaps are configured identically: random_one is off and
        # both skip_first_char / skip_last_char are on.
        # NOTE(review): presumably this keeps the first and last letter of
        # each word intact and enumerates every candidate edit — confirm
        # against the word-swap transformation docs.
        shared_swap_options = {
            "random_one": False,
            "skip_first_char": True,
            "skip_last_char": True,
        }
        typo_swap_classes = (
            WordSwapNeighboringCharacterSwap,
            WordSwapRandomCharacterDeletion,
            WordSwapRandomCharacterInsertion,
            WordSwapQWERTY,
        )
        transformation = CompositeTransformation(
            [swap_cls(**shared_swap_options) for swap_cls in typo_swap_classes]
        )

        # Constraint order is kept exactly as the recipe defines it.
        constraints = []
        constraints.append(MinWordLength(min_length=4))
        constraints.append(StopwordModification())
        constraints.append(MaxWordsPerturbed(max_num_words=max_num_word_swaps))
        constraints.append(RepeatModification())

        # Untargeted goal: built from the wrapped model only.
        goal_function = UntargetedClassification(model_wrapper)

        return Attack(goal_function, constraints, transformation, GreedySearch())
|
|