# coding=utf-8
# Copyright 2022 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

from transformers import DonutProcessor


DONUT_PRETRAINED_MODEL_NAME = "naver-clova-ix/donut-base"


class DonutProcessorTest(unittest.TestCase):
    """Checks for ``DonutProcessor`` loaded from the public Donut base checkpoint."""

    def setUp(self):
        """Load a fresh processor before each test."""
        self.processor = DonutProcessor.from_pretrained(DONUT_PRETRAINED_MODEL_NAME)

    def test_token2json(self):
        """``token2json`` turns a tagged token sequence into the expected nested dict."""
        expected_json = {
            "name": "John Doe",
            "age": "99",
            "city": "Atlanta",
            "state": "GA",
            "zip": "30301",
            "phone": "123-4567",
            "nicknames": [{"nickname": "Johnny"}, {"nickname": "JD"}],
        }

        # Each field is wrapped as <s_key>value</s_key>; repeated items inside
        # a field are separated by <sep/>, which is what yields the list under
        # "nicknames". Without this markup there is nothing for token2json to
        # parse into keys.
        sequence = (
            "<s_name>John Doe</s_name><s_age>99</s_age><s_city>Atlanta</s_city>"
            "<s_state>GA</s_state><s_zip>30301</s_zip><s_phone>123-4567</s_phone>"
            "<s_nicknames><s_nickname>Johnny</s_nickname>"
            "<sep/><s_nickname>JD</s_nickname></s_nicknames>"
        )

        actual_json = self.processor.token2json(sequence)

        self.assertDictEqual(actual_json, expected_json)