w32zhong committed on
Commit
03f842c
·
1 Parent(s): 10f7cbb
Files changed (3) hide show
  1. .gitignore +1 -0
  2. README.md +5 -2
  3. test.py +1 -0
.gitignore CHANGED
@@ -1,3 +1,4 @@
1
  ckpt/
2
  *.tar.gz
3
  *.swp
 
 
1
  ckpt/
2
  *.tar.gz
3
  *.swp
4
+ pya0
README.md CHANGED
@@ -26,8 +26,11 @@ Download your tokenizer, model checkpoints, and optionally the training logs (`e
26
 
27
  Optionally, test model using the MLM task:
28
  ```sh
29
- pip install pya0
30
- python test.py ./ckpt/to/tokenizer ./ckpt/to/model
 
 
 
31
  ```
32
  > **Note**
33
  > Modify the test examples in `test.txt` to play with it.
 
26
 
27
  Optionally, test model using the MLM task:
28
  ```sh
29
+ pip install pya0 # for math token preprocessing
30
+ # testing local checkpoints:
31
+ python test.py ./ckpt/math-tokenizer ./ckpt/2-2-0/encoder.ckpt
32
+ # testing Model Hub checkpoints:
33
+ python test.py approach0/coco-mae-220 approach0/coco-mae-220
34
  ```
35
  > **Note**
36
  > Modify the test examples in `test.txt` to play with it.
test.py CHANGED
@@ -43,6 +43,7 @@ def test(tokenizer_name_or_path, model_name_or_path, test_file='test.txt'):
43
  for pos in filter(lambda x: x!=0, maskpos):
44
  tokens[pos-1] = '[MASK]'
45
  sentence = ' '.join(tokens)
 
46
  tokens = tokenizer(sentence,
47
  padding=True, truncation=True, return_tensors="pt")
48
  #print(tokenizer.decode(tokens['input_ids'][0]))
 
43
  for pos in filter(lambda x: x!=0, maskpos):
44
  tokens[pos-1] = '[MASK]'
45
  sentence = ' '.join(tokens)
46
+ sentence = sentence.replace('[mask]', '[MASK]')
47
  tokens = tokenizer(sentence,
48
  padding=True, truncation=True, return_tensors="pt")
49
  #print(tokenizer.decode(tokens['input_ids'][0]))