anilbhatt1 committed on
Commit
3dc21f1
·
1 Parent(s): b671fba

Update tokenizer.py

Browse files
Files changed (1) hide show
  1. tokenizer.py +5 -1
tokenizer.py CHANGED
@@ -1,10 +1,10 @@
1
  import json
2
  from pathlib import Path
3
  from typing import Optional, Union
 
4
 
5
  import torch
6
 
7
-
8
  class Tokenizer:
9
  def __init__(self, checkpoint_dir: Union[Path, str]) -> None:
10
  checkpoint_dir = Path(checkpoint_dir)
@@ -17,6 +17,10 @@ class Tokenizer:
17
 
18
  # some checkpoints have both files, `.model` takes precedence
19
  print(f'tokenizer.py checkpoint_dir is : {checkpoint_dir}')
 
 
 
 
20
  if (vocabulary_path := checkpoint_dir / "tokenizer.model").is_file():
21
  from sentencepiece import SentencePieceProcessor
22
 
 
1
  import json
2
  from pathlib import Path
3
  from typing import Optional, Union
4
+ import os
5
 
6
  import torch
7
 
 
8
  class Tokenizer:
9
  def __init__(self, checkpoint_dir: Union[Path, str]) -> None:
10
  checkpoint_dir = Path(checkpoint_dir)
 
17
 
18
  # some checkpoints have both files, `.model` takes precedence
19
  print(f'tokenizer.py checkpoint_dir is : {checkpoint_dir}')
20
+ print(f'checking the file : {(checkpoint_dir / "tokenizer.json").is_file()}')
21
+ print(f'Current working directory is : {os.getcwd()}')
22
+ curr_dir = os.getcwd()
23
+ print(f'contents in pwd are : {os.listdir(curr_dir)}')
24
  if (vocabulary_path := checkpoint_dir / "tokenizer.model").is_file():
25
  from sentencepiece import SentencePieceProcessor
26