"""Inspect a tokenizer ``vocab.json``: print its size and a sample of entries.

Usage:
    python <this_script> [path/to/vocab.json]

When no path is given on the command line, ``vocab_path`` below is used.
"""

import json
import sys
from itertools import islice

# Path to your vocab.json (used when no path is passed on the command line).
vocab_path = "/Users/apple/Desktop/indictrans2/IndicTrans2/huggingface_interface/IndicTransToolkit/tokenizer_training/my_tokenizer/vocab.json"

# Number of entries shown in the sample. The original loop printed the entry
# at index 10 before breaking, i.e. 11 entries in total; that count is kept.
SAMPLE_SIZE = 11


def load_vocab(path: str) -> dict:
    """Load and return the JSON content of the vocab file at *path*.

    The file is read as UTF-8. The rest of this script iterates the result
    with ``.items()``, so the top-level JSON value is expected to be an
    object (presumably token -> id; verify against the tokenizer that wrote
    the file).

    Raises:
        OSError: if the file cannot be opened (e.g. ``FileNotFoundError``).
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def print_vocab_summary(vocab: dict, sample_size: int = SAMPLE_SIZE) -> None:
    """Print the vocab size followed by its first *sample_size* entries.

    Entries are printed in the mapping's own iteration order, one
    ``key: value`` pair per line. If the vocab has fewer entries than
    *sample_size*, all of them are printed.
    """
    print(f"Vocab size: {len(vocab)}")
    print("Sample tokens:")
    # islice stops after sample_size items without materializing the items.
    for token, idx in islice(vocab.items(), sample_size):
        print(f"{token}: {idx}")


def main(argv=None) -> None:
    """Script entry point.

    Args:
        argv: command-line arguments excluding the program name. Defaults to
            ``sys.argv[1:]``. The first argument, when present, overrides
            ``vocab_path``.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    path = args[0] if args else vocab_path
    print_vocab_summary(load_vocab(path))


# Guarded so that importing this module has no file-system or stdout effects.
if __name__ == "__main__":
    main()