# Spaces: stt-model-ml-en (Sleeping)
# Path: IndicTrans2/huggingface_interface/IndicTransToolkit/tokenizer_training/testing_json.py
"""Print the size and the first few entries of a tokenizer ``vocab.json``.

Run as a script. An optional first command-line argument overrides the
default ``vocab_path``.
"""
import json
import sys
from itertools import islice

# Default path to the vocab.json to inspect; used when no path is given on
# the command line.
vocab_path = "/Users/apple/Desktop/indictrans2/IndicTrans2/huggingface_interface/IndicTransToolkit/tokenizer_training/my_tokenizer/vocab.json"

# The original loop printed the entries at indices 0..10 before breaking,
# i.e. 11 entries; keep that count so the script's output is unchanged.
SAMPLE_SIZE = 11


def load_vocab(path: str):
    """Return the object decoded from the UTF-8 JSON file at *path*.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def format_vocab_summary(vocab: dict, sample_size: int = SAMPLE_SIZE) -> list:
    """Build the report lines for *vocab*.

    Args:
        vocab: mapping whose items are shown as ``key: value``
            (presumably token -> id; the file's schema is not checked).
        sample_size: maximum number of entries to list, in the mapping's
            iteration order.

    Returns:
        A list of strings: the size line, the ``Sample tokens:`` header,
        then at most *sample_size* ``key: value`` lines.
    """
    lines = [f"Vocab size: {len(vocab)}", "Sample tokens:"]
    # islice stops after sample_size items without a manual counter/break.
    lines.extend(
        f"{token}: {idx}" for token, idx in islice(vocab.items(), sample_size)
    )
    return lines


def main(argv=None) -> None:
    """Load the vocab and print its summary.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``. The first element, if present, is used as the
            vocab path instead of ``vocab_path``.
    """
    args = sys.argv[1:] if argv is None else argv
    path = args[0] if args else vocab_path
    for line in format_vocab_summary(load_vocab(path)):
        print(line)


if __name__ == "__main__":
    main()