Spaces:
Sleeping
Sleeping
File size: 805 Bytes
d44849f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Rebuild an IndicTrans2 checkpoint around a custom-trained tokenizer:
# load the tokenizer and the pretrained model, resize the model's token
# embeddings to the tokenizer's vocabulary size, then save both side by side.

# Paths are relative to the current working directory (adjust as needed).
TOKENIZER_FILE = "IndicTrans2/huggingface_interface/IndicTransToolkit/tokenizer_training/my_tokenizer/tokenizer.json"
OUTPUT_DIR = "IndicTrans2/huggingface_interface/IndicTransToolkit/tokenizer_training/my_tokenizer/converted_tokenizer/new_model"
MODEL_NAME = "ai4bharat/indictrans2-indic-en-1B"

# TOKENIZER_FILE is a single serialized `tokenizers` JSON file, not a model
# directory or Hub repo id, so it is wrapped directly instead of going through
# AutoTokenizer.from_pretrained (which needs a directory with config files to
# decide which tokenizer class to build).
# NOTE(review): special tokens (pad/eos/unk, ...) are not configured here;
# set them on the tokenizer if downstream code relies on them.
tokenizer = PreTrainedTokenizerFast(tokenizer_file=TOKENIZER_FILE)

# The IndicTrans2 checkpoint ships its own modeling code on the Hub, so it can
# only be loaded with trust_remote_code=True. This executes code from the
# model repository -- only use it with a repository you trust.
model = AutoModelForSeq2SeqLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    ignore_mismatched_sizes=True,
)

# Resize embeddings to match the new tokenizer's vocabulary size.
# NOTE(review): resizing only changes the matrix shape; it does not remap
# token ids, so the model presumably needs fine-tuning with the new
# tokenizer before its output is meaningful -- confirm.
model.resize_token_embeddings(len(tokenizer))

# Save the updated model and tokenizer into the same directory so they can be
# loaded together later.
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
|