from transformers import Wav2Vec2ProcessorWithLM
import torchaudio
import torch
from datasets import load_dataset
from transformers import AutoModelForCTC, AutoProcessor
import torchaudio.functional as F

# Model/processor location handed to `from_pretrained`; "." is a relative
# path, so this presumably expects the script to be run from the directory
# that holds the model files -- TODO confirm.
model_id = "."

model = AutoModelForCTC.from_pretrained(model_id)
processor = AutoProcessor.from_pretrained(model_id)

# Vocabulary mapping taken from the processor's tokenizer.
vocab_dict = processor.tokenizer.get_vocab()
print(vocab_dict)

# Rebuild the vocabulary with lower-cased keys, inserted in ascending order of
# their mapped value. NOTE: keys that differ only by case collapse into a
# single entry; the one with the larger value is processed last and wins.
sorted_vocab_dict = {
    k.lower(): v
    for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])
}