import torch
from transformers import WhisperFeatureExtractor
from transformers.models.whisper.modeling_whisper import WhisperEncoder
# Load the feature extractor and the encoder-only Whisper checkpoint.
# The encoder weights are converted to half precision (fp16) via `.half()`.
feature_extractor = WhisperFeatureExtractor.from_pretrained("techintermezzo/whisper-encoder-medium")
model = WhisperEncoder.from_pretrained("techintermezzo/whisper-encoder-medium").half()
model.eval()

# NOTE(review): `inputs` is not defined in this snippet and must be provided
# by the surrounding code — presumably a raw audio waveform sampled at
# 16 kHz, given `sampling_rate=16000` below; confirm against the caller.
with torch.inference_mode():
    input_features = feature_extractor(
        inputs, sampling_rate=16000, return_tensors="pt"
    ).input_features
    # The model was cast to half precision above, so the features must use
    # the same dtype (and device) as the weights; otherwise the forward pass
    # can fail with a dtype-mismatch error.
    input_features = input_features.to(device=model.device, dtype=model.dtype)
    last_hidden_state = model(input_features).last_hidden_state