"""Extract ResNet-50 feature vectors for every image in a folder.

The pretrained network is fetched through ``torch.hub`` from the
``pytorch/vision`` repository, its classification head is removed, and the
2048-dimensional pooled activations of each image are stored in
``features_dict`` keyed by file name.
"""
import os

import torch
import torchvision.transforms as transforms
from PIL import Image
# NOTE: the transformers names are no longer used by this script (images are
# not tokenized); the import is retained in case other code relies on it.
from transformers import AutoModel, AutoTokenizer

# 'pytorch/vision:<tag>' is a torch.hub repository spec, not a Hugging Face
# model id, so it has to be loaded with torch.hub.load rather than
# AutoModel.from_pretrained.
model_name = 'pytorch/vision:v0.9.0'
model = torch.hub.load(model_name, 'resnet50', pretrained=True)

# Drop the ImageNet classification layer so the forward pass returns the
# pooled 2048-d feature vector instead of 1000 class logits.
model.fc = torch.nn.Identity()

# Evaluation mode: freezes batch-norm statistics for deterministic inference.
model.eval()

# Tokenizers operate on text and have no meaning for image input. The name is
# kept (as None) only so existing calls that pass it positionally still work.
tokenizer = None

# Preprocessing recommended for torchvision's pretrained ImageNet models.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])


def extract_features(image_path, model, tokenizer, preprocess):
    """Return the feature vector of one image as a NumPy array.

    Args:
        image_path: Path of the image file to read.
        model: Module called on a ``(1, 3, H, W)`` float tensor; its output
            tensor is returned with the batch dimension removed.
        tokenizer: Ignored. Accepted only for backward compatibility with the
            original four-argument call signature.
        preprocess: Callable mapping a PIL image to a ``(3, H, W)`` tensor.

    Returns:
        ``numpy.ndarray`` holding ``model``'s output for the image without
        the leading batch dimension.
    """
    # The context manager closes the underlying file handle; convert()
    # returns an independent in-memory copy that outlives it.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')

    # Preprocess and add the batch dimension the model expects.
    batch = preprocess(image).unsqueeze(0)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        features = model(batch)

    return features.squeeze(0).cpu().numpy()


# NOTE(review): placeholder location -- point this at the directory that
# actually holds the images to process.
images_folder = 'images'

# File extensions treated as images when scanning images_folder.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

# Sorted for a reproducible processing order.
image_names = sorted(
    name for name in os.listdir(images_folder)
    if name.lower().endswith(IMAGE_EXTENSIONS)
)

# Map each image file name to its extracted feature vector.
features_dict = {}

for image_name in image_names:
    image_path = os.path.join(images_folder, image_name)
    features_dict[image_name] = extract_features(
        image_path, model, tokenizer, preprocess
    )