NamProlah
/

upload-testing

Model card Files Files and versions Community

upload-testing / recommend.py

NamProlah

Upload 9 files

af1bda1 over 1 year ago

raw

history blame contribute delete

4.74 kB

	import argparse
	import os.path
	import joblib
	import numpy as np
	import pandas as pd
	import tensorflow as tf
	from PIL import Image
	from matplotlib import pyplot as plt
	from sklearn.metrics.pairwise import cosine_similarity

	from classify import classify


	def recommend(
	ref_path: str, num_recommendations: int,
	data_path: str, clf_path: str, fe_path: str, clu_path: str,
	) -> list:
	"""
	Recommends similar images based on a reference image.

	:param ref_path: Path to the reference image.
	:param num_recommendations: Number of recommended images to return.
	:param data_path: Path to the .csv data file containing recommender database image feature vectors. This file must be generated using the same feature extractor specified in fe_path.
	:param clf_path: Path to the classifier model file.
	:param fe_path: Path to the feature extraction model file.
	:param clu_path: Path to the clustering model file.
	:return: List of paths to the recommended images.
	"""
	if num_recommendations < 1:
	raise ValueError('Number of recommendations cannot be smaller than 1.')

	df_rec = pd.read_csv(data_path)
	fe = tf.keras.models.load_model(fe_path)
	clu = joblib.load(clu_path)
	clu.set_params(n_clusters=int(np.sqrt(len(df_rec) / num_recommendations)))

	ref_processed, ref_class = classify(ref_path, classifier_path=clf_path, return_original=False, verbose=False)
	recommendations = df_rec[df_rec['Class'] == ref_class]

	# Extract reference image feature vector
	ref_processed = np.squeeze(ref_processed)
	ref_feature_vector = fe.predict(
	tf.expand_dims(ref_processed, axis=0),
	verbose=0
	)
	ref_feature_vector = ref_feature_vector.astype(float)
	ref_feature_vector = ref_feature_vector.reshape(1, -1)

	# Cluster reference image
	clu.fit(recommendations.drop(['ImgPath', 'Class'], axis='columns').values)
	ref_cluster = clu.predict(ref_feature_vector)
	ref_cluster_indices = np.where(clu.labels_ == ref_cluster)[0]
	recommendations = recommendations.iloc[ref_cluster_indices]

	# Rank cluster and produce top cosine similarity recommendations
	cosine_similarities = cosine_similarity(
	ref_feature_vector,
	recommendations.drop(['ImgPath', 'Class'], axis='columns')
	)
	sorted_ref_cluster_indices = np.argsort(-cosine_similarities.flatten())
	if num_recommendations > len(sorted_ref_cluster_indices):
	raise ValueError('Number of recommendations too large. Insufficient database size.')
	top_ref_cluster_indices = sorted_ref_cluster_indices[:num_recommendations]
	recommendations = recommendations.iloc[top_ref_cluster_indices]

	return list(recommendations['ImgPath'].values)


	if __name__ == '__main__':
	ap = argparse.ArgumentParser()
	ap.add_argument('-f', '--file', required=True, help='reference image')
	ap.add_argument('-d', '--database', default='data/recommender-database', help='the database containing the images to be recommended, default: data/recommender-database')
	ap.add_argument('-c', '--classifier', default='models/clf-cnn', help='the machine learning model used for image classification, default: models/clf-cnn')
	ap.add_argument('-e', '--feature-extractor', default='models/fe-cnn', help='the machine learning model used for image feature extraction, default: models/fe-cnn')
	ap.add_argument('-k', '--clustering-model', default='models/clu-kmeans.model', help='the machine learning model used for image clustering, default: models/clu-kmeans.model')
	ap.add_argument('-n', '--num', required=False, default='10', help="number of recommendations, default: 10")
	args = vars(ap.parse_args())
	num = int(args['num'])

	fig, axes = plt.subplots(max([1, num // 5]) + 1, 5, figsize=(16, 16), num='Flower Image Recommender')
	axes = axes.ravel()

	ref = Image.open(args['file'])
	_, ref_class = classify(args['file'], classifier_path=args['classifier'], return_original=False, verbose=False)
	axes[2].imshow(ref)
	axes[2].set_title(
	f'Reference Image - "{ref_class}"',
	fontsize=10,
	weight='bold'
	)
	axes[2].text(
	0.5, -0.08, f'{os.path.relpath(args["file"])}',
	horizontalalignment='center',
	verticalalignment='center_baseline',
	transform=axes[2].transAxes,
	fontsize=8,
	)
	for i, rec_path in enumerate(recommend(
	args['file'], int(args['num']),
	args['database'] + '.csv', args['classifier'], args['feature_extractor'], args['clustering_model']
	), start=5):
	with Image.open(f'{args["database"]}/{rec_path}') as rec:
	axes[i].imshow(rec)

	for ax in axes:
	ax.axis('off')

	plt.show()