Spaces:
Runtime error
Runtime error
File size: 1,963 Bytes
c57ad5c 070defa c57ad5c f678cb2 c57ad5c 070defa a39c5c8 070defa 20ab731 c57ad5c c4b80ab f05764b c57ad5c b03a557 a39c5c8 c57ad5c 20ab731 c57ad5c 070defa c57ad5c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
from datasets import load_dataset, concatenate_datasets
from sentence_transformers import SentenceTransformer
from torchvision import transforms
from models.encoder import Encoder
from indexer import Indexer
import numpy as np
import torch
import os
# Sentence-embedding model; its `encode` method is what turns a title into a vector.
model = SentenceTransformer('intfloat/multilingual-e5-base')

# Image encoder. Weights are mapped onto the CPU so loading works without a GPU.
encoder = Encoder()
encoder.load_state_dict(torch.load('./models/encoder.bin', map_location=torch.device('cpu')))
# The encoder is only used for inference here, so put any dropout / batch-norm
# style layers it may contain into evaluation mode.
encoder.eval()

# The access token is read from the `HF` environment variable (None if unset).
dataset = load_dataset("Ransaka/youtube_recommendation_data", token=os.environ.get('HF'))
# Merge both splits into one table so a single integer position addresses a row.
dataset = concatenate_datasets([dataset['train'], dataset['test']])

# Precomputed artifacts. They are mapped onto the CPU for the same reason as the
# encoder weights: without `map_location`, anything saved from a GPU fails to
# load on a CPU-only host.
# NOTE(review): presumably one row per dataset entry, in dataset order - confirm.
latent_data = torch.load("data/latent_data_final.bin", map_location=torch.device('cpu'))
embeddings = torch.load("data/embeddings.bin", map_location=torch.device('cpu'))
def _unit_rows(array):
    """Scale every row of a 2-D array to unit L2 norm; all-zero rows stay zero."""
    array = np.asarray(array)
    norms = np.linalg.norm(array, axis=1, keepdims=True)
    # A zero norm would turn the row into NaN/inf; dividing by 1 instead leaves
    # the all-zero row unchanged.
    norms[norms == 0] = 1
    return array / norms


def row_wise_normalize_and_concatenate(array1, array2):
    """L2-normalize the rows of both arrays and join them column-wise.

    Args:
        array1: 2-D array-like with one vector per row.
        array2: 2-D array-like with the same number of rows as ``array1``.

    Returns:
        A NumPy array whose rows are the unit-norm row of ``array1`` followed
        by the unit-norm row of ``array2``. Rows that are entirely zero are
        kept as zeros rather than becoming NaN.

    Raises:
        ValueError: If the two inputs do not have the same number of rows
            (raised by ``np.concatenate``).
    """
    return np.concatenate((_unit_rows(array1), _unit_rows(array2)), axis=1)
# Disabled alternative: build the index over the row-normalized concatenation
# of `latent_data` and `embeddings` instead of over `latent_data` alone.
# result_array = row_wise_normalize_and_concatenate(latent_data, embeddings)
# index = Indexer(result_array)
# Active configuration: the search index is built from `latent_data` only.
index = Indexer(latent_data)
def get_recommendations(image, title, k):
    """Look up the ``k`` dataset entries whose latents are closest to ``image``.

    Args:
        image: PIL image to query with; it is converted to grayscale first.
        title: Title text. Kept for interface compatibility but unused: the
            index is built from ``latent_data`` only, so the query consists of
            the image embedding alone.
        k: Number of neighbours requested from the index.

    Returns:
        A dict with two parallel lists, ``"image"`` and ``"title"``, holding
        the corresponding dataset fields of each retrieved entry.
    """
    # Single-channel ("L") tensor; no batch dimension is added here.
    # NOTE(review): confirm that Encoder accepts unbatched input.
    image = transforms.ToTensor()(image.convert("L"))
    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        image_embeds = encoder(image).detach().numpy()

    candidates = index.topk(image_embeds, k=k)

    results_dict = {"image": [], "title": []}
    # The first element of the top-k result holds the positions for the query.
    for candidate in candidates[0]:
        # Row-first access fetches only this row; `dataset['image'][i]` would
        # materialize the whole column on every iteration.
        row = dataset[int(candidate)]
        results_dict['image'].append(row['image'])
        results_dict['title'].append(row['title'])
    return results_dict