Spaces:
Sleeping
Sleeping
File size: 1,410 Bytes
fd7be9e 0182b00 fd7be9e 0182b00 fd7be9e 0182b00 fd7be9e 0182b00 fd7be9e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import os

import numpy as np
from pymongo import MongoClient
from sklearn.metrics.pairwise import cosine_similarity
# MongoDB connection used by the functions below.
# The connection string can be supplied through the MONGODB_URI environment
# variable so that credentials do not have to live in source control; when the
# variable is unset, the original hard-coded URI is used unchanged.
# NOTE(review): the fallback URI appears garbled ("[email protected]") —
# confirm the real host/credentials and prefer setting MONGODB_URI.
client = MongoClient(
    os.environ.get(
        "MONGODB_URI",
        "mongodb+srv://waseoke:[email protected]/test?retryWrites=true&w=majority",
    )
)
db = client["two_tower_model"]
# Collection queried by "user_id" for a stored "embedding".
user_embedding_collection = db["user_embeddings"]
# Collection whose documents are read for "anchor_embedding", "anchor" and "positive".
train_dataset = db["train_dataset"]
def calculate_similarity(user_id):
    """Return the "positive" product of the training anchor closest to a user.

    Looks up the stored embedding for ``user_id``, compares it by cosine
    similarity against the ``anchor_embedding`` of every document in the
    training collection, and returns the ``positive`` field of the best match.
    The chosen anchor and the recommendation are also printed.

    Args:
        user_id: Value matched against the ``user_id`` field of the user
            embedding collection.

    Returns:
        The ``positive`` value of the training document whose anchor embedding
        has the highest cosine similarity to the user's embedding.

    Raises:
        ValueError: If no embedding document exists for ``user_id``, or if
            the training collection contains no documents.
    """
    # Fetch the user's embedding.
    user_data = user_embedding_collection.find_one({"user_id": user_id})
    if not user_data:
        raise ValueError(f"No embedding found for user_id: {user_id}")
    # Present the user vector as a single row so it can be compared against
    # the matrix of anchor rows.
    user_embedding = np.array(user_data["embedding"]).reshape(1, -1)

    # Fetch the anchor data. The full documents are kept so the winning
    # entry's "anchor" and "positive" fields can be read back by index.
    train_data = list(train_dataset.find())
    if not train_data:
        # Fail clearly instead of letting an empty array reach
        # cosine_similarity / argmax and raise an obscure shape error.
        raise ValueError("No training data available to compare against")
    anchor_embeddings = np.array(
        [entry["anchor_embedding"] for entry in train_data]
    )

    # Compute cosine similarity: one score per anchor.
    similarities = cosine_similarity(user_embedding, anchor_embeddings).flatten()

    # Pick the most similar anchor (first one wins on ties, per argmax).
    most_similar_index = int(np.argmax(similarities))
    most_similar_entry = train_data[most_similar_index]
    most_similar_positive = most_similar_entry["positive"]

    print(f"Most similar anchor for user {user_id}: {most_similar_entry['anchor']}")
    print(f"Recommended positive product: {most_similar_positive}")
    return most_similar_positive