File size: 3,239 Bytes
fd7be9e
0182b00
fd7be9e
 
dab0bda
fd7be9e
 
 
dab0bda
fd7be9e
 
dab0bda
 
 
 
 
 
 
 
 
 
 
 
fd7be9e
 
 
 
 
 
 
 
dab0bda
fd7be9e
 
dab0bda
fd7be9e
dab0bda
fd7be9e
0182b00
fd7be9e
dab0bda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0182b00
dab0bda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd7be9e
0182b00
dab0bda
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import os

import numpy as np
from pymongo import MongoClient
from sklearn.metrics.pairwise import cosine_similarity

# MongoDB Atlas connection.
# SECURITY NOTE(review): the fallback URI embeds credentials directly in
# source control. Set MONGODB_URI in the environment instead, and rotate
# the exposed password.
client = MongoClient(
    os.getenv(
        "MONGODB_URI",
        "mongodb+srv://waseoke:[email protected]/test?retryWrites=true&w=majority",
    )
)
db = client["two_tower_model"]
user_embedding_collection = db["user_embeddings"]
product_embedding_collection = db["product_embeddings"]
train_dataset = db["train_dataset"]

# Similarity helper
def calculate_similarity(input_embedding, target_embeddings):
    """Compute cosine similarity between an input embedding and each target.

    Returns a 1-D array containing one similarity score per target embedding.
    """
    return cosine_similarity(input_embedding, target_embeddings).ravel()

def find_most_similar_anchor(user_id):
    """Return the training anchor product most similar to a user.

    Looks up the user's stored embedding, compares it against every anchor
    embedding in the train dataset via cosine similarity, and returns the
    best match.

    Args:
        user_id: Key used to locate the user's embedding document.

    Returns:
        Tuple of (anchor, anchor_embedding) for the most similar anchor.

    Raises:
        ValueError: If the user has no stored embedding, or the train
            dataset contains no entries to compare against.
    """
    # Fetch the user's embedding.
    user_data = user_embedding_collection.find_one({"user_id": user_id})
    if not user_data:
        raise ValueError(f"No embedding found for user_id: {user_id}")

    user_embedding = np.array(user_data["embedding"]).reshape(1, -1)

    # Gather all anchors and their embeddings from the train dataset.
    train_data = list(train_dataset.find())
    if not train_data:
        # Fail early with a clear message instead of a cryptic argmax error.
        raise ValueError("Train dataset is empty; no anchors to compare.")

    anchors = [entry["anchor"] for entry in train_data]
    anchor_embeddings = np.array([entry["anchor_embedding"] for entry in train_data])

    # Pick the anchor with the highest cosine similarity to the user.
    similarities = calculate_similarity(user_embedding, anchor_embeddings)
    most_similar_index = int(np.argmax(similarities))

    return anchors[most_similar_index], anchor_embeddings[most_similar_index]

def find_most_similar_product(anchor_embedding):
    """Return the trained product most similar to an anchor embedding.

    Compares the anchor embedding against every positive and negative
    embedding stored in the train dataset.

    Args:
        anchor_embedding: Embedding vector for the anchor (1-D or row).

    Returns:
        Tuple of (product, embedding) for the closest positive/negative item.

    Raises:
        ValueError: If the train dataset contains no entries.
    """
    train_data = list(train_dataset.find())
    if not train_data:
        # Fail early with a clear message instead of a cryptic argmax error.
        raise ValueError("Train dataset is empty; no products to compare.")

    # Interleave positives and negatives so indices map back to products.
    products, train_embeddings = [], []
    for entry in train_data:
        products.extend([entry["positive"], entry["negative"]])
        train_embeddings.extend([entry["positive_embedding"], entry["negative_embedding"]])

    train_embeddings = np.array(train_embeddings)

    # Pick the product with the highest cosine similarity to the anchor.
    similarities = calculate_similarity(anchor_embedding.reshape(1, -1), train_embeddings)
    most_similar_index = int(np.argmax(similarities))

    return products[most_similar_index], train_embeddings[most_similar_index]

def recommend_shop_product(similar_product_embedding):
    """Return the shop product id whose embedding best matches the input.

    Compares the given embedding against every stored shop product
    embedding and returns the closest match's id.

    Args:
        similar_product_embedding: Embedding of the reference product
            (1-D or row vector).

    Returns:
        The product_id of the most similar shop product.

    Raises:
        ValueError: If no shop product embeddings are stored.
    """
    all_products = list(product_embedding_collection.find())
    if not all_products:
        # Fail early with a clear message instead of a cryptic argmax error.
        raise ValueError("No shop product embeddings available.")

    shop_product_ids = [product["product_id"] for product in all_products]
    shop_product_embeddings = np.array([product["embedding"] for product in all_products])

    # Pick the shop product with the highest cosine similarity.
    similarities = calculate_similarity(
        similar_product_embedding.reshape(1, -1), shop_product_embeddings
    )
    return shop_product_ids[int(np.argmax(similarities))]