waseoke committed on
Commit
a65b2d6
·
verified ·
1 Parent(s): 2bffc21

Create embed_data.py

Browse files
Files changed (1) hide show
  1. embed_data.py +45 -0
embed_data.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import BertTokenizer, BertModel
3
+ from torch.nn import Embedding
4
+ import numpy as np
5
+
6
# Load the BERT model and tokenizer.
# The checkpoint name lives in one constant so the tokenizer and the model
# are always loaded from the same pretrained weights.
BERT_CHECKPOINT = "klue/bert-base"
tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
bert_model = BertModel.from_pretrained(BERT_CHECKPOINT)
9
+
10
# Product data embedding
def _make_product_embedding(num_embeddings, embedding_dim, seed):
    """Build a frozen lookup table with reproducible random weights."""
    generator = torch.Generator().manual_seed(seed)
    weights = torch.randn(num_embeddings, embedding_dim, generator=generator)
    return Embedding.from_pretrained(weights, freeze=True)


# Built once at import time. Creating the layers inside the function would
# re-draw random weights on every call, so the same category/color id would
# map to a different vector each time.
# NOTE(review): these tables are untrained; load learned weights here if a
# trained model becomes available.
_CATEGORY_EMBEDDING = _make_product_embedding(50, 16, seed=101)
_COLOR_EMBEDDING = _make_product_embedding(20, 8, seed=102)


def embed_product_data(product_data):
    """Embed one product as a single feature vector.

    The vector is the concatenation of a mean-pooled BERT text embedding
    (from the title and description), a 16-dim category embedding and an
    8-dim color embedding.

    Args:
        product_data: Mapping that may contain "title", "description",
            "category_id" and "color_id". Missing or ``None`` text fields
            are treated as empty strings; missing ids default to 0.

    Returns:
        A NumPy array with one row: text embedding, then category
        embedding, then color embedding.

    Raises:
        IndexError: If ``category_id`` is outside 0..49 or ``color_id`` is
            outside 0..19.
    """
    # `or ""` also covers keys that are present but set to None.
    title = product_data.get("title") or ""
    description = product_data.get("description") or ""
    text = title + " " + description

    inputs = tokenizer(
        text, return_tensors="pt", truncation=True, padding=True, max_length=128
    )
    # Inference only: skip building an autograd graph for the BERT pass.
    with torch.no_grad():
        outputs = bert_model(**inputs)
    # Average over dim=1 (presumably the token axis — confirm) to get one
    # vector for the text.
    text_embedding = outputs.last_hidden_state.mean(dim=1)

    category_id = product_data.get("category_id", 0)
    color_id = product_data.get("color_id", 0)

    category_embedding = _CATEGORY_EMBEDDING(torch.tensor([category_id]))
    color_embedding = _COLOR_EMBEDDING(torch.tensor([color_id]))

    combined_embedding = torch.cat(
        (text_embedding, category_embedding, color_embedding), dim=1
    )
    return combined_embedding.detach().numpy()
30
+
31
# User data embedding
_USER_EMBEDDING_BUCKETS = 100
_USER_EMBEDDING_DIM = 128


def _make_user_embedding(seed):
    """Build a frozen user-feature lookup table with reproducible weights."""
    generator = torch.Generator().manual_seed(seed)
    weights = torch.randn(
        _USER_EMBEDDING_BUCKETS, _USER_EMBEDDING_DIM, generator=generator
    )
    return Embedding.from_pretrained(weights, freeze=True)


# One table per feature, built once at import time. A single shared table
# would make unrelated values alias (e.g. age 1 and gender id 1 would get
# the same vector), and building it per call would re-draw random weights
# so identical users would embed differently on every call.
# NOTE(review): these tables are untrained; load learned weights here if a
# trained model becomes available.
_AGE_EMBEDDING = _make_user_embedding(seed=201)
_GENDER_EMBEDDING = _make_user_embedding(seed=202)
_HEIGHT_EMBEDDING = _make_user_embedding(seed=203)
_WEIGHT_EMBEDDING = _make_user_embedding(seed=204)


def _clamp_user_bucket(value):
    """Convert *value* to an int index clamped to the valid bucket range."""
    return min(max(int(value), 0), _USER_EMBEDDING_BUCKETS - 1)


def embed_user_data(user_data):
    """Embed one user as a single feature vector.

    Age, gender, height and weight are each mapped to a bucket index in
    0..99, looked up in their own 128-dim table, and concatenated in that
    order.

    Args:
        user_data: Mapping with keys 'gender', 'age', 'height' and
            'weight'. 'gender' equal to 'M' maps to id 0, anything else to
            id 1. Height is scaled so that 50..250 covers buckets 0..99 and
            weight so that 30..200 covers buckets 0..99 (presumably cm and
            kg — confirm with the caller). Values outside those ranges, and
            ages outside 0..99, are clamped to the nearest valid bucket
            rather than raising.

    Returns:
        A NumPy array of shape (1, 512).

    Raises:
        KeyError: If any of the four keys is missing.
    """
    gender_id = 0 if user_data['gender'] == 'M' else 1
    age_id = _clamp_user_bucket(user_data['age'])
    height_id = _clamp_user_bucket((user_data['height'] - 50) * 99 // 200)
    weight_id = _clamp_user_bucket((user_data['weight'] - 30) * 99 // 170)

    lookups = (
        (_AGE_EMBEDDING, age_id),
        (_GENDER_EMBEDDING, gender_id),
        (_HEIGHT_EMBEDDING, height_id),
        (_WEIGHT_EMBEDDING, weight_id),
    )
    # Each lookup yields a (1, 128) tensor; concatenate along the feature axis.
    parts = [table(torch.tensor([index])) for table, index in lookups]
    combined_embedding = torch.cat(parts, dim=1)
    return combined_embedding.detach().numpy()