DawnC committed on
Commit
807f22a
·
verified ·
1 Parent(s): 79d4f18

Update smart_breed_matcher.py

Browse files
Files changed (1) hide show
  1. smart_breed_matcher.py +76 -10
smart_breed_matcher.py CHANGED
@@ -81,33 +81,81 @@ class SmartBreedMatcher:
81
  return sorted(similarities, key=lambda x: x[1], reverse=True)[:top_n]
82
 
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
85
  """計算兩個品種之間的相似度,包含健康和噪音因素"""
86
  # 計算描述文本的相似度
87
  desc1_embedding = self._get_cached_embedding(breed1_features['description'])
88
  desc2_embedding = self._get_cached_embedding(breed2_features['description'])
89
  description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
90
-
91
- # 基本特徵相似度
92
- size_similarity = 1.0 if breed1_features['size'] == breed2_features['size'] else 0.5
93
- exercise_similarity = 1.0 if breed1_features['exercise'] == breed2_features['exercise'] else 0.5
94
-
95
  # 性格相似度
96
  temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
97
  temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
98
  temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
99
-
100
  # 健康分數相似度
101
  health_score1 = self._calculate_health_score(breed1_features['breed_name'])
102
  health_score2 = self._calculate_health_score(breed2_features['breed_name'])
103
  health_similarity = 1.0 - abs(health_score1 - health_score2)
104
-
105
  # 噪音水平相似度
106
  noise_similarity = self._calculate_noise_similarity(
107
  breed1_features['breed_name'],
108
  breed2_features['breed_name']
109
  )
110
-
111
  # 加權計算
112
  weights = {
113
  'description': 0.25,
@@ -117,7 +165,7 @@ class SmartBreedMatcher:
117
  'health': 0.15,
118
  'noise': 0.15
119
  }
120
-
121
  final_similarity = (
122
  description_similarity * weights['description'] +
123
  temperament_similarity * weights['temperament'] +
@@ -126,7 +174,7 @@ class SmartBreedMatcher:
126
  health_similarity * weights['health'] +
127
  noise_similarity * weights['noise']
128
  )
129
-
130
  return final_similarity
131
 
132
 
@@ -192,6 +240,24 @@ class SmartBreedMatcher:
192
  'scores': {k: round(v, 4) for k, v in scores.items()}
193
  }
194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  def _calculate_health_score(self, breed_name: str) -> float:
196
  """計算品種的健康分數"""
197
  if breed_name not in breed_health_info:
 
81
  return sorted(similarities, key=lambda x: x[1], reverse=True)[:top_n]
82
 
83
 
84
+ # def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
85
+ # """計算兩個品種之間的相似度,包含健康和噪音因素"""
86
+ # # 計算描述文本的相似度
87
+ # desc1_embedding = self._get_cached_embedding(breed1_features['description'])
88
+ # desc2_embedding = self._get_cached_embedding(breed2_features['description'])
89
+ # description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
90
+
91
+ # # 基本特徵相似度
92
+ # size_similarity = 1.0 if breed1_features['size'] == breed2_features['size'] else 0.5
93
+ # exercise_similarity = 1.0 if breed1_features['exercise'] == breed2_features['exercise'] else 0.5
94
+
95
+ # # 性格相似度
96
+ # temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
97
+ # temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
98
+ # temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
99
+
100
+ # # 健康分數相似度
101
+ # health_score1 = self._calculate_health_score(breed1_features['breed_name'])
102
+ # health_score2 = self._calculate_health_score(breed2_features['breed_name'])
103
+ # health_similarity = 1.0 - abs(health_score1 - health_score2)
104
+
105
+ # # 噪音水平相似度
106
+ # noise_similarity = self._calculate_noise_similarity(
107
+ # breed1_features['breed_name'],
108
+ # breed2_features['breed_name']
109
+ # )
110
+
111
+ # # 加權計算
112
+ # weights = {
113
+ # 'description': 0.25,
114
+ # 'temperament': 0.20,
115
+ # 'exercise': 0.2,
116
+ # 'size': 0.05,
117
+ # 'health': 0.15,
118
+ # 'noise': 0.15
119
+ # }
120
+
121
+ # final_similarity = (
122
+ # description_similarity * weights['description'] +
123
+ # temperament_similarity * weights['temperament'] +
124
+ # exercise_similarity * weights['exercise'] +
125
+ # size_similarity * weights['size'] +
126
+ # health_similarity * weights['health'] +
127
+ # noise_similarity * weights['noise']
128
+ # )
129
+
130
+ # return final_similarity
131
+
132
  def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
133
  """計算兩個品種之間的相似度,包含健康和噪音因素"""
134
  # 計算描述文本的相似度
135
  desc1_embedding = self._get_cached_embedding(breed1_features['description'])
136
  desc2_embedding = self._get_cached_embedding(breed2_features['description'])
137
  description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
138
+
139
+ # 使用新的精細計算方法
140
+ size_similarity = self._calculate_size_similarity(breed1_features['size'], breed2_features['size'])
141
+ exercise_similarity = self._calculate_exercise_similarity(breed1_features['exercise'], breed2_features['exercise'])
142
+
143
  # 性格相似度
144
  temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
145
  temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
146
  temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
147
+
148
  # 健康分數相似度
149
  health_score1 = self._calculate_health_score(breed1_features['breed_name'])
150
  health_score2 = self._calculate_health_score(breed2_features['breed_name'])
151
  health_similarity = 1.0 - abs(health_score1 - health_score2)
152
+
153
  # 噪音水平相似度
154
  noise_similarity = self._calculate_noise_similarity(
155
  breed1_features['breed_name'],
156
  breed2_features['breed_name']
157
  )
158
+
159
  # 加權計算
160
  weights = {
161
  'description': 0.25,
 
165
  'health': 0.15,
166
  'noise': 0.15
167
  }
168
+
169
  final_similarity = (
170
  description_similarity * weights['description'] +
171
  temperament_similarity * weights['temperament'] +
 
174
  health_similarity * weights['health'] +
175
  noise_similarity * weights['noise']
176
  )
177
+
178
  return final_similarity
179
 
180
 
 
240
  'scores': {k: round(v, 4) for k, v in scores.items()}
241
  }
242
 
243
+ def _calculate_size_similarity(self, size1: str, size2: str) -> float:
244
+ size_map = {'Small': 1, 'Medium': 2, 'Large': 3, 'Giant': 4}
245
+ value1 = size_map.get(size1, 2) # 預設為 'Medium'
246
+ value2 = size_map.get(size2, 2) # 預設為 'Medium'
247
+
248
+ # 計算相似度
249
+ size_similarity = 1.0 - abs(value1 - value2) / 3
250
+ return max(0.0, size_similarity) # 確保相似度在 [0, 1] 範圍內
251
+
252
+ def _calculate_exercise_similarity(self, exercise1: str, exercise2: str) -> float:
253
+ exercise_map = {'Low': 1, 'Moderate': 2, 'High': 3, 'Very High': 4}
254
+ value1 = exercise_map.get(exercise1, 2) # 預設為 'Moderate'
255
+ value2 = exercise_map.get(exercise2, 2) # 預設為 'Moderate'
256
+
257
+ # 計算相似度
258
+ exercise_similarity = 1.0 - abs(value1 - value2) / 3
259
+ return max(0.0, exercise_similarity) # 確保相似度在 [0, 1] 範圍內
260
+
261
  def _calculate_health_score(self, breed_name: str) -> float:
262
  """計算品種的健康分數"""
263
  if breed_name not in breed_health_info: