Spaces:
Sleeping
Sleeping
Update smart_breed_matcher.py
Browse files- smart_breed_matcher.py +76 -10
smart_breed_matcher.py
CHANGED
@@ -81,33 +81,81 @@ class SmartBreedMatcher:
|
|
81 |
return sorted(similarities, key=lambda x: x[1], reverse=True)[:top_n]
|
82 |
|
83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
|
85 |
"""計算兩個品種之間的相似度,包含健康和噪音因素"""
|
86 |
# 計算描述文本的相似度
|
87 |
desc1_embedding = self._get_cached_embedding(breed1_features['description'])
|
88 |
desc2_embedding = self._get_cached_embedding(breed2_features['description'])
|
89 |
description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
|
90 |
-
|
91 |
-
#
|
92 |
-
size_similarity =
|
93 |
-
exercise_similarity =
|
94 |
-
|
95 |
# 性格相似度
|
96 |
temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
|
97 |
temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
|
98 |
temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
|
99 |
-
|
100 |
# 健康分數相似度
|
101 |
health_score1 = self._calculate_health_score(breed1_features['breed_name'])
|
102 |
health_score2 = self._calculate_health_score(breed2_features['breed_name'])
|
103 |
health_similarity = 1.0 - abs(health_score1 - health_score2)
|
104 |
-
|
105 |
# 噪音水平相似度
|
106 |
noise_similarity = self._calculate_noise_similarity(
|
107 |
breed1_features['breed_name'],
|
108 |
breed2_features['breed_name']
|
109 |
)
|
110 |
-
|
111 |
# 加權計算
|
112 |
weights = {
|
113 |
'description': 0.25,
|
@@ -117,7 +165,7 @@ class SmartBreedMatcher:
|
|
117 |
'health': 0.15,
|
118 |
'noise': 0.15
|
119 |
}
|
120 |
-
|
121 |
final_similarity = (
|
122 |
description_similarity * weights['description'] +
|
123 |
temperament_similarity * weights['temperament'] +
|
@@ -126,7 +174,7 @@ class SmartBreedMatcher:
|
|
126 |
health_similarity * weights['health'] +
|
127 |
noise_similarity * weights['noise']
|
128 |
)
|
129 |
-
|
130 |
return final_similarity
|
131 |
|
132 |
|
@@ -192,6 +240,24 @@ class SmartBreedMatcher:
|
|
192 |
'scores': {k: round(v, 4) for k, v in scores.items()}
|
193 |
}
|
194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
def _calculate_health_score(self, breed_name: str) -> float:
|
196 |
"""計算品種的健康分數"""
|
197 |
if breed_name not in breed_health_info:
|
|
|
81 |
return sorted(similarities, key=lambda x: x[1], reverse=True)[:top_n]
|
82 |
|
83 |
|
84 |
+
# def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
|
85 |
+
# """計算兩個品種之間的相似度,包含健康和噪音因素"""
|
86 |
+
# # 計算描述文本的相似度
|
87 |
+
# desc1_embedding = self._get_cached_embedding(breed1_features['description'])
|
88 |
+
# desc2_embedding = self._get_cached_embedding(breed2_features['description'])
|
89 |
+
# description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
|
90 |
+
|
91 |
+
# # 基本特徵相似度
|
92 |
+
# size_similarity = 1.0 if breed1_features['size'] == breed2_features['size'] else 0.5
|
93 |
+
# exercise_similarity = 1.0 if breed1_features['exercise'] == breed2_features['exercise'] else 0.5
|
94 |
+
|
95 |
+
# # 性格相似度
|
96 |
+
# temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
|
97 |
+
# temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
|
98 |
+
# temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
|
99 |
+
|
100 |
+
# # 健康分數相似度
|
101 |
+
# health_score1 = self._calculate_health_score(breed1_features['breed_name'])
|
102 |
+
# health_score2 = self._calculate_health_score(breed2_features['breed_name'])
|
103 |
+
# health_similarity = 1.0 - abs(health_score1 - health_score2)
|
104 |
+
|
105 |
+
# # 噪音水平相似度
|
106 |
+
# noise_similarity = self._calculate_noise_similarity(
|
107 |
+
# breed1_features['breed_name'],
|
108 |
+
# breed2_features['breed_name']
|
109 |
+
# )
|
110 |
+
|
111 |
+
# # 加權計算
|
112 |
+
# weights = {
|
113 |
+
# 'description': 0.25,
|
114 |
+
# 'temperament': 0.20,
|
115 |
+
# 'exercise': 0.2,
|
116 |
+
# 'size': 0.05,
|
117 |
+
# 'health': 0.15,
|
118 |
+
# 'noise': 0.15
|
119 |
+
# }
|
120 |
+
|
121 |
+
# final_similarity = (
|
122 |
+
# description_similarity * weights['description'] +
|
123 |
+
# temperament_similarity * weights['temperament'] +
|
124 |
+
# exercise_similarity * weights['exercise'] +
|
125 |
+
# size_similarity * weights['size'] +
|
126 |
+
# health_similarity * weights['health'] +
|
127 |
+
# noise_similarity * weights['noise']
|
128 |
+
# )
|
129 |
+
|
130 |
+
# return final_similarity
|
131 |
+
|
132 |
def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
|
133 |
"""計算兩個品種之間的相似度,包含健康和噪音因素"""
|
134 |
# 計算描述文本的相似度
|
135 |
desc1_embedding = self._get_cached_embedding(breed1_features['description'])
|
136 |
desc2_embedding = self._get_cached_embedding(breed2_features['description'])
|
137 |
description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
|
138 |
+
|
139 |
+
# 使用新的精細計算方法
|
140 |
+
size_similarity = self._calculate_size_similarity(breed1_features['size'], breed2_features['size'])
|
141 |
+
exercise_similarity = self._calculate_exercise_similarity(breed1_features['exercise'], breed2_features['exercise'])
|
142 |
+
|
143 |
# 性格相似度
|
144 |
temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
|
145 |
temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
|
146 |
temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
|
147 |
+
|
148 |
# 健康分數相似度
|
149 |
health_score1 = self._calculate_health_score(breed1_features['breed_name'])
|
150 |
health_score2 = self._calculate_health_score(breed2_features['breed_name'])
|
151 |
health_similarity = 1.0 - abs(health_score1 - health_score2)
|
152 |
+
|
153 |
# 噪音水平相似度
|
154 |
noise_similarity = self._calculate_noise_similarity(
|
155 |
breed1_features['breed_name'],
|
156 |
breed2_features['breed_name']
|
157 |
)
|
158 |
+
|
159 |
# 加權計算
|
160 |
weights = {
|
161 |
'description': 0.25,
|
|
|
165 |
'health': 0.15,
|
166 |
'noise': 0.15
|
167 |
}
|
168 |
+
|
169 |
final_similarity = (
|
170 |
description_similarity * weights['description'] +
|
171 |
temperament_similarity * weights['temperament'] +
|
|
|
174 |
health_similarity * weights['health'] +
|
175 |
noise_similarity * weights['noise']
|
176 |
)
|
177 |
+
|
178 |
return final_similarity
|
179 |
|
180 |
|
|
|
240 |
'scores': {k: round(v, 4) for k, v in scores.items()}
|
241 |
}
|
242 |
|
243 |
+
def _calculate_size_similarity(self, size1: str, size2: str) -> float:
|
244 |
+
size_map = {'Small': 1, 'Medium': 2, 'Large': 3, 'Giant': 4}
|
245 |
+
value1 = size_map.get(size1, 2) # 預設為 'Medium'
|
246 |
+
value2 = size_map.get(size2, 2) # 預設為 'Medium'
|
247 |
+
|
248 |
+
# 計算相似度
|
249 |
+
size_similarity = 1.0 - abs(value1 - value2) / 3
|
250 |
+
return max(0.0, size_similarity) # 確保相似度在 [0, 1] 範圍內
|
251 |
+
|
252 |
+
def _calculate_exercise_similarity(self, exercise1: str, exercise2: str) -> float:
|
253 |
+
exercise_map = {'Low': 1, 'Moderate': 2, 'High': 3, 'Very High': 4}
|
254 |
+
value1 = exercise_map.get(exercise1, 2) # 預設為 'Moderate'
|
255 |
+
value2 = exercise_map.get(exercise2, 2) # 預設為 'Moderate'
|
256 |
+
|
257 |
+
# 計算相似度
|
258 |
+
exercise_similarity = 1.0 - abs(value1 - value2) / 3
|
259 |
+
return max(0.0, exercise_similarity) # 確保相似度在 [0, 1] 範圍內
|
260 |
+
|
261 |
def _calculate_health_score(self, breed_name: str) -> float:
|
262 |
"""計算品種的健康分數"""
|
263 |
if breed_name not in breed_health_info:
|