|
from abc import ABC, abstractmethod |
|
import numpy as np |
|
|
|
|
|
try: |
|
import torch |
|
except ImportError: |
|
torch = None |
|
|
|
def to_numpy(arr) -> np.ndarray:
    """Coerce *arr* (numpy array, torch tensor, list, ...) to a numpy array.

    Torch tensors are detached from the autograd graph and moved to the
    CPU before conversion; numpy arrays are returned as-is (no copy);
    anything else is handed to ``np.array``.
    """
    is_torch_tensor = torch is not None and isinstance(arr, torch.Tensor)
    if is_torch_tensor:
        # detach() drops gradient tracking; cpu() handles GPU-resident tensors.
        return arr.detach().cpu().numpy()
    return arr if isinstance(arr, np.ndarray) else np.array(arr)
|
|
|
class Metric(ABC):
    """Common interface for pairwise vector metrics.

    Concrete subclasses must implement :meth:`compute`.
    """

    @abstractmethod
    def compute(self, vector1, vector2) -> float:
        """Evaluate the metric for a pair of vectors.

        Args:
            vector1: First operand (numpy array, torch tensor, list, etc.).
            vector2: Second operand (numpy array, torch tensor, list, etc.).

        Returns:
            float: The metric value.
        """
        ...
|
|
|
class CosineMetric(Metric):
    """Cosine similarity between two vectors.

    The value lies in [-1, 1]; 0.0 is returned when either vector has
    zero norm, where the cosine is undefined.
    """

    def compute(self, vector1, vector2) -> float:
        """Compute the cosine similarity of *vector1* and *vector2*.

        Args:
            vector1: First vector (numpy array, torch tensor, list, etc.).
            vector2: Second vector, same length as *vector1*.

        Returns:
            float: dot(v1, v2) / (||v1|| * ||v2||), or 0.0 if either
            norm is zero.
        """
        vec1 = to_numpy(vector1)
        vec2 = to_numpy(vector2)

        norm1 = np.linalg.norm(vec1)
        norm2 = np.linalg.norm(vec2)
        # Guard the undefined zero-vector case instead of dividing by zero.
        if norm1 == 0 or norm2 == 0:
            return 0.0
        # float() honors the declared return type (np.dot yields np.float64).
        return float(np.dot(vec1, vec2) / (norm1 * norm2))
|
|
|
class NEDMetric(Metric):
    """Normalized Euclidean distance: ||v1 - v2|| / (||v1|| + ||v2||).

    The value lies in [0, 1]; 0.0 is returned when both vectors are zero
    (the ratio is 0/0 there, and "identical" is the natural reading).
    NOTE(review): unlike batch_NED/NED2 below, this variant does not
    mean-center its inputs — confirm that difference is intentional.
    """

    def compute(self, vector1, vector2) -> float:
        """Compute the normalized Euclidean distance.

        Args:
            vector1: First vector (numpy array, torch tensor, list, etc.).
            vector2: Second vector, same length as *vector1*.

        Returns:
            float: ||v1 - v2|| / (||v1|| + ||v2||), or 0.0 when both
            norms are zero.
        """
        vec1 = to_numpy(vector1)
        vec2 = to_numpy(vector2)

        norm_sum = np.linalg.norm(vec1) + np.linalg.norm(vec2)
        # Both vectors zero -> 0/0; report them as identical.
        if norm_sum == 0:
            return 0.0
        # float() honors the declared return type (np scalar otherwise).
        return float(np.linalg.norm(vec1 - vec2) / norm_sum)
|
|
|
class EuclideanMetric(Metric):
    """Plain Euclidean (L2) distance.

    Accepts single vectors (returns a float) or 2-D batches of row
    vectors (returns the per-row distance array, preserving the original
    ``axis=1`` behavior for batched input).
    """

    def compute(self, vector1, vector2) -> float:
        """Compute the Euclidean distance between *vector1* and *vector2*.

        Args:
            vector1: Vector or batch of row vectors (array, tensor, list).
            vector2: Vector or batch, broadcastable against *vector1*.

        Returns:
            float for 1-D input; np.ndarray of row-wise distances for
            2-D (batched) input.
        """
        # Original code subtracted raw inputs (lists/tensors would fail)
        # and always used axis=1 (1-D vectors would raise); convert first
        # and only reduce row-wise for genuinely batched input.
        diff = to_numpy(vector1) - to_numpy(vector2)
        if diff.ndim >= 2:
            # Backward compatible: per-row distances for batched callers.
            return np.linalg.norm(diff, axis=1)
        return float(np.linalg.norm(diff))
|
|
|
def dot_product(x, y):
    """Dot product of *x* with the transpose of *y*.

    For 1-D inputs this is the scalar inner product; for 2-D inputs it
    is the (n, m) matrix of pairwise row dot products.

    Args:
        x: Array-like, shape (d,) or (n, d).
        y: Array-like, shape (d,) or (m, d).

    Returns:
        Scalar or np.ndarray of pairwise dot products.
    """
    # np.asarray lets plain lists in; the original required `.T` on y,
    # which only exists on ndarray-like objects.
    x = np.asarray(x)
    y = np.asarray(y)
    return np.dot(x, y.T)
|
|
|
def compute_ned_distance(x, y):
    """Variance-based normalized Euclidean distance.

    Score is 0.5 * Var(x - y) / (Var(x) + Var(y)), which lies in [0, 1]
    and equals NED2 (variances are squared centered norms divided by n).

    Args:
        x: First signal (array-like).
        y: Second signal, same length as *x*.

    Returns:
        float: The distance; 0.0 when both signals are constant
        (the ratio would be 0/0).
    """
    x = np.asarray(x)
    y = np.asarray(y)

    total_var = np.var(x) + np.var(y)
    # Two constant signals give 0/0; report them as identical.
    if total_var == 0:
        return 0.0
    return float(0.5 * np.var(x - y) / total_var)
|
|
|
def batch_NED(batch_u, batch_v):
    """Row-wise normalized Euclidean distance between two batches.

    Each pair (u, v) is mean-centered, then scored as
    0.5 * ||u' - v'||^2 / (||u'||^2 + ||v'||^2), which lies in [0, 1].

    Args:
        batch_u: Iterable of vectors, (B, D)-like.
        batch_v: Iterable of vectors, same batch size as *batch_u*.

    Returns:
        np.ndarray: One NED score per row, shape (B,).

    Raises:
        ValueError: If the two batches have different sizes.
    """
    batch_u = np.array(batch_u)
    batch_v = np.array(batch_v)

    if batch_u.shape[0] != batch_v.shape[0]:
        # ValueError instead of assert: asserts vanish under `python -O`.
        raise ValueError("The batch sizes of u and v must be the same.")

    scores = []
    for u, v in zip(batch_u, batch_v):
        # Rows are already ndarrays after the batch conversion above.
        u_centered = u - np.mean(u)
        v_centered = v - np.mean(v)

        numerator = np.linalg.norm(u_centered - v_centered, ord=2) ** 2
        denominator = np.linalg.norm(u_centered, ord=2) ** 2 + np.linalg.norm(v_centered, ord=2) ** 2

        # Both rows constant -> 0/0; treat the pair as identical (score 0).
        scores.append(0.0 if denominator == 0 else 0.5 * numerator / denominator)

    return np.array(scores)
|
|
|
|
|
def NED2(u, v):
    """Normalized Euclidean distance between two single vectors.

    Mean-centers both inputs, then returns
    0.5 * ||u' - v'||^2 / (||u'||^2 + ||v'||^2), which lies in [0, 1].

    Args:
        u: First vector (array-like).
        v: Second vector, same length as *u*.

    Returns:
        float: The NED score; 0.0 when both centered vectors are zero
        (i.e. both inputs are constant, where the ratio is 0/0).
    """
    u = np.asarray(u, dtype=float)
    v = np.asarray(v, dtype=float)

    u_centered = u - u.mean()
    v_centered = v - v.mean()

    denominator = np.linalg.norm(u_centered, ord=2) ** 2 + np.linalg.norm(v_centered, ord=2) ** 2
    # Two constant inputs give 0/0; report them as identical.
    if denominator == 0:
        return 0.0
    numerator = np.linalg.norm(u_centered - v_centered, ord=2) ** 2
    return float(0.5 * numerator / denominator)
|
|
|
|
|
if __name__ == "__main__":
    # Smoke test: compare a numpy vector against a torch tensor, falling
    # back to a plain list when torch is not installed.
    reference = np.array([1.0, 2.0, 3.0])
    other = torch.tensor([4.0, 5.0, 6.0]) if torch is not None else [4.0, 5.0, 6.0]

    for label, metric in (
        ("Cosine Similarity", CosineMetric()),
        ("Normalized Euclidean Distance", NEDMetric()),
    ):
        print(f"{label}:", metric.compute(reference, other))
|
|
|
|
|
|
|
|
|
|
|
|
|
|