# ALIGN-Sim / src/evaluate.py
# Provenance: commit 76a5b51 (verified), author yzm0034 — "Fixed: tensor dtype fixed."
import argparse
import numpy as np
import os
import pandas as pd
from tqdm import tqdm
import torch
import utils
from metrics import *
import sys
sys.path.insert(0,"./")
from Models.SentenceTransformersModel import SentenceTransformerModels
from Models.llm_embeddings import LLMEmbeddings
from main_args import get_args
from metrics import CosineMetric
def read_pertubed_data(filename, task, lang="en"):
    """Load a perturbed-dataset CSV into a DataFrame.

    Parameters
    ----------
    filename : str
        Path to the perturbed-dataset CSV file.
    task : str
        Task name; kept for interface compatibility (unused here).
    lang : str
        Language code; kept for interface compatibility (unused here).

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    FileNotFoundError
        If ``filename`` does not exist. (Bug fix: the original message was
        the garbled literal "File (unknown) not found." and never included
        the actual path.)
    """
    if not os.path.exists(filename):
        raise FileNotFoundError(f"File {filename} not found.")
    return pd.read_csv(filename)
def run(args_model, dataset_name, target_lang, args_task, default_gpu="cuda", metric="cosine", save=False, batch_size=2):
    """Embed perturbed sentence sets with a model and score similarities.

    Loads ``./data/perturbed_dataset/{target_lang}/{args_task}/...csv``,
    embeds every sentence column for the task in one batched pass, computes
    original-vs-perturbation similarities via
    ``utils.similarity_between_sent``, and appends them as new columns on the
    loaded DataFrame.

    Parameters
    ----------
    args_model : str
        Model identifier passed to ``LLMEmbeddings`` (e.g. "chatgpt", "llama3").
    dataset_name : str
        Base name of the perturbed dataset file (e.g. "mrpc").
    target_lang : str
        Language code used in the dataset and result paths.
    args_task : str
        Task name; any accepted alias of "anto", "jumbling", "syn" or
        "paraphrase".
    default_gpu : str
        Torch device string for the embedding model.
    metric : str
        Accepted for interface compatibility; similarity is currently always
        computed by ``utils.similarity_between_sent``.
    save : bool
        When True, write the augmented DataFrame under ``./Results/...``.
    batch_size : int
        Batch size for ``model.encode_batch``.

    Returns
    -------
    pandas.DataFrame
        The loaded data with per-row similarity (and difference) columns.

    Raises
    ------
    ValueError
        If ``args_task`` matches no known task alias.
    FileNotFoundError
        If the perturbed-dataset CSV does not exist.
    """
    # Canonical task -> (accepted spellings, CSV sentence columns in order).
    # Normalizing up front fixes the original inconsistency where sentences
    # were collected for e.g. "Anto" or "para" but the scoring branches only
    # matched the exact lowercase canonical name, silently skipping scoring.
    task_specs = {
        "anto": (("Anto", "anto", "Antonym"),
                 ["original_sentence", "paraphrased_sentence", "perturb_n1"]),
        "jumbling": (("jumbling", "Jumbling", "jumb"),
                     ["original_sentence", "paraphrased_sentence",
                      "perturb_n1", "perturb_n2", "perturb_n3"]),
        "syn": (("Syn", "syn", "Synonym"),
                ["original_sentence", "perturb_n1", "perturb_n2", "perturb_n3"]),
        "paraphrase": (("paraphrase", "Paraphrase", "para"),
                       ["original_sentence", "paraphrased_sentence"]),
    }
    task = next((name for name, (aliases, _) in task_specs.items()
                 if args_task in aliases), None)
    if task is None:
        raise ValueError(f"Unknown task: {args_task}")
    cols = task_specs[task][1]

    model = LLMEmbeddings(args_model, device=default_gpu)
    # Paths keep the caller's original task spelling to stay layout-compatible.
    pertubed_data_path = f"./data/perturbed_dataset/{target_lang}/{args_task}/{dataset_name}_{args_task}_perturbed_{target_lang}.csv"
    data = read_pertubed_data(pertubed_data_path, args_task)
    print(f"\n*** Model {args_model} on {dataset_name} dataset for {args_task} task ***\n")

    # Interleave all sentence columns row by row so a single batched encode
    # covers everything; the groups are recovered below by striding.
    sentences = []
    for _, row in data[cols].iterrows():
        sentences.extend(row.values)
    embeddings = model.encode_batch(sentences, batch_size=batch_size)

    # Normalize embeddings to a single numpy array on the CPU.
    if args_model == "chatgpt":
        # For chatgpt, encode_batch returns a list of per-sentence tensors.
        embeddings = np.array([e.cpu().numpy() if isinstance(e, torch.Tensor) else e
                               for e in embeddings])
    elif isinstance(embeddings, torch.Tensor):
        embeddings = embeddings.cpu().numpy()

    # De-interleave: column i of the CSV is every len(cols)-th embedding.
    stride = len(cols)
    groups = [embeddings[i::stride] for i in range(stride)]

    if task == "anto":
        emb_org, emb_para, emb_anto = groups
        _, sim_para = utils.similarity_between_sent(emb_org, emb_para)
        _, sim_anto = utils.similarity_between_sent(emb_org, emb_anto)
        data["sim_org_para"] = sim_para
        data["sim_org_anto"] = sim_anto
        data["diff_org_para"] = np.asarray(sim_para) - np.asarray(sim_anto)
        print(f"""The summary for Antonym Criteria for {args_model} \n {data.describe()} """)
    elif task == "jumbling":
        emb_org, emb_para, emb_n1, emb_n2, emb_n3 = groups
        _, sim_para = utils.similarity_between_sent(emb_org, emb_para)
        _, sim_n1 = utils.similarity_between_sent(emb_org, emb_n1)
        _, sim_n2 = utils.similarity_between_sent(emb_org, emb_n2)
        _, sim_n3 = utils.similarity_between_sent(emb_org, emb_n3)
        # np.asarray so the subtractions below work even if the helper
        # returns plain lists (the anto branch already converted; this
        # branch originally did not).
        sim_para = np.asarray(sim_para)
        data["sim_org_para"] = sim_para
        data["sim_org_n1"] = sim_n1
        data["sim_org_n2"] = sim_n2
        data["sim_org_n3"] = sim_n3
        data["diff_org_para"] = sim_para - sim_para  # identically zero; kept for schema parity
        data["diff_org_n1"] = sim_para - np.asarray(sim_n1)
        data["diff_org_n2"] = sim_para - np.asarray(sim_n2)
        data["diff_org_n3"] = sim_para - np.asarray(sim_n3)
        print(f"""The summary for Jumbling Criteria for {args_model} \n {data.describe()} """)
    elif task == "syn":
        emb_org, emb_s1, emb_s2, emb_s3 = groups
        _, sim_s1 = utils.similarity_between_sent(emb_org, emb_s1)
        _, sim_s2 = utils.similarity_between_sent(emb_org, emb_s2)
        _, sim_s3 = utils.similarity_between_sent(emb_org, emb_s3)
        data["sim_org_s1"] = sim_s1
        data["sim_org_s2"] = sim_s2
        data["sim_org_s3"] = sim_s3
        print(f"""The summary for Synonym Criteria for {args_model} \n {data.describe()} """)
    elif task == "paraphrase":
        emb_s1, emb_s2 = groups
        # BUG FIX: the original stored the whole (mean, sims) tuple in the
        # column; every other branch unpacks it, so do the same here.
        _, sim = utils.similarity_between_sent(emb_s1, emb_s2)
        data["sim"] = sim
        print(f"""The summary for Paraphrase Criteria for {args_model} \n {data.describe()} """)

    if save:
        path = f"./Results/{target_lang}/{args_task}/{dataset_name}_{args_model}_{args_task}_metric.csv"
        # Create the results directory on demand so to_csv cannot fail on a
        # missing folder.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        data.to_csv(path)
        print(f"Data saved at path : {path} ")  # BUG FIX: was a plain string printing literal "{path}"
    return data
if __name__ == "__main__":
    # sys.gettrace() is non-None when a debugger is attached: in that case
    # fall back to a hard-coded debug configuration instead of parsing CLI
    # arguments.
    if sys.gettrace() is not None:
        # sentence-transformers/all-MiniLM-L6-v2
        config = {
            "args_model": "llama3",
            "dataset_name": "mrpc",
            "args_task": "syn",
            "default_gpu": "cuda:2",
            "save": False,
            "target_lang": "en",
        }
    else:
        cli = get_args()
        config = {
            "args_model": cli.model_name,
            "dataset_name": cli.perturbed_dataset,
            "args_task": cli.task,
            "default_gpu": cli.gpu,
            "save": cli.save,
            "target_lang": cli.target_lang,
            "metric": cli.metric,
            "batch_size": 2,
        }
    run(**config)