# PromptCARE / soft_prompt / exp11_ttest.py
# Hosting-page residue kept for provenance: "homeway's picture",
# commit message "Add application file", revision 7713b1f.
import argparse
import os
import torch
import numpy as np
import random
import os.path as osp
from scipy import stats
from tqdm import tqdm
# Absolute path of the directory that contains this script; bare checkpoint
# names passed on the command line are resolved relative to it.
ROOT = os.path.abspath(os.path.dirname(__file__))
def set_default_seed(seed=1000):
    """Seed Python, NumPy and PyTorch RNGs and switch cuDNN to deterministic mode.

    Args:
        seed: Integer seed, or anything ``int()`` accepts (e.g. the string
            that argparse produces for an option declared without ``type=``).

    Raises:
        ValueError: If ``seed`` cannot be converted to an integer.
    """
    # NumPy and PyTorch reject string seeds, and ``random.seed("7")`` would
    # silently seed differently from ``random.seed(7)``; normalise once here.
    seed = int(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # multi-GPU
    # Trade cuDNN autotuning for reproducible kernel selection.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    print(f"<--------------------------- seed:{seed} --------------------------->")
def get_args():
    """Parse command-line options and resolve the three checkpoint paths.

    Unknown command-line arguments are ignored (``parse_known_args``).

    Returns:
        argparse.Namespace with ``path_o``, ``path_p``, ``path_n``,
        ``model_name``, ``seed`` (int), ``max_pvalue_times``,
        ``max_pvalue_samples`` and ``ROOT``.
    """
    parser = argparse.ArgumentParser(description="Build basic RemovalNet.")
    parser.add_argument("-path_o", required=True, help="owner's path for exp11_attentions.pth")
    parser.add_argument("-path_p", required=True, help="positive path for exp11_attentions.pth")
    parser.add_argument("-path_n", required=True, help="negative path for exp11_attentions.pth")
    parser.add_argument("-model_name", default=None, help="model_name")
    # type=int: without it a seed given on the command line stays a string,
    # which the NumPy/PyTorch seeding functions do not accept.
    parser.add_argument("-seed", type=int, default=2233, help="seed")
    parser.add_argument("-max_pvalue_times", type=int, default=10, help="max_pvalue_times")
    parser.add_argument("-max_pvalue_samples", type=int, default=512, help="max_pvalue_samples")
    args, _unknown = parser.parse_known_args()
    args.ROOT = ROOT

    # A value that does not mention "checkpoints" is treated as a run name and
    # expanded to <ROOT>/checkpoints/<name>/exp11_attentions.pth; anything
    # else is used verbatim as a path.
    for attr in ("path_o", "path_p", "path_n"):
        value = getattr(args, attr)
        if "checkpoints" not in value:
            setattr(args, attr, osp.join(ROOT, "checkpoints", value, "exp11_attentions.pth"))

    # Short alias for the hub identifier.
    if args.model_name == "opt-1.3b":
        args.model_name = "facebook/opt-1.3b"
    return args
def get_predict_token(result):
    """Pick, for every row, the highest-scoring token among the label tokens.

    Args:
        result: Mapping with tensors under ``"clean_labels"``,
            ``"target_labels"`` and ``"wmk_attentions"``. The scores are
            indexed as ``[row, token_id, ...]`` (argmax is taken over dim 1).

    Returns:
        NumPy integer array with the selected token id per row. Only ids that
        occur in the clean or target labels can be selected; if no label id
        falls inside the score range, index 0 is returned for every row.
    """
    scores = result["wmk_attentions"]
    label_ids = torch.cat([result["clean_labels"].view(-1), result["target_labels"].view(-1)])

    # Label ids outside [0, vocab) (e.g. ignore-index placeholders) cannot be
    # predicted, so they are dropped from the candidate set.
    vocab_size = scores.shape[1]
    candidates = sorted({int(i) for i in label_ids.tolist() if 0 <= int(i) < vocab_size})

    # Mask with -inf on the raw scores instead of zeroing softmax output:
    # softmax is monotonic so the argmax is the same, but probabilities that
    # underflow to 0.0 would tie with the zeroed entries and let a
    # non-candidate index win.
    masked = torch.full_like(scores, float("-inf"))
    if candidates:
        keep = torch.tensor(candidates, dtype=torch.long, device=scores.device)
        masked[:, keep] = scores[:, keep]
    return masked.argmax(dim=1).cpu().numpy()
def main():
    """Compare the owner's predicted tokens with a positive and a negative model.

    Loads the three ``exp11_attentions.pth`` results, reduces each to one
    predicted token id per row, and runs ``max_pvalue_times`` rounds of a
    two-sample t-test (equal variance) on ``max_pvalue_samples`` randomly
    drawn rows. The same row indices are used for all three models in a
    round. Per-round p-values and their means are printed.

    Raises:
        ValueError: If any of the three results yields no predicted tokens.
    """
    args = get_args()
    # The seed may arrive as a string from the command line; the NumPy and
    # PyTorch seeders need an integer.
    set_default_seed(int(args.seed))

    # NOTE(review): torch.load unpickles the files; only use trusted checkpoints.
    result_o = torch.load(args.path_o, map_location="cpu")
    result_p = torch.load(args.path_p, map_location="cpu")
    result_n = torch.load(args.path_n, map_location="cpu")
    print(f"-> load from: {args.path_n}")

    tokens_w = get_predict_token(result_o)  # watermarked
    tokens_p = get_predict_token(result_p)  # positive
    tokens_n = get_predict_token(result_n)  # negative

    # Token strings are only decoded for the log lines below.
    words_w, words_p, words_n = [], [], []
    if args.model_name is not None:
        if "llama" in args.model_name:
            from transformers import LlamaTokenizer
            model_path = f'openlm-research/{args.model_name}'
            tokenizer = LlamaTokenizer.from_pretrained(model_path)
        else:
            from transformers import AutoTokenizer
            tokenizer = AutoTokenizer.from_pretrained(args.model_name)
        words_w = tokenizer.convert_ids_to_tokens(tokens_w[:10000])
        words_p = tokenizer.convert_ids_to_tokens(tokens_p[:10000])
        words_n = tokenizer.convert_ids_to_tokens(tokens_n[:10000])

    print("-> [watermarked] tokens", tokens_w[:20], words_w[:20], len(words_w))
    print("-> [positive] tokens", tokens_p[:20], words_p[:20], len(words_p))
    print("-> [negative] tokens", tokens_n[:20], words_n[:20], len(words_n))

    # Sample from the token arrays themselves. Using len(words_w) made the
    # pool empty whenever -model_name was omitted and tied the statistics to a
    # display-only option. The minimum keeps every index valid for all three.
    pool_size = min(len(tokens_w), len(tokens_p), len(tokens_n))
    if pool_size == 0:
        raise ValueError("no predicted tokens available to sample for the t-test")

    pvalue = np.zeros([2, args.max_pvalue_times])
    statistic = np.zeros([2, args.max_pvalue_times])
    per_size = args.max_pvalue_samples
    phar = tqdm(range(args.max_pvalue_times))
    for step in phar:
        rand_idx = np.random.choice(pool_size, per_size)
        # float64 copies: the tiny offset below would be rounded away in
        # float32 for large token ids, and the source arrays stay untouched.
        sample_w = tokens_w[rand_idx].astype(np.float64)
        sample_p = tokens_p[rand_idx].astype(np.float64)
        sample_n = tokens_n[rand_idx].astype(np.float64)

        # Avoid NaN: identical constant samples have zero variance, which
        # makes the t statistic undefined. Nudging one element of the sample
        # keeps it defined without materially changing the result.
        if sample_w.size:
            sample_w[-1] += 0.00001

        res_p = stats.ttest_ind(sample_w, sample_p, equal_var=True, nan_policy="omit")
        res_n = stats.ttest_ind(sample_w, sample_n, equal_var=True, nan_policy="omit")

        # Row 0 holds the negative comparison, row 1 the positive one.
        pvalue[0, step] = res_n.pvalue
        pvalue[1, step] = res_p.pvalue
        statistic[0, step] = res_n.statistic
        statistic[1, step] = res_p.statistic
        phar.set_description(f"[{step}/{args.max_pvalue_times}] negative:{res_n.pvalue} positive:{res_p.pvalue}")

    print(f"-> pvalue:{pvalue}")
    print(f"-> [negative]-[{args.max_pvalue_samples}] pvalue:{pvalue.mean(axis=1)[0]} state:{statistic.mean(axis=1)[0]}")
    print(f"-> [positive]-[{args.max_pvalue_samples}] pvalue:{pvalue.mean(axis=1)[1]} state:{statistic.mean(axis=1)[1]}")
    print(args.path_o)
# Run the t-test experiment only when executed as a script, not on import.
if __name__ == "__main__":
    main()