DivEye - PR (fix bugs, unmodularize)

#13
by FloofCat - opened
Files changed (1) hide show
  1. app.py +59 -82
app.py CHANGED
@@ -18,95 +18,75 @@ import os
18
 
19
  theme = gr.Theme.from_hub("gstaff/xkcd")
20
 
21
class Diversity:
    """Compute surprisal-based statistical features of a text.

    The wrapped ``model`` is called as ``model(tokens, labels=tokens)`` and
    must return an object with a ``.logits`` attribute; ``tokenizer`` must
    provide ``encode(text, return_tensors="pt", truncation=True,
    max_length=1024)``. Inputs are moved to ``device`` before the call.
    """

    def __init__(self, model, tokenizer, device):
        self.tokenizer = tokenizer
        self.model = model
        self.device = device

    def compute_log_likelihoods(self, text):
        """Return the per-token log-likelihoods of ``text`` as a NumPy array.

        Element ``i`` is the log-probability the model assigns to token
        ``i + 1`` given the logits at position ``i``, so the result has one
        entry fewer than the (truncated) token sequence.
        """
        tokens = self.tokenizer.encode(
            text, return_tensors="pt", truncation=True, max_length=1024
        ).to(self.device)
        with torch.no_grad():
            outputs = self.model(tokens, labels=tokens)
            logits = outputs.logits
            # Align predictions with their targets: position i predicts i + 1.
            # squeeze(0) drops the batch axis (assumes a batch of one text).
            shift_logits = logits[:, :-1, :].squeeze(0)
            shift_labels = tokens[:, 1:].squeeze(0)
            log_probs = torch.log_softmax(shift_logits.float(), dim=-1)
            # Pick the log-probability of each realised token.
            picked = log_probs.gather(-1, shift_labels.unsqueeze(-1)).squeeze(-1)
        return picked.cpu().numpy()

    def compute_surprisal(self, text):
        """Return per-token surprisal, i.e. the negated log-likelihoods."""
        return -self.compute_log_likelihoods(text)

    def compute_features(self, text):
        """Return the 11 features for ``text``, or ``None`` if it is too short.

        ``None`` is returned when fewer than 10 per-token values are
        available. Otherwise the list holds, in order: mean, std, variance,
        skewness and kurtosis of the surprisals; mean and std of their first
        differences; variance, histogram entropy and lag-1 autocorrelation of
        the second differences of the log-likelihoods; and the zlib
        compression ratio of the UTF-8 encoded text.
        """
        # One forward pass only: surprisal is just the negated log-likelihood,
        # so there is no need to run the model a second time.
        log_likelihoods = self.compute_log_likelihoods(text)
        surprisals = -log_likelihoods
        if len(surprisals) < 10 or len(log_likelihoods) < 3:
            return None

        s = np.array(surprisals)
        mean_s, std_s, var_s = np.mean(s), np.std(s), np.var(s)
        skew_s, kurt_s = skew(s), kurtosis(s)

        diff_s = np.diff(s)
        mean_diff, std_diff = np.mean(diff_s), np.std(diff_s)

        second_order_diff = np.diff(np.diff(log_likelihoods))
        var_2nd = np.var(second_order_diff)
        entropy_2nd = entropy(
            np.histogram(second_order_diff, bins=20, density=True)[0]
        )
        if len(second_order_diff) > 1:
            autocorr_2nd = np.corrcoef(
                second_order_diff[:-1], second_order_diff[1:]
            )[0, 1]
        else:
            autocorr_2nd = 0

        raw = text.encode('utf-8')
        comp_ratio = len(zlib.compress(raw)) / len(raw)

        return [
            mean_s, std_s, var_s, skew_s, kurt_s,
            mean_diff, std_diff,
            var_2nd, entropy_2nd, autocorr_2nd,
            comp_ratio,
        ]
 
60
 
61
class BiScope:
    """Compute per-token cross-entropy statistics of a text under a model.

    The text is prefixed with a fixed completion prompt, passed through
    ``model`` once, and summarised by statistics of two per-token losses
    taken over progressively shorter tails of the text.
    """

    def __init__(self, model, tokenizer, device):
        self.COMPLETION_PROMPT_ONLY = "Complete the following text: "
        self.tokenizer = tokenizer
        self.model = model
        self.device = device

    def compute_fce_loss(self, logits, targets, text_slice):
        """Per-token loss of ``targets`` against the logits one position earlier.

        The logits window is shifted left by one so that position ``i - 1``
        is scored against the token at position ``i``.
        """
        return CrossEntropyLoss(reduction='none')(
            logits[0, text_slice.start - 1:text_slice.stop - 1, :],
            targets
        ).detach().cpu().numpy()

    def compute_bce_loss(self, logits, targets, text_slice):
        """Per-token loss of ``targets`` against the logits at the same position."""
        return CrossEntropyLoss(reduction='none')(
            logits[0, text_slice, :],
            targets
        ).detach().cpu().numpy()

    @staticmethod
    def _tail_stats(losses):
        """Return ``[mean, max, min, std]`` of ``losses`` after sanitising.

        Values are clipped to ``[-1e6, 1e6]`` and NaNs are replaced by 0.
        """
        cleaned = np.nan_to_num(
            np.clip(losses, -1e6, 1e6), nan=0.0, posinf=1e6, neginf=-1e6
        )
        return [np.mean(cleaned), np.max(cleaned), np.min(cleaned), np.std(cleaned)]

    def detect_single_sample(self, sample):
        """Return the 72 loss statistics for ``sample``.

        For each ``p`` in 1..9 the first ``p`` tenths of the text tokens are
        dropped and ``[mean, max, min, std]`` is emitted for the shifted loss
        and then for the same-position loss (9 splits x 2 losses x 4 stats).

        Raises:
            ValueError: if ``sample`` tokenizes to zero tokens.
        """
        prompt_ids = self.tokenizer(
            self.COMPLETION_PROMPT_ONLY, return_tensors='pt'
        ).input_ids.to(self.device)
        text_ids = self.tokenizer(
            sample, return_tensors='pt', max_length=2000, truncation=True
        ).input_ids.to(self.device)
        combined_ids = torch.cat([prompt_ids, text_ids], dim=1)
        # Positions of the sample's own tokens inside the combined sequence.
        text_slice = slice(prompt_ids.shape[1], combined_ids.shape[1])

        # Inference only: the losses are detached immediately afterwards, so
        # building an autograd graph would just waste memory.
        with torch.no_grad():
            outputs = self.model(input_ids=combined_ids)
        logits = outputs.logits
        targets = combined_ids[0][text_slice]

        fce_loss = self.compute_fce_loss(logits, targets, text_slice)
        bce_loss = self.compute_bce_loss(logits, targets, text_slice)
        if len(fce_loss) == 0:
            # Fail with a clear message instead of NumPy's zero-size
            # reduction error further down.
            raise ValueError("sample produced no tokens to score")

        features = []
        for p in range(1, 10):
            split = len(fce_loss) * p // 10
            features.extend(self._tail_stats(fce_loss[split:]))
            features.extend(self._tail_stats(bce_loss[split:]))
        return features
103
-
104
- # ===========================================================
105
- @spaces.GPU
106
- def evaluate(diveye, biscope, text):
107
- global model
108
- diveye_features = diveye.compute_features(text)
109
- biscope_features = biscope.detect_single_sample(text)
110
 
111
  for f in biscope_features:
112
  diveye_features.append(f)
@@ -133,7 +113,7 @@ def detect_ai_text(text):
133
  )
134
 
135
  # Call software
136
- ai_prob = evaluate(diveye, biscope, text)
137
  human_prob = 1 - ai_prob
138
 
139
  if ai_prob > 0.7:
@@ -178,9 +158,6 @@ if torch.cuda.is_available():
178
  model = xgb.XGBClassifier()
179
  model.load_model(model_path)
180
 
181
- diveye = Diversity(div_model, div_tokenizer, div_model.device)
182
- biscope = BiScope(bi_model, bi_tokenizer, bi_model.device)
183
-
184
  # Gradio app setup
185
  with gr.Blocks(title="DivEye") as demo:
186
  gr.HTML("""
 
18
 
19
  theme = gr.Theme.from_hub("gstaff/xkcd")
20
 
21
+ # ===========================================================
22
+ @spaces.GPU
23
+ def evaluate(text):
24
+ global model, div_model, div_tokenizer, bi_model, bi_tokenizer
25
+
26
+ # =====================================================================
27
+ # DivEye features
28
+ diveye_features = []
29
+ # 1. Token log likelihoods
30
+ tokens = div_tokenizer.encode(text, return_tensors="pt", truncation=True, max_length=1024).to(div_model.device)
31
+ with torch.no_grad():
32
+ outputs = div_model(tokens, labels=tokens)
33
+ logits = outputs.logits
34
+ shift_logits = logits[:, :-1, :].squeeze(0)
35
+ shift_labels = tokens[:, 1:].squeeze(0)
36
+ log_probs = torch.log_softmax(shift_logits.float(), dim=-1)
37
+ token_log_likelihoods = log_probs[range(shift_labels.shape[0]), shift_labels].cpu().numpy()
38
 
39
+ # 2. Surprisal
40
+ surprisals = -token_log_likelihoods
 
 
 
 
 
 
 
41
 
42
+ if len(surprisals) < 10 or len(token_log_likelihoods) < 3:
43
+ diveye_features = [0.0] * 11
44
+
45
+ s = np.array(surprisals)
46
+ mean_s, std_s, var_s, skew_s, kurt_s = np.mean(s), np.std(s), np.var(s), skew(s), kurtosis(s)
47
+ diff_s = np.diff(s)
48
+ mean_diff, std_diff = np.mean(diff_s), np.std(diff_s)
49
+ first_order_diff = np.diff(token_log_likelihoods)
50
+ second_order_diff = np.diff(first_order_diff)
51
+ var_2nd = np.var(second_order_diff)
52
+ entropy_2nd = entropy(np.histogram(second_order_diff, bins=20, density=True)[0])
53
+ autocorr_2nd = np.corrcoef(second_order_diff[:-1], second_order_diff[1:])[0, 1] if len(second_order_diff) > 1 else 0
54
+ comp_ratio = len(zlib.compress(text.encode('utf-8'))) / len(text.encode('utf-8'))
55
 
56
+ diveye_features = [mean_s, std_s, var_s, skew_s, kurt_s, mean_diff, std_diff, var_2nd, entropy_2nd, autocorr_2nd, comp_ratio]
57
+ # =====================================================================
58
 
59
+ # =====================================================================
60
+ # BiScope features
61
+ COMPLETION_PROMPT_ONLY = "Complete the following text: "
62
+ prompt_ids = bi_tokenizer(COMPLETION_PROMPT_ONLY, return_tensors='pt').input_ids.to(bi_model.device)
63
+ text_ids = bi_tokenizer(text, return_tensors='pt', max_length=2000, truncation=True).input_ids.to(bi_model.device)
64
+ combined_ids = torch.cat([prompt_ids, text_ids], dim=1)
65
+ text_slice = slice(prompt_ids.shape[1], combined_ids.shape[1])
66
+
67
+ outputs = bi_model(input_ids=combined_ids)
68
+ logits = outputs.logits
69
+ targets = combined_ids[0][text_slice]
70
+
71
+ fce_loss = CrossEntropyLoss(reduction='none')(
72
  logits[0, text_slice.start-1:text_slice.stop-1, :],
73
  targets
74
  ).detach().cpu().numpy()
75
+ bce_loss = CrossEntropyLoss(reduction='none')(
 
 
76
  logits[0, text_slice, :],
77
  targets
78
  ).detach().cpu().numpy()
79
 
80
+ biscope_features = []
81
+ for p in range(1, 10):
82
+ split = len(fce_loss) * p // 10
83
+ fce_clipped = np.nan_to_num(np.clip(fce_loss[split:], -1e6, 1e6), nan=0.0, posinf=1e6, neginf=-1e6)
84
+ bce_clipped = np.nan_to_num(np.clip(bce_loss[split:], -1e6, 1e6), nan=0.0, posinf=1e6, neginf=-1e6)
85
+ biscope_features.extend([
86
+ np.mean(fce_clipped), np.max(fce_clipped), np.min(fce_clipped), np.std(fce_clipped),
87
+ np.mean(bce_clipped), np.max(bce_clipped), np.min(bce_clipped), np.std(bce_clipped)
88
+ ])
89
+ # =====================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  for f in biscope_features:
92
  diveye_features.append(f)
 
113
  )
114
 
115
  # Call software
116
+ ai_prob = evaluate(text)
117
  human_prob = 1 - ai_prob
118
 
119
  if ai_prob > 0.7:
 
158
  model = xgb.XGBClassifier()
159
  model.load_model(model_path)
160
 
 
 
 
161
  # Gradio app setup
162
  with gr.Blocks(title="DivEye") as demo:
163
  gr.HTML("""