SteganographywithGPT-2

Sleeping

App Files Files Community

Xinyoumeng233hu commited on May 14, 2023

Commit

0833281

1 Parent(s): 69c4e15

Create arithmetic.py

Browse files

Files changed (1) hide show

arithmetic.py +260 -0

arithmetic.py ADDED Viewed

	@@ -0,0 +1,260 @@

+import torch
+import torch.nn.functional as F
+from utils import limit_past, kl, entropy, bits2int, int2bits, is_sent_finish, num_same_from_beg
+def encode_arithmetic(model, enc, message, context, finish_sent=False, device='cuda', temp=1.0, precision=16, topk=50000):
+    context = torch.tensor(context[-1022:], device=device, dtype=torch.long)
+    max_val = 2**precision
+    threshold = 2**(-precision)
+    cur_interval = [0, max_val] # bottom inclusive, top exclusive
+    prev = context
+    output = context
+    past = None
+    total_num = 0
+    total_num_for_stats = 0
+    total_log_probs = 0
+    total_kl = 0 # in bits
+    total_entropy_ptau = 0
+    total_num_sents = 0
+    with torch.no_grad():
+        i = 0
+        sent_finish = False
+        while i < len(message) or (finish_sent and not sent_finish):
+            logits, past = model(prev.unsqueeze(0), past=past)
+            past = limit_past(past)
+            logits[0, -1, -1] = -1e20 # endoftext token can't happen
+            logits[0, -1, 628] = -1e20 # 2 newlines token can't happen
+            logits, indices = logits[0, -1, :].sort(descending=True)
+            logits = logits.double()
+            logits_temp = logits / temp
+            probs_temp = F.softmax(logits_temp, dim=0)
+            log_probs_temp = F.log_softmax(logits_temp, dim=0)
+            log_probs = F.log_softmax(logits, dim=0)
+            # conditions for having reached the end of the message
+            if i >= len(message):
+                selection = 0
+                sent_finish = is_sent_finish(indices[selection].item(), enc)
+            else:
+                # Cutoff low probabilities that would be rounded to 0
+                cur_int_range = cur_interval[1]-cur_interval[0]
+                cur_threshold = 1/cur_int_range
+                k = min(max(2, (probs_temp < cur_threshold).nonzero()[0].item()), topk)
+                probs_temp_int = probs_temp[:k] # Cutoff all but top k
+                # Rescale to correct range
+                probs_temp_int = probs_temp_int/probs_temp_int.sum()*cur_int_range
+                # Round probabilities to integers given precision
+                probs_temp_int = probs_temp_int.round().long()
+                cum_probs = probs_temp_int.cumsum(0)
+                # Remove any elements from the bottom if rounding caused the total prob to be too large
+                overfill_index = (cum_probs > cur_int_range).nonzero()
+                if len(overfill_index) > 0:
+                    cum_probs = cum_probs[:overfill_index[0]]
+                # Add any mass to the top if removing/rounding causes the total prob to be too small
+                cum_probs += cur_int_range-cum_probs[-1] # add
+                # Get out resulting probabilities
+                probs_final = cum_probs.clone()
+                probs_final[1:] = cum_probs[1:] - cum_probs[:-1]
+                # Convert to position in range
+                cum_probs += cur_interval[0]
+                # Get selected index based on binary fraction from message bits
+                message_bits = message[i:i+precision]
+                if i+precision > len(message):
+                    message_bits = message_bits + [0]*(i+precision-len(message))
+                message_idx = bits2int(reversed(message_bits))
+                selection = (cum_probs > message_idx).nonzero()[0].item()
+                # Calculate new range as ints
+                new_int_bottom = cum_probs[selection-1] if selection > 0 else cur_interval[0]
+                new_int_top = cum_probs[selection]
+                # Convert range to bits
+                new_int_bottom_bits_inc = list(reversed(int2bits(new_int_bottom, precision)))
+                new_int_top_bits_inc = list(reversed(int2bits(new_int_top-1, precision))) # -1 here because upper bound is exclusive
+                # Consume most significant bits which are now fixed and update interval
+                num_bits_encoded = num_same_from_beg(new_int_bottom_bits_inc, new_int_top_bits_inc)
+                i += num_bits_encoded
+                new_int_bottom_bits = new_int_bottom_bits_inc[num_bits_encoded:] + [0]*num_bits_encoded
+                new_int_top_bits = new_int_top_bits_inc[num_bits_encoded:] + [1]*num_bits_encoded
+                cur_interval[0] = bits2int(reversed(new_int_bottom_bits))
+                cur_interval[1] = bits2int(reversed(new_int_top_bits))+1 # +1 here because upper bound is exclusive
+                # Gather statistics
+                total_log_probs += log_probs[selection].item()
+                q = probs_final.double()/probs_final.sum()
+                logq = q.log()
+                total_kl += kl(q, logq, log_probs[:len(q)])
+                total_entropy_ptau += entropy(probs_temp, log_probs_temp)
+                total_num_for_stats += 1
+            # Update history with new token
+            prev = indices[selection].view(1)
+            output = torch.cat((output, prev))
+            total_num += 1
+            #print(enc.decode(prev.tolist()), message_bits[:num_bits_encoded])
+            # For text->bits->text
+            partial = enc.decode(output[len(context):].tolist())
+            if '<eos>' in partial:
+                break
+    avg_NLL = -total_log_probs/total_num_for_stats
+    avg_KL = total_kl/total_num_for_stats
+    words_per_bit = total_num_for_stats/i
+    # avg_Hq = total_entropy_ptau/total_num_for_stats
+    return output[len(context):].tolist(), avg_NLL, avg_KL, words_per_bit
+def decode_arithmetic(model, enc, text, context, device='cuda', temp=1.0, precision=16, topk=50000):
+    # inp is a list of token indices
+    # context is a list of token indices
+    inp = enc.encode(text)
+    # common BPE error case: 128, 128 (2 newlines) is interpretted as 628 (2 newlines)
+    i = 0
+    while i < len(inp):
+        if inp[i] == 628:
+            inp[i] = 198
+            inp[i+1:i+1] = [198]
+            i += 2
+        else:
+            i += 1
+    context = torch.tensor(context[-1022:], device=device, dtype=torch.long)
+    max_val = 2**precision
+    threshold = 2**(-precision)
+    cur_interval = [0, max_val] # bottom inclusive, top exclusive
+    prev = context
+    past = None
+    message = []
+    with torch.no_grad():
+        i = 0
+        while i < len(inp):
+            logits, past = model(prev.unsqueeze(0), past=past)
+            past = limit_past(past)
+            logits[0, -1, -1] = -1e10 # endoftext can't happen
+            logits[0, -1, 628] = -1e10 # 2 newlines can't happen
+            logits, indices = logits[0, -1, :].sort(descending=True)
+            logits = logits.double()
+            logits_temp = logits / temp
+            probs_temp = F.softmax(logits_temp, dim=0)
+            # Cutoff low probabilities that would be rounded to 0
+            cur_int_range = cur_interval[1]-cur_interval[0]
+            cur_threshold = 1/cur_int_range
+            k = min(max(2, (probs_temp < cur_threshold).nonzero()[0].item()), topk)
+            probs_temp_int = probs_temp[:k] # Cutoff all but top k
+            # Rescale to correct range
+            probs_temp_int = probs_temp_int/probs_temp_int.sum()*cur_int_range
+            # Round probabilities to integers given precision
+            probs_temp_int = probs_temp_int.round().long()
+            cum_probs = probs_temp_int.cumsum(0)
+            # Remove any elements from the bottom if rounding caused the total prob to be too large
+            overfill_index = (cum_probs > cur_int_range).nonzero()
+            if len(overfill_index) > 0:
+                cum_probs = cum_probs[:overfill_index[0]]
+                k = overfill_index[0].item()
+            # Add any mass to the top if removing/rounding causes the total prob to be too small
+            cum_probs += cur_int_range-cum_probs[-1] # add
+            # Covnert to position in range
+            cum_probs += cur_interval[0]
+            rank = (indices == inp[i]).nonzero().item()
+            # Handle most errors that could happen because of BPE with heuristic
+            if rank >= k:
+                true_token_text = enc.decoder[inp[i]]
+                for rank_idx in range(k):
+                    prop_token_text = enc.decoder[indices[rank_idx].item()]
+                    # common case that is not caught
+                    if inp[i] == 128 and indices[rank_idx] == 198:
+                        rank = rank_idx
+                        inp[i] = indices[rank_idx].item()
+                        break
+                    # Is there a more likely prefix token that could be the actual token generated?
+                    if len(prop_token_text) <= len(true_token_text) and \
+                            prop_token_text == true_token_text[:len(prop_token_text)]:
+                        rank = rank_idx
+                        suffix = true_token_text[len(prop_token_text):]
+                        suffix_tokens = enc.encode(suffix) # a list
+                        inp[i] = indices[rank_idx].item()
+                        inp[i+1:i+1] = suffix_tokens # insert suffix tokens into list
+                        break
+                    # Is there a more likely longer token that could be the actual token generated?
+                    elif len(prop_token_text) > len(true_token_text) and \
+                              true_token_text == prop_token_text[:len(true_token_text)]:
+                        whole_text = true_token_text
+                        num_extra = 1
+                        while len(whole_text) < len(prop_token_text):
+                            whole_text += enc.decoder[inp[i+num_extra]]
+                            num_extra += 1
+                        if prop_token_text == whole_text[:len(prop_token_text)]:
+                            rank = rank_idx
+                            inp[i] = indices[rank_idx].item()
+                            for j in range(1, num_extra):
+                                del inp[i+j]
+                            if len(whole_text) > len(prop_token_text):
+                                suffix = whole_text[len(prop_token_text):]
+                                suffix_tokens = enc.encode(suffix) # a list
+                                inp[i+1:i+1] = suffix_tokens # insert suffix tokens into list
+                            break
+                else:
+                    print('Unable to fix BPE error: token received: %s=%d, text: %s' % (true_token_text, inp[i], text))
+                    rank = 0
+            selection = rank
+            # Calculate new range as ints
+            new_int_bottom = cum_probs[selection-1] if selection > 0 else cur_interval[0]
+            new_int_top = cum_probs[selection]
+            # Convert range to bits
+            new_int_bottom_bits_inc = list(reversed(int2bits(new_int_bottom, precision)))
+            new_int_top_bits_inc = list(reversed(int2bits(new_int_top-1, precision))) # -1 here because upper bound is exclusive
+            # Emit most significant bits which are now fixed and update interval
+            num_bits_encoded = num_same_from_beg(new_int_bottom_bits_inc, new_int_top_bits_inc)
+            if i == len(inp)-1:
+                new_bits = new_int_bottom_bits_inc
+            else:
+                new_bits = new_int_top_bits_inc[:num_bits_encoded]
+            message += new_bits
+            new_int_bottom_bits = new_int_bottom_bits_inc[num_bits_encoded:] + [0]*num_bits_encoded
+            new_int_top_bits = new_int_top_bits_inc[num_bits_encoded:] + [1]*num_bits_encoded
+            cur_interval[0] = bits2int(reversed(new_int_bottom_bits))
+            cur_interval[1] = bits2int(reversed(new_int_top_bits))+1 # +1 here because upper bound is exclusive
+            # Update history with new token
+            prev = torch.tensor([inp[i]], device=device, dtype=torch.long)
+            #print(enc.decode([inp[i]]), new_bits)
+            i += 1
+    return message