import bitarray
import math

from meteor import encode_meteor, decode_meteor
from utils import get_model, encode_context
from arithmetic import encode_arithmetic, decode_arithmetic
from block_baseline import get_bins, encode_block, decode_block
from huffman_baseline import encode_huffman, decode_huffman
from sample import sample


def encode_message(mode, message_str, context):
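    """Embed `message_str` in GPT-2 cover text generated from `context`.

    Returns (cover_text, ppl, kl, words_per_bit); for mode 'sample' the
    "cover text" is plain sampled text that carries no message.
    """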
    enc, model = get_model(model_name='gpt2')

    ## PARAMETERS
    unicode_enc = False
    block_size = 3       # for huffman and bins
    temp = 0.9           # for arithmetic
    precision = 26       # for arithmetic
    sample_tokens = 100  # for sample
    topk = 300
    finish_sent = True   # force-finish the sentence; if so, displayed stats are for the unfinished sentence
    meteor_sort = False
    meteor_random = False
    key = b'0x01'*64
    sample_seed_prefix = b'sample'
    nonce = b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
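    # NOTE: fixed demo key/nonce; a real deployment would use a fresh random key
    # and never reuse a nonce with the same key.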

    ## VALIDATE PARAMETERS
    if mode not in ['meteor', 'arithmetic', 'huffman', 'bins', 'sample']:
        raise NotImplementedError(f'unsupported mode: {mode}')

    if mode == 'bins':
        bin2words, words2bin = get_bins(len(enc.encoder), block_size)

    context_tokens = encode_context(context, enc)

    # ------------------------------------------------------------------------------------
    # First encode the message to uniform bits, without any context.
    # (Arithmetic coding vs. plain ASCII is not essential here, but it is more
    # efficient when the message is natural language.)
    if unicode_enc:
        ba = bitarray.bitarray()
        ba.frombytes(message_str.encode('utf-8'))
        message = ba.tolist()
    else:
        message_ctx = [enc.encoder['<|endoftext|>']]
        message_str += '<eos>'
        message = decode_arithmetic(model, enc, message_str, message_ctx, precision=40, topk=60000)
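    # Running the arithmetic *decoder* on the plaintext compresses it into a
    # near-uniform bitstring under the model; decode_message later inverts this
    # step with encode_arithmetic to recover the original string.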

    # Next, encode the bits into cover text, using the arbitrary public context.
    Hq = 0
    if mode == 'arithmetic':
        out, nll, kl, words_per_bit = encode_arithmetic(model, enc, message, context_tokens, temp=temp, finish_sent=finish_sent, precision=precision, topk=topk)
    elif mode == 'huffman':
        out, nll, kl, words_per_bit = encode_huffman(model, enc, message, context_tokens, block_size, finish_sent=finish_sent)
    elif mode == 'bins':
        out, nll, kl, words_per_bit = encode_block(model, enc, message, context_tokens, block_size, bin2words, words2bin, finish_sent=finish_sent)
    elif mode == 'meteor':
        out, nll, kl, words_per_bit = encode_meteor(model, enc, message, context_tokens, temp=temp, finish_sent=finish_sent,
                                                    precision=precision, topk=topk, is_sort=meteor_sort, randomize_key=meteor_random,
                                                    input_key=key, input_nonce=nonce)
    elif mode == 'sample':
        out, nll, kl, Hq = sample(model, enc, sample_tokens, context_tokens, temperature=temp, topk=topk)
        words_per_bit = 1
    text = enc.decode(out)

    print("="*40 + " Encoding " + "="*40)
    print(text)
    # math.log(2) converts entropy from nats to bits
    print('ppl: %0.2f, kl: %0.3f, words/bit: %0.2f, bits/word: %0.2f, entropy: %.2f'
          % (math.exp(nll), kl, words_per_bit, 1/words_per_bit, Hq/math.log(2)))

    stats = {
        "ppl": math.exp(nll),
        "kl": kl,
        "wordsbit": words_per_bit,
        "entropy": Hq/math.log(2)
    }

    return text, stats["ppl"], stats["kl"], stats["wordsbit"]

def decode_message(mode, text, context):
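    """Recover a hidden message from steganographic cover `text`.

    Returns the reconstructed message string (None for mode 'sample').
    """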
    enc, model = get_model(model_name='gpt2')

    ## PARAMETERS
    unicode_enc = False
    block_size = 3       # for huffman and bins
    temp = 0.9           # for arithmetic
    precision = 26       # for arithmetic
    sample_tokens = 100  # for sample
    topk = 300
    finish_sent = True   # force-finish the sentence; if so, displayed stats are for the unfinished sentence
    meteor_sort = False
    meteor_random = False
    key = b'0x01'*64
    sample_seed_prefix = b'sample'
    nonce = b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
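    # These values (temp, precision, topk, key, nonce, ...) must match the ones
    # used in encode_message, or the recovered bitstream will not line up.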

    ## VALIDATE PARAMETERS
    if mode not in ['meteor', 'arithmetic', 'huffman', 'bins', 'sample']:
        raise NotImplementedError(f'unsupported mode: {mode}')

    if mode == 'bins':
        bin2words, words2bin = get_bins(len(enc.encoder), block_size)

    context_tokens = encode_context(context, enc)

    if mode == 'sample':
        # Pure sampling embeds no message, so there is nothing to recover.
        return None

    if mode == 'arithmetic':
        message_rec = decode_arithmetic(model, enc, text, context_tokens, temp=temp, precision=precision, topk=topk)
    elif mode == 'huffman':
        message_rec = decode_huffman(model, enc, text, context_tokens, block_size)
    elif mode == 'bins':
        message_rec = decode_block(model, enc, text, context_tokens, block_size, bin2words, words2bin)
    elif mode == 'meteor':
        message_rec = decode_meteor(model, enc, text, context_tokens, temp=temp,
                                    precision=precision, topk=topk, is_sort=meteor_sort,
                                    input_key=key, input_nonce=nonce)
print("="*35 + " Recovered Message " + "="*35)
# print(message_rec)
# print("=" * 80)
# Finally map message bits back to original text
if unicode_enc:
message_rec = [bool(item) for item in message_rec]
ba = bitarray.bitarray(message_rec)
reconst = ba.tobytes().decode('utf-8', 'ignore')
else:
message_ctx = [enc.encoder['<|endoftext|>']]
reconst = encode_arithmetic(model, enc, message_rec, message_ctx, precision=40, topk=60000)
reconst = enc.decode(reconst[0])
print(reconst[:-5])
print("=" * 80)
return reconst[:-5]


def main():
    chosen_context = "Despite a long history of research and wide-spread applications to censorship resistant systems, practical steganographic systems capable of embedding messages into realistic communication distributions, like text, do not exist."
    chosen_context += "\n\n"  # add a little spacing after the context
    message_text = "generate text!"
    mode = input("Please enter mode (meteor, arithmetic, huffman, bins, or sample): ")

    x = encode_message(mode, message_text, chosen_context)
    y = decode_message(mode, x[0], chosen_context)


if __name__ == '__main__':
    main()