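"""Linguistic steganography demo: hide a secret message inside GPT-2 cover text.

encode_message() embeds a message into text sampled from GPT-2 conditioned on a
chosen context, using Meteor or one of the baseline schemes (arithmetic coding,
Huffman coding, or bins); decode_message() inverts the process given the same
mode, context, and parameters.
"""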
import bitarray
import math

from meteor import encode_meteor, decode_meteor
from utils import get_model, encode_context
from arithmetic import encode_arithmetic, decode_arithmetic
from block_baseline import get_bins, encode_block, decode_block
from huffman_baseline import encode_huffman, decode_huffman
from sample import sample

def encode_message(mode, message_str, context):
    """Embed `message_str` in GPT-2 cover text generated from `context`.

    Returns the cover text along with perplexity, KL, and words-per-bit stats.
    """
    enc, model = get_model(model_name='gpt2')

    ## PARAMETERS
    unicode_enc = False        # if True, embed raw UTF-8 bits instead of arithmetic-coded bits
    block_size = 3             # for huffman and bins
    temp = 0.9                 # for arithmetic and meteor
    precision = 26             # for arithmetic and meteor
    sample_tokens = 100        # for sample
    topk = 300
    finish_sent = True         # force-finish the last sentence; displayed stats cover the unfinished portion
    meteor_sort = False
    meteor_random = False
    key = b'\x01' * 64         # the original `b'0x01'*64` looks like a mangled escape for \x01 bytes
    sample_seed_prefix = b'sample'
    nonce = b'\x00' * 16

    ## VALIDATE PARAMETERS
    if mode not in ['meteor', 'arithmetic', 'huffman', 'bins', 'sample']:
        raise NotImplementedError

    if mode == 'bins':
        bin2words, words2bin = get_bins(len(enc.encoder), block_size)
    # `context` is supplied by the caller; an example context lives in the
    # commented-out driver at the bottom of this file.
    context_tokens = encode_context(context, enc)

    # ------------------------------------------------------------------
    # First encode the message to (near-)uniform bits, without any context.
    # (Arithmetic coding vs. raw ASCII is not essential here, but it is more
    # efficient when the message is natural language.)
    if unicode_enc:
        ba = bitarray.bitarray()
        ba.frombytes(message_str.encode('utf-8'))
        message = ba.tolist()
    else:
        message_ctx = [enc.encoder['<|endoftext|>']]
        message_str += '<eos>'
        message = decode_arithmetic(model, enc, message_str, message_ctx, precision=40, topk=60000)

    # Next, encode those bits into cover text conditioned on the chosen context.
    Hq = 0
    if mode == 'arithmetic':
        out, nll, kl, words_per_bit = encode_arithmetic(model, enc, message, context_tokens, temp=temp, finish_sent=finish_sent, precision=precision, topk=topk)
    elif mode == 'huffman':
        out, nll, kl, words_per_bit = encode_huffman(model, enc, message, context_tokens, block_size, finish_sent=finish_sent)
    elif mode == 'bins':
        out, nll, kl, words_per_bit = encode_block(model, enc, message, context_tokens, block_size, bin2words, words2bin, finish_sent=finish_sent)
    elif mode == 'meteor':
        out, nll, kl, words_per_bit = encode_meteor(model, enc, message, context_tokens, temp=temp, finish_sent=finish_sent,
                                                    precision=precision, topk=topk, is_sort=meteor_sort,
                                                    randomize_key=meteor_random, input_key=key, input_nonce=nonce)
    elif mode == 'sample':
        out, nll, kl, Hq = sample(model, enc, sample_tokens, context_tokens, temperature=temp, topk=topk)
        words_per_bit = 1
    text = enc.decode(out)

    print("=" * 40 + " Encoding " + "=" * 40)
    print(text)
    # Hq is measured in nats; dividing by ln(2) ~= 0.69315 converts it to bits.
    print('ppl: %0.2f, kl: %0.3f, words/bit: %0.2f, bits/word: %0.2f, entropy: %.2f' %
          (math.exp(nll), kl, words_per_bit, 1 / words_per_bit, Hq / 0.69315))

    stats = {
        "ppl": math.exp(nll),
        "kl": kl,
        "wordsbit": words_per_bit,
        "entropy": Hq / 0.69315
    }
    return text, stats["ppl"], stats["kl"], stats["wordsbit"]
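
# A minimal usage sketch for encode_message (the message and context strings are
# hypothetical; assumes GPT-2 weights and the repo modules above are available):
#
#   cover_text, ppl, kl, words_per_bit = encode_message(
#       'meteor', 'sample secret message',
#       'Despite a long history of research, practical steganography is hard.')
#   # cover_text is fluent GPT-2 output with the message embedded in its token choices.
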

def decode_message(mode, text, context):
    """Recover the hidden message from cover `text`.

    Must be called with the same mode, context, and parameters that
    encode_message() used.
    """
    enc, model = get_model(model_name='gpt2')

    ## PARAMETERS (must match encode_message)
    unicode_enc = False
    block_size = 3             # for huffman and bins
    temp = 0.9                 # for arithmetic and meteor
    precision = 26             # for arithmetic and meteor
    topk = 300
    meteor_sort = False
    key = b'\x01' * 64         # see the note on `key` in encode_message
    nonce = b'\x00' * 16

    ## VALIDATE PARAMETERS
    if mode not in ['meteor', 'arithmetic', 'huffman', 'bins', 'sample']:
        raise NotImplementedError

    if mode == 'bins':
        bin2words, words2bin = get_bins(len(enc.encoder), block_size)

    context_tokens = encode_context(context, enc)

    if mode == 'sample':
        return None  # plain sampling embeds no message, so there is nothing to recover

    if mode == 'arithmetic':
        message_rec = decode_arithmetic(model, enc, text, context_tokens, temp=temp, precision=precision, topk=topk)
    elif mode == 'huffman':
        message_rec = decode_huffman(model, enc, text, context_tokens, block_size)
    elif mode == 'bins':
        message_rec = decode_block(model, enc, text, context_tokens, block_size, bin2words, words2bin)
    elif mode == 'meteor':
        message_rec = decode_meteor(model, enc, text, context_tokens, temp=temp,
                                    precision=precision, topk=topk, is_sort=meteor_sort,
                                    input_key=key, input_nonce=nonce)
print("="*35 + " Recovered Message " + "="*35) | |
# print(message_rec) | |
# print("=" * 80) | |
# Finally map message bits back to original text | |
if unicode_enc: | |
message_rec = [bool(item) for item in message_rec] | |
ba = bitarray.bitarray(message_rec) | |
reconst = ba.tobytes().decode('utf-8', 'ignore') | |
else: | |
message_ctx = [enc.encoder['<|endoftext|>']] | |
reconst = encode_arithmetic(model, enc, message_rec, message_ctx, precision=40, topk=60000) | |
reconst = enc.decode(reconst[0]) | |
print(reconst[:-5]) | |
print("=" * 80) | |
return reconst[:-5] | |
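
# Round-trip sketch (placeholder message/context; decoding must reuse the exact
# mode and context from encoding, and should reproduce the original message):
#
#   secret = 'meet at noon'
#   ctx = 'Some innocuous context sentence.'
#   cover, _, _, _ = encode_message('meteor', secret, ctx)
#   assert decode_message('meteor', cover, ctx) == secret
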
# Example driver (left commented out, as in the original):
# def main():
#     chosen_context = ("Despite a long history of research and wide-spread applications "
#                       "to censorship resistant systems, practical steganographic systems "
#                       "capable of embedding messages into realistic communication "
#                       "distributions, like text, do not exist.")
#     message_text = "generate text!"
#     mode = input("Please enter mode (meteor, arithmetic, huffman, bins, or sample): ")
#     text, ppl, kl, words_per_bit = encode_message(mode, message_text, chosen_context)
#     decode_message(mode, text, chosen_context)
#
# if __name__ == '__main__':
#     main()
# chosen_context = "Despite a long history of research and wide-spread applications to censorship resistant systems, practical steganographic systems capable of embedding messages into realistic communication distributions, like text, do not exist." #@param ["Washington received his initial military training and command with the Virginia Regiment during the French and Indian War. He was later elected to the Virginia House of Burgesses and was named a delegate to the Continental Congress, where he was appointed Commanding General of the nation's Continental Army. Washington led American forces, allied with France, in the defeat of the British at Yorktown. Once victory for the United States was in hand in 1783, Washington resigned his commission.", "The Alvarez hypothesis posits that the mass extinction of the dinosaurs and many other living things during the Cretaceous-Paleogene extinction event was caused by the impact of a large asteroid on the Earth. Prior to 2013, it was commonly cited as having happened about 65 million years ago, but Renne and colleagues (2013) gave an updated value of 66 million years. Evidence indicates that the asteroid fell in the Yucatan Peninsula, at Chicxulub, Mexico. The hypothesis is named after the father-and-son team of scientists Luis and Walter Alvarez, who first suggested it in 1980. Shortly afterwards, and independently, the same was suggested by Dutch paleontologist Jan Smit.", "Despite a long history of research and wide-spread applications to censorship resistant systems, practical steganographic systems capable of embedding messages into realistic communication distributions, like text, do not exist."] {allow-input: true} | |
# # chosen_context = "Washington received his initial military training and command with the Virginia Regiment during the French and Indian War. He was later elected to the Virginia House of Burgesses and was named a delegate to the Continental Congress, where he was appointed Commanding General of the nation's Continental Army. Washington led American forces, allied with France, in the defeat of the British at Yorktown. Once victory for the United States was in hand in 1783, Washington resigned his commission." | |
# # chosen_context += "\n\n" # to add a little spacing | |
# # #@title { run: "auto", display-mode: "form" } | |
# message_text = "generate text!" #@param {type:"string"} | |
# mode = input("Please enter mode (arithmetic, huffman, bins, or sample): ") | |
# #@title Run me! | |
# #@markdown Make sure to re-run this cell if you change the parameters above. | |
# x = encode_message(mode, message_text, chosen_context) | |
# # print(x[0]) | |
# y = decode_message(mode, x[0], chosen_context) |