File size: 3,686 Bytes
9e066ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
'''
Adapted from OpenAssistant's original xor_codec.py:
https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor/raw/main/xor_codec.py
'''
import os
import sys
import shutil
import gzip
import numpy
from pathlib import Path

def xor_uncompressed(dst, src_payload, src_base, block_size=4096):
    """XOR a payload file with a base file and write the raw result to dst.

    Both inputs are read in chunks of ``block_size`` bytes. The output always
    has the payload's length: when the base chunk is shorter it is padded
    with zero bytes (so the remaining payload bytes pass through unchanged),
    and when it is longer it is truncated to the payload chunk's length.

    Args:
        dst: Path of the output file (opened for binary writing).
        src_payload: Path of the payload file (read as plain binary).
        src_base: Path of the base file to XOR against.
        block_size: Number of bytes read per iteration; must be positive.

    Raises:
        ValueError: If ``block_size`` is not positive (the read loop could
            never detect end-of-file and would spin forever).
        OSError: If any of the files cannot be opened, read or written.
    """
    if block_size <= 0:
        raise ValueError("block_size must be a positive integer")

    # A single ``with`` closes all three handles even if a read/write raises.
    with open(src_payload, 'rb') as fp_payload, \
            open(src_base, 'rb') as fp_base, \
            open(dst, 'wb') as fp:
        while True:
            buf1 = numpy.array(bytearray(fp_payload.read(block_size)), dtype=numpy.uint8)
            buf2 = numpy.array(bytearray(fp_base.read(block_size)), dtype=numpy.uint8)
            padding = len(buf1) - len(buf2)
            if padding > 0:
                # Base ran out first: XOR with zero leaves the payload as-is.
                buf2 = numpy.pad(buf2, (0, padding), 'constant', constant_values=(0,))
            elif padding < 0:
                # Base is longer: ignore the bytes past the payload's end.
                buf2 = buf2[:len(buf1)]
            fp.write(numpy.bitwise_xor(buf1, buf2))
            # A short (or empty) payload read means end-of-file was reached.
            if len(buf1) < block_size:
                break

def xor_encode(dst, src_payload, src_base, block_size=4096):
    """XOR a payload file with a base file and write the result gzip-compressed.

    Both inputs are read as plain binary in chunks of ``block_size`` bytes.
    The uncompressed output has the payload's length: a shorter base chunk is
    padded with zero bytes, a longer one is truncated to the payload chunk.

    Args:
        dst: Path of the gzip file to create.
        src_payload: Path of the payload file (read as plain binary).
        src_base: Path of the base file to XOR against.
        block_size: Number of bytes read per iteration; must be positive.

    Raises:
        ValueError: If ``block_size`` is not positive (the read loop could
            never detect end-of-file and would spin forever).
        OSError: If any of the files cannot be opened, read or written.
    """
    if block_size <= 0:
        raise ValueError("block_size must be a positive integer")

    # A single ``with`` closes all three handles even if a read/write raises.
    with open(src_payload, 'rb') as fp_payload, \
            open(src_base, 'rb') as fp_base, \
            gzip.open(dst, 'wb') as fp:
        while True:
            buf1 = numpy.array(bytearray(fp_payload.read(block_size)), dtype=numpy.uint8)
            buf2 = numpy.array(bytearray(fp_base.read(block_size)), dtype=numpy.uint8)
            padding = len(buf1) - len(buf2)
            if padding > 0:
                # Base ran out first: XOR with zero leaves the payload as-is.
                buf2 = numpy.pad(buf2, (0, padding), 'constant', constant_values=(0,))
            elif padding < 0:
                # Base is longer: ignore the bytes past the payload's end.
                buf2 = buf2[:len(buf1)]
            fp.write(numpy.bitwise_xor(buf1, buf2))
            # A short (or empty) payload read means end-of-file was reached.
            if len(buf1) < block_size:
                break

def xor_decode(dst, src_payload, src_base, block_size=4096):
    """XOR a gzip-compressed payload with a base file and write the raw result.

    The payload is decompressed on the fly and both streams are consumed in
    chunks of ``block_size`` bytes. The output has the decompressed payload's
    length: a shorter base chunk is padded with zero bytes, a longer one is
    truncated to the payload chunk.

    Args:
        dst: Path of the output file (opened for binary writing).
        src_payload: Path of the gzip-compressed payload file.
        src_base: Path of the base file to XOR against.
        block_size: Number of bytes read per iteration; must be positive.

    Raises:
        ValueError: If ``block_size`` is not positive (the read loop could
            never detect end-of-file and would spin forever).
        OSError: If any of the files cannot be opened, read or written
            (this includes a payload that is not valid gzip data).
    """
    if block_size <= 0:
        raise ValueError("block_size must be a positive integer")

    # A single ``with`` closes all three handles even if a read/write raises.
    with gzip.open(src_payload, 'rb') as fp_payload, \
            open(src_base, 'rb') as fp_base, \
            open(dst, 'wb') as fp:
        while True:
            buf1 = numpy.array(bytearray(fp_payload.read(block_size)), dtype=numpy.uint8)
            buf2 = numpy.array(bytearray(fp_base.read(block_size)), dtype=numpy.uint8)
            padding = len(buf1) - len(buf2)
            if padding > 0:
                # Base ran out first: XOR with zero leaves the payload as-is.
                buf2 = numpy.pad(buf2, (0, padding), 'constant', constant_values=(0,))
            elif padding < 0:
                # Base is longer: ignore the bytes past the payload's end.
                buf2 = buf2[:len(buf1)]
            fp.write(numpy.bitwise_xor(buf1, buf2))
            # A short (or empty) payload read means end-of-file was reached.
            if len(buf1) < block_size:
                break

def xor_dir(dst, src_payload, src_base, decode=True, compress=True):
    """Apply the XOR codec to every entry of a payload directory.

    The destination directory is created if needed. Entries whose name
    contains ".json" are copied verbatim; every other entry is XOR-ed against
    the same-named entry in ``src_base`` and written under the same name in
    ``dst``. Processing is best-effort: a failure on one entry is reported
    and the remaining entries are still handled.

    Args:
        dst: Destination directory.
        src_payload: Directory whose entries are listed and processed.
        src_base: Directory holding the base files, matched by name.
        decode: With ``compress`` set, selects ``xor_decode`` (True) or
            ``xor_encode`` (False). Ignored when ``compress`` is False.
        compress: When False, ``xor_uncompressed`` is used for every entry.
    """
    if compress:
        xor = xor_decode if decode else xor_encode
    else:
        xor = xor_uncompressed
    Path(dst).mkdir(parents=True, exist_ok=True)
    for path in os.listdir(src_payload):
        # Don't care about uncopyrightable text files, just copy over.
        if ".json" in path:
            print("[*] Copying '%s'" % path)
            shutil.copy(f"{src_payload}/{path}", f"{dst}/{path}")
            continue

        print("[*] Processing '%s'" % path)
        try:
            xor("%s/%s" % (dst, path), "%s/%s" % (src_payload, path), "%s/%s" % (src_base, path))
        except Exception as e:
            # Keep going with the other entries, but say what went wrong so
            # the failure can actually be diagnosed.
            print("Exception when processing '%s': %s: %s" % (path, type(e).__name__, e))

if __name__ == "__main__":
    # Three positional arguments are required; everything after them is
    # treated as an optional flag.
    if len(sys.argv) < 4:
        print("Usage: xor.py <DESTINATION> <PAYLOAD SOURCE> <LLAMA SOURCE> [--encode] [--compress]")
        # sys.exit is always available (the bare exit() helper is only
        # injected by the site module), and a non-zero status lets calling
        # scripts detect the usage error.
        sys.exit(1)
    dst, src_payload, src_base = sys.argv[1:4]
    flags = sys.argv[4:]
    # Decoding without compression is the default; flags opt out/in.
    decode = "--encode" not in flags
    compress = "--compress" in flags
    xor_dir(dst, src_payload, src_base, decode=decode, compress=compress)