File size: 6,028 Bytes
86a2bff
 
58773bb
f9437fe
9cbc665
58773bb
 
b4f4d50
 
568b52b
 
 
b4f4d50
1d56a5d
 
 
 
 
 
 
 
 
25d463a
8ef67fc
25d463a
2bce853
 
 
25d463a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d56a5d
8ef67fc
cecf2b8
1d56a5d
f3fdb65
 
 
 
 
 
 
0ed5800
f3fdb65
 
 
 
 
c485ef9
 
 
1d7fc0d
 
 
 
c485ef9
f3fdb65
 
 
 
 
 
 
8c2cd7c
2c26c26
f3fdb65
76d23b8
 
f3fdb65
76d23b8
c485ef9
 
0ed5800
8c2cd7c
1d7fc0d
 
b4f4d50
1d7fc0d
 
b4f4d50
 
 
 
f3fdb65
 
 
0ed5800
 
f3fdb65
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import gradio as gr

import os
import re
import subprocess
import tempfile

import transformers

import numpy
# Log the installed NumPy version at startup.
print(numpy.__version__)

# Build the Hugging Face pipeline once at import time; function_change_fn
# calls it with the disassembly text of the selected function.
# NOTE(review): return_all_scores=True presumably makes the pipeline report a
# score for every label rather than only the best one - confirm against the
# installed transformers version.
pipe = transformers.pipeline(model="ejschwartz/oo-method-test-model-bylibrary", return_all_scores=True)

# gr.load("ejschwartz/oo-method-test-model-bylibrary", src="models", examples=[
#     '\nL1: 55 ?? push ebp\n 8b ec ?? mov ebp, esp\n 5d ?? pop ebp\n c3 ?? ret\n\n',
#     '\nL1: 55 ?? push ebp\n 8b ec ?? mov ebp, esp\n 51 ?? push ecx\n a1 b0 5d 43 00 ?? mov eax, dword ds:[0x00435db0]\n 83 f8 fe ?? cmp eax, 0xfe<254,-2>\n 75 0a ?? jne basic block L4\n\nL2: e8 4e 17 00 00 ?? call function 0x00415d25\n\nL3: a1 b0 5d 43 00 ?? mov eax, dword ds:[0x00435db0]\n\nL4: 83 f8 ff ?? cmp eax, 0xff<255,-1>\n 75 07 ?? jne basic block L6\n\nL5: b8 ff ff 00 00 ?? mov eax, 0x0000ffff\n eb 1b ?? jmp basic block L9\n\nL6: 6a 00 ?? push 0\n 8d 4d fc ?? lea ecx, ss:[ebp + 0xfc<252,-4>]\n 51 ?? push ecx\n 6a 01 ?? push 1\n 8d 4d 08 ?? lea ecx, ss:[ebp + 8]\n 51 ?? push ecx\n 50 ?? push eax\n ff 15 28 91 43 00 ?? call dword ds:[0x00439128]\n\nL7: 85 c0 ?? test eax, eax\n 74 e2 ?? je basic block L5\n\nL8: 66 8b 45 08 ?? mov ax, word ss:[ebp + 8]\n\nL9: 8b e5 ?? mov esp, ebp\n 5d ?? pop ebp\n c3 ?? ret\n\n',
#     '\nL1: 0f b7 41 32 ?? movzx eax, word ds:[ecx + 0x32<50>]\n 83 e8 20 ?? sub eax, 0x20<32>\n 74 2d ?? je basic block L10\n\nL2: 83 e8 03 ?? sub eax, 3\n 74 22 ?? je basic block L9\n\nL3: 83 e8 08 ?? sub eax, 8\n 74 17 ?? je basic block L8\n\nL4: 48 ?? dec eax\n 83 e8 01 ?? sub eax, 1\n 74 0b ?? je basic block L7\n\nL5: 83 e8 03 ?? sub eax, 3\n 75 1c ?? jne basic block L11\n\nL6: 83 49 20 08 ?? or dword ds:[ecx + 0x20<32>], 8\n eb 16 ?? jmp basic block L11\n\nL7: 83 49 20 04 ?? or dword ds:[ecx + 0x20<32>], 4\n eb 10 ?? jmp basic block L11\n\nL8: 83 49 20 01 ?? or dword ds:[ecx + 0x20<32>], 1\n eb 0a ?? jmp basic block L11\n\nL9: 83 49 20 20 ?? or dword ds:[ecx + 0x20<32>], 0x20<32>\n eb 04 ?? jmp basic block L11\n\nL10: 83 49 20 02 ?? or dword ds:[ecx + 0x20<32>], 2\n\nL11: b0 01 ?? mov al, 1\n c3 ?? ret\n\n',
#     "\nL1: 8b ff ?? mov edi, edi\n 55 ?? push ebp\n 8b ec ?? mov ebp, esp\n 83 ec 08 ?? sub esp, 8\n 89 4d f8 ?? mov dword ss:[ebp + 0xf8<248,-8>], ecx\n 8b 4d f8 ?? mov ecx, dword ss:[ebp + 0xf8<248,-8>]\n e8 e6 ac f9 ff ?? call function 0x00401569\n\nL2: 23 45 08 ?? and eax, dword ss:[ebp + 8]\n 3b 45 08 ?? cmp eax, dword ss:[ebp + 8]\n 75 09 ?? jne basic block L4\n\nL3: c7 45 fc 01 00 00 00 ?? mov dword ss:[ebp + 0xfc<252,-4>], 1\n eb 07 ?? jmp basic block L5\n\nL4: c7 45 fc 00 00 00 00 ?? mov dword ss:[ebp + 0xfc<252,-4>], 0\n\nL5: 8a 45 fc ?? mov al, byte ss:[ebp + 0xfc<252,-4>]\n 8b e5 ?? mov esp, ebp\n 5d ?? pop ebp\n c2 04 00 ?? ret 4\n\n"
# ],
#                  live=True, title="Is it a method or a function?").launch(server_name="0.0.0.0", server_port=7860)


def get_all_dis(bname, addrs=None):
    """Disassemble *bname* and return each function's listing keyed by address.

    Runs ``bat-ana`` to write an analysis state into a temporary file, then
    ``bat-dis`` to print the disassembly from that state. The output is split
    on the ``;;; function 0x...`` header lines.

    Args:
        bname: Path of the binary to analyse.
        addrs: Optional iterable of addresses; each one is passed to
            ``bat-ana`` as ``--function-at ADDR``.

    Returns:
        A dict mapping function address (int) to that function's disassembly
        (bytes). Runs of spaces are collapsed to one space and every line
        containing ``;;`` is dropped. When several functions report the same
        address, the first one is kept and a warning is printed.

    Raises:
        subprocess.CalledProcessError: If either tool exits with a non-zero
            status.
        FileNotFoundError: If ``bat-ana`` or ``bat-dis`` is not on ``PATH``.
    """
    addr_args = []
    if addrs is not None:
        for addr in addrs:
            addr_args += ["--function-at", str(addr)]

    # The commands are passed as argument lists (no shell), so a binary path
    # containing spaces or shell metacharacters is handed over verbatim.
    # The temporary analysis file is removed when the ``with`` block exits.
    with tempfile.NamedTemporaryFile(
        prefix=os.path.basename(bname) + "_", suffix=".bat_ana"
    ) as anafile:
        ananame = anafile.name
        subprocess.check_output(
            ["bat-ana", *addr_args, "--no-post-analysis", "-o", ananame, bname],
            stderr=subprocess.DEVNULL,
        )
        output = subprocess.check_output(
            [
                "bat-dis",
                "--no-insn-address",
                "--no-bb-cfg-arrows",
                "--color=off",
                ananame,
            ],
            stderr=subprocess.DEVNULL,
        )

    # Collapse column padding so the text only has single spaces.
    output = re.sub(b" +", b" ", output)

    func_dis = {}

    def _store(addr, lines):
        # Record one finished function; the first listing for an address wins.
        if addr is None:
            return
        if addr in func_dis:
            print("Warning: Ignoring multiple functions at the same address")
        else:
            func_dis[addr] = b"\n".join(lines)

    current_addr = None
    current_lines = []
    for line in output.splitlines():
        if line.startswith(b";;; function 0x"):
            _store(current_addr, current_lines)
            # Header looks like ";;; function 0x<hex> ..."; third token is the address.
            current_addr = int(line.split()[2], 16)
            current_lines = []

        # Lines containing ";;" (headers and annotations) are not part of the listing.
        if b";;" not in line:
            current_lines.append(line)

    _store(current_addr, current_lines)
    return func_dis

def get_funs(f):
    """Return the addresses of all functions found in *f*, one per line.

    *f* must expose the path of the binary as ``f.name``. Each address is
    formatted as lowercase hexadecimal with a ``0x`` prefix.
    """
    disassembly_by_addr = get_all_dis(f.name)
    formatted = [f"{addr:#x}" for addr in disassembly_by_addr]
    return "\n".join(formatted)

# Build the Gradio UI: a file upload, plus an initially hidden column holding
# the function dropdown, a results textbox and a label.
with gr.Blocks() as demo:

    # Holds the dict returned by get_all_dis between events; written by
    # file_change_fn and read by function_change_fn.
    all_dis_state = gr.State()

    gr.Markdown(
        """
    # Function/Method Detector
    First, upload a binary. Then, select a function from the dropdown. The function's disassembly will be displayed below.
    """
    )

    file_widget = gr.File(label="Binary file")

    # Hidden until a file has been uploaded (file_change_fn toggles visibility).
    with gr.Column(visible=False) as col:
        #output = gr.Textbox("Output")
        # "Woohoo!" is only a placeholder; file_change_fn replaces the choices
        # with the function addresses of the uploaded binary.
        fun_dropdown = gr.Dropdown(label="Select a function", choices=["Woohoo!"], interactive=True)

        with gr.Row(visible=True) as result:
            # NOTE(review): confirm the installed Gradio version accepts
            # `readonly`; `interactive=False` may be the intended option.
            disassembly = gr.Textbox(label="Disassembly", lines=20, readonly=True)
            # Not connected to any event handler in this file yet.
            clazz = gr.Label()

    def file_change_fn(file):
        """Handle a change of the uploaded binary.

        Args:
            file: The uploaded file object (its path is read from
                ``file.name``), or ``None`` when the upload was cleared.

        Returns:
            A dict of component updates. With no file, or with a file in
            which no functions were found, the function column is hidden.
            Otherwise the column is shown, the dropdown lists every function
            address (first one selected) and the state holds the per-function
            disassembly returned by ``get_all_dis``.
        """
        if file is None:
            return {col: gr.update(visible=False),
                    all_dis_state: None}

        fun_data = get_all_dis(file.name)
        addrs = ["%#x" % addr for addr in fun_data]

        if not addrs:
            # Nothing was disassembled: there is no first address to select,
            # so keep the picker hidden instead of failing on addrs[0].
            return {col: gr.update(visible=False),
                    fun_dropdown: gr.Dropdown.update(choices=[], value=None),
                    all_dis_state: fun_data}

        return {col: gr.update(visible=True),
                fun_dropdown: gr.Dropdown.update(choices=addrs, value=addrs[0]),
                all_dis_state: fun_data
                }
        
    def function_change_fn(selected_fun, fun_data):
        """Run the model on the selected function and show its raw output.

        Args:
            selected_fun: The dropdown value, a hexadecimal address string
                such as ``"0x401000"``, or ``None`` when nothing is selected.
            fun_data: The dict of address (int) to disassembly (bytes) stored
                by ``file_change_fn``, or ``None`` when no file is loaded.

        Returns:
            A dict updating the ``disassembly`` textbox. The textbox is
            cleared when there is no selection, no stored data, or the
            selected address is not in the stored data.
        """
        # The dropdown can be empty and the state can be None (no file), so
        # clear the output instead of raising inside the event handler.
        if not selected_fun or not fun_data:
            return {disassembly: gr.Textbox.update(value="")}

        raw_disassembly = fun_data.get(int(selected_fun, 16))
        if raw_disassembly is None:
            # The selection does not belong to the currently stored binary.
            return {disassembly: gr.Textbox.update(value="")}

        disassembly_str = raw_disassembly.decode("utf-8")
        model_results = pipe(disassembly_str)

        # NOTE(review): the textbox receives the stringified model output, not
        # the disassembly text itself; the `clazz` label is not updated here.
        return {disassembly: gr.Textbox.update(value=str(model_results))}

    # A new (or cleared) upload refreshes column visibility, the dropdown
    # choices and the stored per-function disassembly.
    file_widget.change(file_change_fn, file_widget, [col, fun_dropdown, all_dis_state])

    # Selecting a function runs the model on it and writes to the textbox.
    fun_dropdown.change(function_change_fn, [fun_dropdown, all_dis_state], [disassembly])

# Start the app on port 7860, listening on all interfaces ("0.0.0.0").
# NOTE(review): share=True also requests a public Gradio share link - confirm
# that is intended for this deployment.
demo.launch(server_name="0.0.0.0", server_port=7860, share=True)