Spaces:
Sleeping
Sleeping
File size: 3,927 Bytes
86a2bff 58773bb f9437fe 9cbc665 58773bb b1fd156 1d56a5d 8ef67fc 25d463a 2bce853 25d463a 1d56a5d 8ef67fc cecf2b8 1d56a5d f3fdb65 10ca2d7 a17ae36 66a9140 f3fdb65 c485ef9 885832d c485ef9 1d7fc0d a17ae36 1d7fc0d 1f9e351 1d7fc0d c485ef9 c1aaef4 f3fdb65 8c2cd7c 2c26c26 f3fdb65 76d23b8 f3fdb65 76d23b8 c485ef9 0ed5800 8c2cd7c 1d7fc0d 96362ce 9f73464 1d7fc0d b4f4d50 6bd5d21 b4f4d50 f3fdb65 1f9e351 0ed5800 f3fdb65 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
import gradio as gr
import os
import re
import subprocess
import tempfile
# Load the model "ejschwartz/oo-method-test-model-bylibrary" once, at import
# time, through gr.load with src="models". function_change_fn later calls
# model.fn(...) on a function's disassembly text to obtain its prediction.
model = gr.load("ejschwartz/oo-method-test-model-bylibrary", src="models")
def get_all_dis(bname, addrs=None):
    """Disassemble ``bname`` and return the listing of each function found.

    Runs ``bat-ana`` to write an analysis into a temporary file, then runs
    ``bat-dis`` on that file and splits its output on the
    ``;;; function 0x...`` header lines.

    Both tools are invoked with an argument list (no shell), so characters
    such as spaces or ``;`` in ``bname`` are passed through literally and are
    never interpreted as shell syntax.

    Args:
        bname: Path of the binary to analyze.
        addrs: Optional iterable of addresses; each one is passed to
            ``bat-ana`` as a ``--function-at`` argument.

    Returns:
        A dict mapping each function address (``int``) to its disassembly
        (``bytes``): the lines following the function header, joined with
        newlines, with runs of spaces collapsed to one space and every line
        containing ``;;`` dropped.

    Raises:
        subprocess.CalledProcessError: If either tool exits with a non-zero
            status.
        OSError: If either tool cannot be started (e.g. it is not on PATH).
    """
    function_args = []
    if addrs is not None:
        for addr in addrs:
            function_args += ["--function-at", str(addr)]

    # The context manager guarantees the temporary analysis file is closed
    # (and thereby removed) even when one of the tools fails.
    with tempfile.NamedTemporaryFile(
        prefix=os.path.basename(bname) + "_", suffix=".bat_ana"
    ) as anafile:
        ananame = anafile.name
        subprocess.check_output(
            ["bat-ana", *function_args, "--no-post-analysis", "-o", ananame, bname],
            stderr=subprocess.DEVNULL,
        )
        output = subprocess.check_output(
            [
                "bat-dis",
                "--no-insn-address",
                "--no-bb-cfg-arrows",
                "--color=off",
                ananame,
            ],
            stderr=subprocess.DEVNULL,
        )

    # Collapse runs of spaces so the listing is compact and uniform.
    output = re.sub(b" +", b" ", output)

    func_dis = {}
    last_func = None
    current_output = []
    for line in output.splitlines():
        if line.startswith(b";;; function 0x"):
            # A new function header: store what was collected for the
            # previous function before starting a fresh listing.
            # NOTE(review): an earlier entry at the same address is
            # overwritten here, whereas the final function (below) is skipped
            # when its address is already present -- confirm which is intended.
            if last_func is not None:
                func_dis[last_func] = b"\n".join(current_output)
            last_func = int(line.split()[2], 16)
            current_output.clear()
        # Lines containing ";;" (including the header itself) are not kept.
        if b";;" not in line:
            current_output.append(line)

    # Store the last function, which has no following header to trigger it.
    if last_func is not None:
        if last_func in func_dis:
            print("Warning: Ignoring multiple functions at the same address")
        else:
            func_dis[last_func] = b"\n".join(current_output)

    return func_dis
def get_funs(f):
    """Return the addresses of the functions found in ``f``, one per line.

    ``f.name`` is handed to ``get_all_dis``; every key of the resulting dict
    is formatted with ``%#x`` and the results are joined with newlines.
    """
    disassembly_by_addr = get_all_dis(f.name)
    formatted = []
    for address in disassembly_by_addr:
        formatted.append("%#x" % address)
    return "\n".join(formatted)
# Build the UI: a file upload, a column (hidden until a binary with at least
# one function has been processed) holding the function picker, the selected
# function's disassembly and the model's prediction.
with gr.Blocks() as demo:
    # Holds the dict returned by get_all_dis for the uploaded file
    # (function address -> disassembly bytes), or None when there is none.
    all_dis_state = gr.State()

    gr.Markdown(
        """
# Function/Method Detector
First, upload a binary.
This model was only trained on 32-bit MSVC++ binaries. You can provide
other types of binaries, but the result will probably be gibberish.
"""
    )

    file_widget = gr.File(label="Binary file")

    with gr.Column(visible=False) as col:
        gr.Markdown("""
Great, you selected an executable! Now pick the function you would like to analyze.
""")

        fun_dropdown = gr.Dropdown(label="Select a function", choices=["Woohoo!"], interactive=True)

        gr.Markdown("""
Below you can find the selected function's disassembly, and the model's
prediction of whether the function is an object-oriented method or a
regular function.
""")

        with gr.Row(visible=True) as result:
            disassembly = gr.Textbox(label="Disassembly", lines=20)
            clazz = gr.Label()

    # Offer every file in the "examples" directory next to this script as a
    # ready-made input for the file widget.
    example_widget = gr.Examples(
        examples=[f.path for f in os.scandir(os.path.join(os.path.dirname(__file__), "examples"))],
        inputs=file_widget,
        outputs=[all_dis_state, disassembly, clazz]
    )

    def file_change_fn(file):
        """Disassemble the uploaded file and populate the function picker.

        Args:
            file: The value of ``file_widget``; ``None`` when no file is
                selected, otherwise an object whose ``name`` is the path
                passed to ``get_all_dis``.

        Returns:
            A dict of component updates. When there is no file, or no
            function was found in it, the column is hidden and the stored
            disassembly is reset to ``None``. Otherwise the column is shown,
            the dropdown lists every function address (the first one
            selected) and the disassembly dict is stored in the state.
        """
        fun_data = get_all_dis(file.name) if file is not None else None
        if not fun_data:
            # Nothing to choose from: indexing addrs[0] below would fail on
            # an empty result, so treat it like "no file selected".
            return {col: gr.update(visible=False),
                    all_dis_state: None}

        addrs = ["%#x" % addr for addr in fun_data]
        return {col: gr.update(visible=True),
                fun_dropdown: gr.update(choices=addrs, value=addrs[0]),
                all_dis_state: fun_data
               }

    def function_change_fn(selected_fun, fun_data):
        """Show the selected function's disassembly and the model's verdict.

        Args:
            selected_fun: The dropdown value, a hexadecimal address string
                as produced by ``file_change_fn``, or ``None``.
            fun_data: The stored dict mapping function address to
                disassembly bytes, or ``None``.

        Returns:
            A dict of component updates for the disassembly textbox and the
            label. Both are cleared when there is no selection or no stored
            disassembly.
        """
        if selected_fun is None or not fun_data:
            # The dropdown can change while nothing is loaded; clear the
            # outputs instead of failing on int(None, 16) / None[...].
            return {disassembly: gr.update(value=""),
                    clazz: gr.update(value=None)
                   }

        disassembly_str = fun_data[int(selected_fun, 16)].decode("utf-8")
        load_results = model.fn(disassembly_str)
        # Reshape the model output into the {label: confidence} mapping
        # given to the Label component.
        top_k = {e['label']: e['confidence'] for e in load_results['confidences']}
        return {disassembly: gr.update(value=disassembly_str),
                clazz: gr.update(value=top_k)
               }

    # Event listeners are registered while the Blocks context is still open.
    file_widget.change(file_change_fn, file_widget, [col, fun_dropdown, all_dis_state])
    fun_dropdown.change(function_change_fn, [fun_dropdown, all_dis_state], [disassembly, clazz])

demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
|