ejschwartz committed
Commit 536fde5 · verified · 1 Parent(s): cacf2b7

Update app.py

Files changed (1): app.py (+16 -4)
app.py CHANGED
@@ -2,15 +2,27 @@ import gradio as gr
 import spaces
 import transformers
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
 
-pipe = transformers.pipeline("text2text-generation", model="ejschwartz/slade-x86-O3")
+model_path = 'LLM4Binary/llm4decompile-6.7b-v2'  # V2 model
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16).cuda()
 
 @spaces.GPU
 def predict(asm):
-    o = pipe(asm, max_length=1024, num_beams=5, num_return_sequences=1)
-    return o[0]['generated_text']
+    before = "# This is the assembly code:\n"  # prompt prefix
+    after = "\n# What is the source code?\n"  # prompt suffix
+    input_prompt = before + asm.strip() + after
+
+    inputs = tokenizer(input_prompt, return_tensors="pt").to(model.device)
+    with torch.no_grad():
+        outputs = model.generate(**inputs, max_new_tokens=2048)  # context limit is 4096 tokens; keep max_new_tokens below that
+    # Decode only the newly generated tokens, dropping the prompt and the trailing EOS token.
+    c_func_decompile = tokenizer.decode(outputs[0][len(inputs[0]):-1])
+    return c_func_decompile
 
 demo = gr.Interface(fn=predict,
-                    examples=[".globl sysv_calc\n.type sysv_calc, @function\nsysv_calc:\n.LFB0:\n\t.cfi_startproc\n\tendbr64\n\tpushq\t%rbp\n\t.cfi_def_cfa_offset 16\n\t.cfi_offset 6, -16\n\tpushq\t%rbx\n\t.cfi_def_cfa_offset 24\n\t.cfi_offset 3, -24\n\tmovq\t%rdx, %rbx\n\tsubq\t$8, %rsp\n\t.cfi_def_cfa_offset 32\n\tmovq\t24(%rdx), %rdx\n\tmovl\t(%rsi), %esi\n\tcall\telf_strptr@PLT\n\tmovq\t%rax, %rbp\n\tmovq\t(%rbx), %rax\n\tcmpq\tSHT_SYMTAB(%rip), %rax\n\tje\t.L2\n\tcmpq\tSHT_STRTAB(%rip), %rax\n\tje\t.L2\n\tcmpq\tSHT_RELA(%rip), %rax\n\tje\t.L2\n\tcmpq\tSHT_REL(%rip), %rax\n\tjne\t.L3\n\t.p2align 4,,10\n\t.p2align 3\n.L2:\n\tcmpq\t$0, 8(%rbx)\n\tje\t.L1\n.L3:\n\txorl\t%eax, %eax\n\tcall\ttbl_append@PLT\n\txorl\t%esi, %esi\n\tmovq\t%rbp, %rdi\n\tcall\ttbl_print@PLT\n\tmovq\t16(%rbx), %rdi\n\tmovl\tradix(%rip), %esi\n\tmovl\t$1, %edx\n\tcall\ttbl_print_num@PLT\n\tmovq\t8(%rbx), %rdi\n\tmovl\tradix(%rip), %esi\n\tmovl\t$2, %edx\n\tcall\ttbl_print_num@PLT\n\tmovq\t16(%rbx), %rax\n\taddl\t%eax, text_size_total(%rip)\n.L1:\n\taddq\t$8, %rsp\n\t.cfi_def_cfa_offset 24\n\tpopq\t%rbx\n\t.cfi_def_cfa_offset 16\n\tpopq\t%rbp\n\t.cfi_def_cfa_offset 8\n\tret\n\t.cfi_endproc\n\n"],
+                    examples=["void ioabs_tcp_pre_select(connection c, int *n, struct pollfd *pfds) { struct ioabs_tcp *io; io = (struct ioabs_tcp*)c->io; c->s_index = *n; (*n)++; pfds[c->s_index].fd = c->s; pfds[c->s_index].events |= 0x0001; if (((size_t)(((c->wrb)->put + (c->wrb)->len - (c->wrb)->get) % (c->wrb)->len)) > 0) pfds[c->s_index].events |= 0x0004; }"],
                     inputs="text", outputs="text")
 demo.queue()
 demo.launch()
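
The rewritten predict() decodes with outputs[0][len(inputs[0]):-1], which relies on integer indexing into the BatchEncoding (available only with fast tokenizers) and on generation ending with an EOS token. A minimal standalone sketch of the same flow, slicing by input_ids length and using skip_special_tokens instead; the decompile helper name and the one-line assembly string at the bottom are illustrative, not part of the commit:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_path = "LLM4Binary/llm4decompile-6.7b-v2"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16).cuda()

def decompile(asm: str) -> str:
    # Same prompt template as predict() in app.py.
    prompt = "# This is the assembly code:\n" + asm.strip() + "\n# What is the source code?\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[1]  # number of prompt tokens
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=2048)
    # Slice off the prompt; skip_special_tokens drops the trailing EOS.
    return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

print(decompile("endbr64\n\txorl\t%eax, %eax\n\tret"))

Slicing by input_ids length behaves the same whether the tokenizer is fast or slow, which makes it a slightly more robust variant of the len(inputs[0]) pattern used in the commit.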