import streamlit as st
import os
import zipfile
import tempfile
import pefile
import subprocess
import binascii
import re
import json
from pathlib import Path

st.set_page_config(page_title="Binary Decompiler", page_icon="π", layout="wide")
st.title("Executable Decompiler Tool")

st.markdown("""
This tool reconstructs approximate source code from executables:
- Extracts assembly instructions from any .exe or .dll
- Converts assembly to Python-like or C++-like code
- Extracts embedded strings
- Works with PE executables regardless of the original language
""")
# Install necessary packages at startup
try:
    with st.spinner("Setting up decompilation environment..."):
        # Install key analysis libraries
        subprocess.run(["pip", "install", "pyinstaller-extractor"], capture_output=True)
        subprocess.run(["pip", "install", "uncompyle6"], capture_output=True)
        subprocess.run(["pip", "install", "capstone"], capture_output=True)
        subprocess.run(["pip", "install", "r2pipe"], capture_output=True)

        # Import after installation
        import r2pipe
        from capstone import Cs, CS_ARCH_X86, CS_MODE_32, CS_MODE_64

    st.success("Environment ready")
except Exception as e:
    st.error(f"Setup error: {str(e)}")
def extract_strings(file_path, min_length=4):
    """Extract ASCII and Unicode strings from a binary file"""
    try:
        with open(file_path, 'rb') as f:
            content = f.read()

        # Extract ASCII strings (runs of printable characters)
        ascii_pattern = b'[\\x20-\\x7E]{' + str(min_length).encode() + b',}'
        ascii_strings = re.findall(ascii_pattern, content)
        ascii_strings = [s.decode('ascii') for s in ascii_strings]

        # Extract UTF-16LE strings (printable character followed by a NUL byte)
        unicode_pattern = b'(?:[\\x20-\\x7E]\\x00){' + str(min_length).encode() + b',}'
        unicode_strings = re.findall(unicode_pattern, content)
        unicode_strings = [s.decode('utf-16-le', errors='ignore').rstrip('\x00') for s in unicode_strings]

        return sorted(set(ascii_strings + unicode_strings))
    except Exception as e:
        return [f"Error extracting strings: {str(e)}"]
def assembly_to_python(assembly_lines):
    """Convert assembly code to Python-like code"""
    python_code = []
    python_code.append("# Python code decompiled from assembly")
    python_code.append("# This is an approximation of the original code")
    python_code.append("")

    # Track variables and functions
    variables = set()
    functions = set()
    current_function = None
    indentation = 0
    in_loop = False

    # Map conditional jumps to the Python comparison they imply
    jump_operators = {
        "je": "==", "jz": "==",
        "jne": "!=", "jnz": "!=",
        "jg": ">", "jnle": ">",
        "jge": ">=", "jnl": ">=",
        "jl": "<", "jnge": "<",
        "jle": "<=", "jng": "<=",
    }

    i = 0
    while i < len(assembly_lines):
        line = assembly_lines[i]

        # Parse assembly line of the form "address: instruction"
        if ": " in line:
            parts = line.split(": ", 1)
            if len(parts) > 1:
                addr, instruction = parts

                # Function start detection (standard prologue: push ebp / mov ebp, esp)
                if ("push ebp" in instruction
                        and i + 1 < len(assembly_lines)
                        and "mov ebp, esp" in assembly_lines[i + 1]):
                    current_function = f"function_{addr}"
                    functions.add(current_function)
                    python_code.append(f"\ndef {current_function}():")
                    indentation = 1
                    i += 2  # Skip the function prologue
                    continue

                # Function return detection
                if "ret" in instruction and indentation > 0:
                    python_code.append(f"{' ' * 4 * indentation}return")
                    indentation = 0
                    current_function = None

                # Call instruction - function call
                if "call" in instruction:
                    target = instruction.split("call")[1].strip()
                    if target.startswith("0x"):
                        called_func = f"function_{target}"
                        functions.add(called_func)
                        python_code.append(f"{' ' * 4 * indentation}{called_func}()")
                    else:
                        python_code.append(f"{' ' * 4 * indentation}# Call to external function: {target}")

                # Variable assignment (mov)
                elif "mov" in instruction:
                    parts = instruction.split("mov")[1].split(",", 1)
                    if len(parts) > 1:
                        dest, source = parts
                        dest = dest.strip()
                        source = source.strip()
                        variables.add(dest)
                        python_code.append(f"{' ' * 4 * indentation}{dest} = {source}")

                # Comparison and jumps (if statements)
                elif "cmp" in instruction:
                    parts = instruction.split("cmp")[1].split(",", 1)
                    if len(parts) > 1:
                        a, b = parts
                        a = a.strip()
                        b = b.strip()
                        # Look ahead for the jump instruction that consumes the comparison
                        next_line = assembly_lines[i + 1] if i + 1 < len(assembly_lines) else ""
                        if "j" in next_line:  # Any jump instruction
                            jump_type = next_line.split(": ")[1].split()[0] if ": " in next_line else ""
                            operator = jump_operators.get(jump_type)
                            if operator:
                                python_code.append(f"{' ' * 4 * indentation}if {a} {operator} {b}:")
                            else:
                                python_code.append(f"{' ' * 4 * indentation}# Comparison: {a} ? {b}")
                            indentation += 1
                            i += 2  # Skip the cmp's paired jump instruction
                            continue

                # Loop detection (simplified)
                elif "loop" in instruction or "jmp" in instruction:
                    if not in_loop:
                        python_code.append(f"{' ' * 4 * indentation}while True:  # Loop at {addr}")
                        indentation += 1
                        in_loop = True
                    else:
                        python_code.append(f"{' ' * 4 * indentation}# Jump or loop at {addr}")

                # Add/sub operations
                elif "add" in instruction:
                    parts = instruction.split("add")[1].split(",", 1)
                    if len(parts) > 1:
                        dest, value = parts
                        python_code.append(f"{' ' * 4 * indentation}{dest.strip()} += {value.strip()}")
                elif "sub" in instruction:
                    parts = instruction.split("sub")[1].split(",", 1)
                    if len(parts) > 1:
                        dest, value = parts
                        python_code.append(f"{' ' * 4 * indentation}{dest.strip()} -= {value.strip()}")

                # Other arithmetic
                elif "mul" in instruction:
                    operand = instruction.split("mul")[1].strip()
                    python_code.append(f"{' ' * 4 * indentation}# Multiply by {operand}")
                elif "div" in instruction:
                    operand = instruction.split("div")[1].strip()
                    python_code.append(f"{' ' * 4 * indentation}# Divide by {operand}")

                # Default case - just comment the assembly
                else:
                    python_code.append(f"{' ' * 4 * indentation}# {instruction}")
        i += 1

    # Add main execution
    python_code.append("\nif __name__ == '__main__':")
    if functions:
        first_function = next(iter(functions))
        python_code.append(f"    {first_function}()")
    else:
        python_code.append("    pass  # No clear entry point found")

    return "\n".join(python_code)
def assembly_to_cpp(assembly_lines):
    """Convert assembly code to C++-like code"""
    cpp_code = []
    cpp_code.append("// C++ code decompiled from assembly")
    cpp_code.append("// This is an approximation of the original code")
    cpp_code.append("")
    cpp_code.append("#include <iostream>")
    cpp_code.append("#include <vector>")
    cpp_code.append("#include <string>")
    cpp_code.append("")

    # Track functions across both passes
    functions = set()
    current_function = None
    indentation = 0
    in_loop = False

    # Map conditional jumps to the C++ comparison they imply
    jump_operators = {
        "je": "==", "jz": "==",
        "jne": "!=", "jnz": "!=",
        "jg": ">", "jnle": ">",
        "jge": ">=", "jnl": ">=",
        "jl": "<", "jnge": "<",
        "jle": "<=", "jng": "<=",
    }

    # First pass to identify functions and emit forward declarations
    cpp_code.append("// Forward declarations")
    i = 0
    while i < len(assembly_lines):
        line = assembly_lines[i]
        if ": " in line:
            parts = line.split(": ", 1)
            if len(parts) > 1:
                addr, instruction = parts
                # Function start detection (standard prologue: push ebp / mov ebp, esp)
                if ("push ebp" in instruction
                        and i + 1 < len(assembly_lines)
                        and "mov ebp, esp" in assembly_lines[i + 1]):
                    func_name = f"function_{addr.replace('0x', '')}"
                    functions.add(func_name)
                    cpp_code.append(f"void {func_name}();")
        i += 1

    cpp_code.append("")
    cpp_code.append("// Variable declarations")
    cpp_code.append("int eax, ebx, ecx, edx, esi, edi, ebp, esp;")
    cpp_code.append("")

    # Second pass to generate function code
    i = 0
    while i < len(assembly_lines):
        line = assembly_lines[i]
        if ": " in line:
            parts = line.split(": ", 1)
            if len(parts) > 1:
                addr, instruction = parts

                # Function start detection
                if ("push ebp" in instruction
                        and i + 1 < len(assembly_lines)
                        and "mov ebp, esp" in assembly_lines[i + 1]):
                    current_function = f"function_{addr.replace('0x', '')}"
                    cpp_code.append(f"\nvoid {current_function}() {{")
                    indentation = 1
                    i += 2  # Skip the function prologue
                    continue

                # Function return detection
                if "ret" in instruction and indentation > 0:
                    cpp_code.append(f"{' ' * 4 * indentation}return;")
                    cpp_code.append("}")
                    indentation = 0
                    current_function = None

                # Call instruction - function call
                if "call" in instruction:
                    target = instruction.split("call")[1].strip()
                    if target.startswith("0x"):
                        called_func = f"function_{target.replace('0x', '')}"
                        cpp_code.append(f"{' ' * 4 * indentation}{called_func}();")
                    else:
                        cpp_code.append(f"{' ' * 4 * indentation}// Call to external function: {target}")

                # Variable assignment (mov)
                elif "mov" in instruction:
                    parts = instruction.split("mov")[1].split(",", 1)
                    if len(parts) > 1:
                        dest, source = parts
                        dest = dest.strip()
                        source = source.strip()
                        # Check if memory access
                        if "[" in dest:
                            cpp_code.append(f"{' ' * 4 * indentation}// Memory write to {dest}")
                        elif "[" in source:
                            cpp_code.append(f"{' ' * 4 * indentation}// Memory read from {source}")
                        else:
                            cpp_code.append(f"{' ' * 4 * indentation}{dest} = {source};")

                # Comparison and jumps (if statements)
                elif "cmp" in instruction:
                    parts = instruction.split("cmp")[1].split(",", 1)
                    if len(parts) > 1:
                        a, b = parts
                        a = a.strip()
                        b = b.strip()
                        # Look ahead for the jump instruction that consumes the comparison
                        next_line = assembly_lines[i + 1] if i + 1 < len(assembly_lines) else ""
                        if "j" in next_line:  # Any jump instruction
                            jump_type = next_line.split(": ")[1].split()[0] if ": " in next_line else ""
                            operator = jump_operators.get(jump_type)
                            if operator:
                                cpp_code.append(f"{' ' * 4 * indentation}if ({a} {operator} {b}) {{")
                            else:
                                cpp_code.append(f"{' ' * 4 * indentation}// Comparison: {a} ? {b}")
                            indentation += 1
                            i += 2  # Skip the cmp's paired jump instruction
                            continue

                # Loop detection (simplified)
                elif "loop" in instruction or "jmp" in instruction:
                    if not in_loop:
                        cpp_code.append(f"{' ' * 4 * indentation}while (true) {{  // Loop at {addr}")
                        indentation += 1
                        in_loop = True
                    else:
                        cpp_code.append(f"{' ' * 4 * indentation}// Jump or loop at {addr}")

                # Add/sub operations
                elif "add" in instruction:
                    parts = instruction.split("add")[1].split(",", 1)
                    if len(parts) > 1:
                        dest, value = parts
                        cpp_code.append(f"{' ' * 4 * indentation}{dest.strip()} += {value.strip()};")
                elif "sub" in instruction:
                    parts = instruction.split("sub")[1].split(",", 1)
                    if len(parts) > 1:
                        dest, value = parts
                        cpp_code.append(f"{' ' * 4 * indentation}{dest.strip()} -= {value.strip()};")

                # Other arithmetic
                elif "mul" in instruction:
                    operand = instruction.split("mul")[1].strip()
                    cpp_code.append(f"{' ' * 4 * indentation}// Multiply by {operand}")
                elif "div" in instruction:
                    operand = instruction.split("div")[1].strip()
                    cpp_code.append(f"{' ' * 4 * indentation}// Divide by {operand}")

                # Default case - just comment the assembly
                else:
                    cpp_code.append(f"{' ' * 4 * indentation}// {instruction}")
        i += 1

    # Add main function
    cpp_code.append("\nint main() {")
    if functions:
        first_function = next(iter(functions))
        cpp_code.append(f"    {first_function}();")
    else:
        cpp_code.append("    // No clear entry point found")
    cpp_code.append("    return 0;")
    cpp_code.append("}")

    return "\n".join(cpp_code)
def try_pyinstaller_extraction(file_path, output_dir):
    """Attempt to extract Python scripts from PyInstaller executables"""
    try:
        # Run pyinstaller-extractor on the file.
        # Try the module and command names used by common distributions of the tool.
        try:
            result = subprocess.run(["python", "-m", "pyinstxtractor", file_path],
                                    cwd=output_dir, capture_output=True, text=True)
        except Exception:
            try:
                result = subprocess.run(["python", "-m", "pyinstaller_extractor", file_path],
                                        cwd=output_dir, capture_output=True, text=True)
            except Exception:
                # Direct command attempt
                result = subprocess.run(["pyinstxtractor", file_path],
                                        cwd=output_dir, capture_output=True, text=True)

        extracted_dir = os.path.join(output_dir, os.path.basename(file_path) + "_extracted")
        if not os.path.exists(extracted_dir):
            # Try with a different naming convention
            potential_dirs = [d for d in os.listdir(output_dir)
                              if os.path.isdir(os.path.join(output_dir, d)) and "_extracted" in d]
            if potential_dirs:
                extracted_dir = os.path.join(output_dir, potential_dirs[0])

        if os.path.exists(extracted_dir):
            # Try to decompile the Python bytecode files
            python_files = {}
            for root, _, files in os.walk(extracted_dir):
                for file in files:
                    if file.endswith('.pyc') or file.endswith('.pyo'):
                        pyc_path = os.path.join(root, file)
                        py_path = pyc_path + ".py"
                        try:
                            # Try decompiling with uncompyle6
                            subprocess.run(["uncompyle6", pyc_path, "-o", py_path], capture_output=True)
                            if os.path.exists(py_path):
                                with open(py_path, 'r', encoding='utf-8', errors='ignore') as f:
                                    rel_path = os.path.relpath(pyc_path, extracted_dir)
                                    python_files[rel_path] = f.read()
                        except Exception:
                            pass

            # Check if we found the PYZ archive (PyInstaller specific)
            for root, _, files in os.walk(extracted_dir):
                for file in files:
                    if file.endswith('.pyz'):
                        pyz_path = os.path.join(root, file)
                        # Extract PYZ contents
                        pyz_extract_dir = pyz_path + "_extracted"
                        os.makedirs(pyz_extract_dir, exist_ok=True)
                        try:
                            # Try different extraction methods for the PYZ
                            try:
                                subprocess.run(["python", "-m", "pyinstxtractor", pyz_path],
                                               cwd=output_dir, capture_output=True)
                            except Exception:
                                try:
                                    subprocess.run(["python", "-m", "pyinstaller_extractor", pyz_path],
                                                   cwd=output_dir, capture_output=True)
                                except Exception:
                                    # Direct command attempt
                                    subprocess.run(["pyinstxtractor", pyz_path],
                                                   cwd=output_dir, capture_output=True)

                            # Look for extracted PYZ content
                            if os.path.exists(pyz_extract_dir):
                                for pyz_root, _, extracted_files in os.walk(pyz_extract_dir):
                                    for pyz_file in extracted_files:
                                        if pyz_file.endswith('.pyc') or pyz_file.endswith('.pyo'):
                                            pyc_path = os.path.join(pyz_root, pyz_file)
                                            py_path = pyc_path + ".py"
                                            try:
                                                subprocess.run(["uncompyle6", pyc_path, "-o", py_path], capture_output=True)
                                                if os.path.exists(py_path):
                                                    with open(py_path, 'r', encoding='utf-8', errors='ignore') as f:
                                                        rel_path = os.path.join("PYZ_ARCHIVE", os.path.relpath(pyc_path, pyz_extract_dir))
                                                        python_files[rel_path] = f.read()
                                            except Exception:
                                                pass
                        except Exception:
                            pass

            return {
                "success": True,
                "message": "Successfully extracted Python code",
                "files": python_files
            }

        return {
            "success": False,
            "message": "Not a PyInstaller executable or extraction failed"
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"PyInstaller extraction error: {str(e)}"
        }
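

# Note on the return value of try_pyinstaller_extraction(): the UI code below relies on a dict
# with a boolean "success" flag, a human-readable "message", and (on success) a "files" mapping
# of relative bytecode paths to decompiled source text, e.g. (illustrative shape only):
#   {"success": True, "message": "Successfully extracted Python code",
#    "files": {"some_module.pyc": "...decompiled source..."}}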
def disassemble_binary(file_path, is_dll=False):
    """Disassemble a binary file to get assembly code"""
    try:
        # Try with radare2 first
        try:
            import r2pipe
            r2 = r2pipe.open(file_path)
            r2.cmd("aaa")  # Analyze all

            # Get main or entry point disassembly
            main_disasm = r2.cmd("s main; pdf")
            if not main_disasm or "Cannot find function" in main_disasm:
                main_disasm = r2.cmd("s entry0; pdf")  # Try entry point instead

            # Get list of functions
            functions = r2.cmd("afl")

            # Get all functions disassembly for more complete code
            all_functions_disasm = []
            function_addresses = re.findall(r'0x[0-9a-fA-F]+', functions)
            for addr in function_addresses[:10]:  # Limit to first 10 functions to avoid huge output
                func_disasm = r2.cmd(f"s {addr}; pdf")
                all_functions_disasm.append(func_disasm)
            r2.quit()

            # Extract assembly instructions from the radare2 listing
            assembly_lines = []
            for disasm in [main_disasm] + all_functions_disasm:
                for line in disasm.splitlines():
                    if "│" in line:  # radare2 draws its listing with this box-drawing separator
                        parts = line.split("│")
                        if len(parts) > 1:
                            addr_part = parts[0].strip()
                            instr_part = parts[-1].strip()
                            if addr_part and instr_part and "0x" in addr_part:
                                assembly_lines.append(f"{addr_part}: {instr_part}")
            return assembly_lines
        except Exception:
            # Fallback to pefile + capstone if radare2 fails
            from capstone import Cs, CS_ARCH_X86, CS_MODE_32, CS_MODE_64

            pe = pefile.PE(file_path)

            # Determine if 32-bit or 64-bit
            is_64bit = pe.OPTIONAL_HEADER.Magic == 0x20b
            mode = CS_MODE_64 if is_64bit else CS_MODE_32

            # Initialize disassembler
            md = Cs(CS_ARCH_X86, mode)
            md.detail = True

            assembly_lines = []
            # Find and disassemble executable sections
            for section in pe.sections:
                if section.Characteristics & 0x20000000:  # IMAGE_SCN_MEM_EXECUTE
                    section_data = pe.get_data(section.VirtualAddress, section.SizeOfRawData)
                    section_addr = pe.OPTIONAL_HEADER.ImageBase + section.VirtualAddress

                    # Disassemble section code
                    for i, (address, size, mnemonic, op_str) in enumerate(md.disasm_lite(section_data, section_addr)):
                        if i >= 500:  # Limit to 500 instructions per section
                            break
                        assembly_lines.append(f"0x{address:08x}: {mnemonic} {op_str}")
            return assembly_lines
    except Exception as e:
        return [f"Disassembly error: {str(e)}"]
def analyze_binary(file_path, is_dll=False):
    """Comprehensive analysis and decompilation of a binary file"""
    try:
        results = {}

        # Basic PE information using pefile
        pe = pefile.PE(file_path)

        # General info
        results["basic_info"] = {
            "Machine": hex(pe.FILE_HEADER.Machine),
            "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
            "NumberOfSections": pe.FILE_HEADER.NumberOfSections,
            "SizeOfImage": pe.OPTIONAL_HEADER.SizeOfImage,
            "EntryPoint": hex(pe.OPTIONAL_HEADER.AddressOfEntryPoint),
            "ImageBase": hex(pe.OPTIONAL_HEADER.ImageBase),
            "Subsystem": pe.OPTIONAL_HEADER.Subsystem
        }

        # Section information
        results["sections"] = []
        for section in pe.sections:
            section_name = section.Name.decode('utf-8', errors='ignore').strip('\x00')
            results["sections"].append({
                "Name": section_name,
                "VirtualAddress": hex(section.VirtualAddress),
                "VirtualSize": section.Misc_VirtualSize,
                "SizeOfRawData": section.SizeOfRawData,
                "Entropy": section.get_entropy(),
                "Characteristics": hex(section.Characteristics)
            })

        # Import information
        results["imports"] = []
        if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
            for entry in pe.DIRECTORY_ENTRY_IMPORT:
                dll_name = entry.dll.decode('utf-8', errors='ignore')
                imports = []
                for imp in entry.imports:
                    if imp.name:
                        imports.append(imp.name.decode('utf-8', errors='ignore'))
                results["imports"].append({
                    "DLL": dll_name,
                    "Functions": imports
                })

        # Export information (for DLLs)
        results["exports"] = []
        if is_dll and hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
            for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols:
                if exp.name:
                    results["exports"].append({
                        "Name": exp.name.decode('utf-8', errors='ignore'),
                        "Address": hex(pe.OPTIONAL_HEADER.ImageBase + exp.address),
                        "Ordinal": exp.ordinal
                    })

        # Extract strings
        results["strings"] = extract_strings(file_path)

        # Disassemble to get assembly
        assembly_lines = disassemble_binary(file_path, is_dll)
        results["assembly_lines"] = assembly_lines

        # Convert assembly to Python and C++ approximations
        results["python_code"] = assembly_to_python(assembly_lines)
        results["cpp_code"] = assembly_to_cpp(assembly_lines)

        return results
    except Exception as e:
        return {"Error": str(e)}
def process_zip_file(file_path, temp_dir):
    """Process a ZIP file and extract its contents"""
    try:
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            # Get file list before extraction
            file_list = zip_ref.namelist()
            # Extract to temp directory
            zip_ref.extractall(temp_dir)

        # Check for nested executables (case-insensitive extension match)
        nested_files = {}
        for root, _, files in os.walk(temp_dir):
            for file in files:
                full_path = os.path.join(root, file)
                rel_path = os.path.relpath(full_path, temp_dir)
                if file.lower().endswith('.exe'):
                    exe_output_dir = os.path.join(temp_dir, f"{file}_unpacked")
                    os.makedirs(exe_output_dir, exist_ok=True)
                    nested_files[rel_path] = {
                        'type': 'exe',
                        'analysis': analyze_binary(full_path),
                        'python_extraction': try_pyinstaller_extraction(full_path, exe_output_dir)
                    }
                elif file.lower().endswith('.dll'):
                    nested_files[rel_path] = {
                        'type': 'dll',
                        'analysis': analyze_binary(full_path, is_dll=True)
                    }

        return {
            'file_list': file_list,
            'nested_files': nested_files
        }
    except Exception as e:
        return {'error': str(e)}
# Main app logic
uploaded_file = st.file_uploader("Upload a file (.zip, .exe, or .dll)", type=["zip", "exe", "dll"])

if uploaded_file is not None:
    with tempfile.TemporaryDirectory() as temp_dir:
        # Save the uploaded file to the temporary directory
        file_path = os.path.join(temp_dir, uploaded_file.name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        st.success(f"File uploaded: {uploaded_file.name}")

        # Display file size
        file_size = os.path.getsize(file_path)
        st.info(f"File size: {file_size / 1024:.2f} KB")

        # Process based on file type
        if uploaded_file.name.lower().endswith('.zip'):
            st.subheader("ZIP File Contents")
            with st.spinner("Analyzing ZIP contents..."):
                output_dir = os.path.join(temp_dir, "extracted")
                os.makedirs(output_dir, exist_ok=True)
                result = process_zip_file(file_path, output_dir)

            if 'error' in result:
                st.error(f"Error processing ZIP file: {result['error']}")
            else:
                with st.expander("ZIP Contents", expanded=True):
                    st.write(f"Total files: {len(result['file_list'])}")
                    st.code("\n".join(result['file_list']))

                if result['nested_files']:
                    st.subheader("Detected Executable Files")
                    for nested_path, file_info in result['nested_files'].items():
                        with st.expander(f"{nested_path} ({file_info['type'].upper()})"):
                            if file_info['type'] == 'exe':
                                analysis = file_info['analysis']
                                python_extraction = file_info['python_extraction']
                                tabs = st.tabs(["Python Code", "C++ Code", "Assembly", "Imports", "Strings"])

                                with tabs[0]:
                                    if "Error" in analysis:
                                        st.error(analysis["Error"])
                                    else:
                                        # Check if we have extracted Python code
                                        if python_extraction.get("success", False):
                                            st.success("Original Python code extracted successfully!")
                                            for filename, content in python_extraction.get("files", {}).items():
                                                with st.expander(f"Python File: {filename}"):
                                                    st.code(content, language="python")
                                        else:
                                            # Show decompiled Python from assembly
                                            st.warning("Converting assembly to Python code (not original source)")
                                            st.code(analysis.get("python_code", "# Failed to generate Python code"), language="python")

                                with tabs[1]:
                                    st.subheader("Decompiled C++ Code")
                                    st.code(analysis.get("cpp_code", "// Failed to generate C++ code"), language="cpp")

                                with tabs[2]:
                                    st.subheader("Assembly Code")
                                    assembly = "\n".join(analysis.get("assembly_lines", []))
                                    st.code(assembly, language="asm")

                                with tabs[3]:
                                    st.subheader("Imported Functions")
                                    for imp in analysis.get("imports", []):
                                        with st.expander(f"DLL: {imp['DLL']}"):
                                            st.code("\n".join(imp["Functions"]))

                                with tabs[4]:
                                    st.subheader("Strings Found")
                                    all_strings = analysis.get("strings", [])
                                    interesting_strings = [s for s in all_strings if len(s) > 8]  # Filter out very short strings
                                    st.code("\n".join(interesting_strings[:500]))  # Limit to 500 strings

                            else:  # DLL
                                analysis = file_info['analysis']
                                tabs = st.tabs(["Python Code", "C++ Code", "Assembly", "Exports", "Imports", "Strings"])

                                with tabs[0]:
                                    if "Error" in analysis:
                                        st.error(analysis["Error"])
                                    else:
                                        st.subheader("Decompiled Python Code")
                                        st.code(analysis.get("python_code", "# Failed to generate Python code"), language="python")

                                with tabs[1]:
                                    st.subheader("Decompiled C++ Code")
                                    st.code(analysis.get("cpp_code", "// Failed to generate C++ code"), language="cpp")

                                with tabs[2]:
                                    st.subheader("Assembly Code")
                                    assembly = "\n".join(analysis.get("assembly_lines", []))
                                    st.code(assembly, language="asm")

                                with tabs[3]:
                                    st.subheader("Exported Functions")
                                    st.json(analysis.get("exports", []))

                                with tabs[4]:
                                    st.subheader("Imported Functions")
                                    for imp in analysis.get("imports", []):
                                        with st.expander(f"DLL: {imp['DLL']}"):
                                            st.code("\n".join(imp["Functions"]))

                                with tabs[5]:
                                    st.subheader("Strings Found")
                                    all_strings = analysis.get("strings", [])
                                    interesting_strings = [s for s in all_strings if len(s) > 8]  # Filter out very short strings
                                    st.code("\n".join(interesting_strings[:500]))  # Limit to 500 strings
        elif uploaded_file.name.lower().endswith('.exe'):
            st.subheader("EXE File Analysis")
            with st.spinner("Decompiling executable..."):
                output_dir = os.path.join(temp_dir, "exe_unpacked")
                os.makedirs(output_dir, exist_ok=True)

                # Perform comprehensive analysis
                analysis = analyze_binary(file_path)

                # Try Python extraction
                python_extraction = try_pyinstaller_extraction(file_path, output_dir)

            tabs = st.tabs(["Python Code", "C++ Code", "Assembly", "Summary", "Imports", "Strings"])

            with tabs[0]:
                if "Error" in analysis:
                    st.error(analysis["Error"])
                else:
                    # Check if we have extracted Python code
                    if python_extraction.get("success", False):
                        st.success("Original Python code extracted successfully!")
                        for filename, content in python_extraction.get("files", {}).items():
                            with st.expander(f"Python File: {filename}"):
                                st.code(content, language="python")
                    else:
                        # Show decompiled Python from assembly
                        st.warning("Converting assembly to Python code (not original source)")
                        st.code(analysis.get("python_code", "# Failed to generate Python code"), language="python")

            with tabs[1]:
                st.subheader("Decompiled C++ Code")
                st.code(analysis.get("cpp_code", "// Failed to generate C++ code"), language="cpp")

            with tabs[2]:
                st.subheader("Assembly Code")
                assembly = "\n".join(analysis.get("assembly_lines", []))
                st.code(assembly, language="asm")

            with tabs[3]:
                st.subheader("Basic Information")
                st.json(analysis.get("basic_info", {}))

                st.subheader("Sections")
                sections_df = {
                    "Name": [],
                    "VirtualSize": [],
                    "SizeOfRawData": [],
                    "Entropy": []
                }
                for section in analysis.get("sections", []):
                    sections_df["Name"].append(section["Name"])
                    sections_df["VirtualSize"].append(section["VirtualSize"])
                    sections_df["SizeOfRawData"].append(section["SizeOfRawData"])
                    sections_df["Entropy"].append(section["Entropy"])
                st.dataframe(sections_df)

            with tabs[4]:
                st.subheader("Imported Functions")
                for imp in analysis.get("imports", []):
                    with st.expander(f"DLL: {imp['DLL']}"):
                        st.code("\n".join(imp["Functions"]))

            with tabs[5]:
                st.subheader("Strings Found")
                all_strings = analysis.get("strings", [])
                interesting_strings = [s for s in all_strings if len(s) > 8]  # Filter out very short strings
                st.code("\n".join(interesting_strings[:500]))  # Limit to 500 strings
        elif uploaded_file.name.lower().endswith('.dll'):
            st.subheader("DLL File Analysis")
            with st.spinner("Decompiling DLL..."):
                # Perform comprehensive analysis (with is_dll=True)
                analysis = analyze_binary(file_path, is_dll=True)

            tabs = st.tabs(["Python Code", "C++ Code", "Assembly", "Summary", "Exports", "Imports", "Strings"])

            with tabs[0]:
                if "Error" in analysis:
                    st.error(analysis["Error"])
                else:
                    st.subheader("Decompiled Python Code")
                    st.code(analysis.get("python_code", "# Failed to generate Python code"), language="python")

            with tabs[1]:
                st.subheader("Decompiled C++ Code")
                st.code(analysis.get("cpp_code", "// Failed to generate C++ code"), language="cpp")

            with tabs[2]:
                st.subheader("Assembly Code")
                assembly = "\n".join(analysis.get("assembly_lines", []))
                st.code(assembly, language="asm")

            with tabs[3]:
                st.subheader("Basic Information")
                st.json(analysis.get("basic_info", {}))

                st.subheader("Sections")
                sections_df = {
                    "Name": [],
                    "VirtualSize": [],
                    "SizeOfRawData": [],
                    "Entropy": []
                }
                for section in analysis.get("sections", []):
                    sections_df["Name"].append(section["Name"])
                    sections_df["VirtualSize"].append(section["VirtualSize"])
                    sections_df["SizeOfRawData"].append(section["SizeOfRawData"])
                    sections_df["Entropy"].append(section["Entropy"])
                st.dataframe(sections_df)

            with tabs[4]:
                st.subheader("Exported Functions")
                st.json(analysis.get("exports", []))

            with tabs[5]:
                st.subheader("Imported Functions")
                for imp in analysis.get("imports", []):
                    with st.expander(f"DLL: {imp['DLL']}"):
                        st.code("\n".join(imp["Functions"]))

            with tabs[6]:
                st.subheader("Strings Found")
                all_strings = analysis.get("strings", [])
                interesting_strings = [s for s in all_strings if len(s) > 8]  # Filter out very short strings
                st.code("\n".join(interesting_strings[:500]))  # Limit to 500 strings