# Exe_unpacker / app.py
# (Hosting-page residue kept for provenance: "euler314's picture",
#  "Update app.py", "64457ce verified")
import streamlit as st
import os
import zipfile
import tempfile
import pefile
import subprocess
import binascii
import re
import json
from pathlib import Path
# Page chrome: browser-tab title/icon, wide layout, and the introductory text
# rendered above the uploader.  The icon literal had been corrupted by a bad
# re-encoding; it is restored to the magnifying-glass emoji (U+1F50D).
st.set_page_config(page_title="Binary Decompiler", page_icon="🔍", layout="wide")
st.title("Executable Decompiler Tool")
st.markdown("""
This tool decompiles executables to source code:
- Extracts assembly instructions from any .exe or .dll
- Converts assembly to Python-like or C++-like code
- Extracts embedded strings and resources
- Works with any executable regardless of original language
""")
# Install necessary packages at startup
#
# Streamlit re-executes this script on every user interaction, so packages are
# only installed when none of their importable modules can be found; an
# unconditional ``pip install`` would add several subprocess launches to every
# rerun.  ``sys.executable -m pip`` targets the interpreter that is actually
# running the app rather than whichever ``pip`` happens to be first on PATH.
import importlib.util
import sys


def _ensure_installed(pip_name, module_names):
    """Install *pip_name* unless one of *module_names* is already importable."""
    if any(importlib.util.find_spec(name) is not None for name in module_names):
        return
    subprocess.run(
        [sys.executable, "-m", "pip", "install", pip_name],
        capture_output=True,
    )


try:
    with st.spinner("Setting up decompilation environment..."):
        # Install key analysis libraries
        _ensure_installed("pyinstaller-extractor", ("pyinstxtractor", "pyinstaller_extractor"))
        _ensure_installed("uncompyle6", ("uncompyle6",))
        _ensure_installed("capstone", ("capstone",))
        _ensure_installed("r2pipe", ("r2pipe",))
        # Make freshly installed packages visible to the import system.
        importlib.invalidate_caches()
        # Import after installation
        import r2pipe
        from capstone import Cs, CS_ARCH_X86, CS_MODE_32, CS_MODE_64
    st.success("Environment ready")
except Exception as e:
    # Best effort: the app keeps running; functions that need the missing
    # names report their own errors when invoked.
    st.error(f"Setup error: {str(e)}")
def extract_strings(file_path, min_length=4):
    """Extract ASCII and Unicode strings from binary file.

    Args:
        file_path: Path of the file to scan; it is read fully into memory.
        min_length: Minimum number of characters a run must have to be
            reported.  Values below 1 are treated as 1, because a zero
            minimum would make the patterns match the empty string at every
            offset.

    Returns:
        A sorted list of unique strings: runs of printable ASCII bytes plus
        runs of printable ASCII characters stored as UTF-16-LE (each byte
        followed by a NUL).  If anything fails, a one-element list holding an
        "Error extracting strings: ..." message is returned instead of
        raising.
    """
    try:
        min_length = max(1, int(min_length))
        with open(file_path, 'rb') as f:
            content = f.read()

        # Runs of printable ASCII bytes (space .. tilde).
        ascii_pattern = rb'[\x20-\x7E]{%d,}' % min_length
        ascii_strings = [
            match.decode('ascii') for match in re.findall(ascii_pattern, content)
        ]

        # Basic wide-string detection: printable ASCII byte + NUL, repeated.
        unicode_pattern = rb'(?:[\x20-\x7E]\x00){%d,}' % min_length
        unicode_strings = [
            match.decode('utf-16-le', errors='ignore').rstrip('\x00')
            for match in re.findall(unicode_pattern, content)
        ]

        return sorted(set(ascii_strings + unicode_strings))
    except Exception as e:
        return [f"Error extracting strings: {str(e)}"]
def assembly_to_python(assembly_lines):
    """Convert assembly code to Python-like code.

    Each input line is expected to look like ``"<address>: <instruction>"``;
    lines without ``": "`` are ignored.  The translation is a rough,
    line-by-line approximation and is not guaranteed to be runnable Python.

    Instructions are classified by their mnemonic (the first word of the
    instruction), not by substring search, so operands or longer mnemonics
    that merely contain ``mov``/``add``/``ret``... are no longer mistaken for
    those instructions, and a ``mov``-like line without a comma can no longer
    abort the whole conversion.

    Args:
        assembly_lines: Sequence of ``"address: instruction"`` strings.

    Returns:
        The generated pseudo-Python source as a single string.
    """
    # Conditional jumps that may follow a ``cmp`` and the operator they imply.
    comparison_for_jump = {
        "je": "==", "jz": "==",
        "jne": "!=", "jnz": "!=",
        "jg": ">", "jnle": ">",
        "jge": ">=", "jnl": ">=",
        "jl": "<", "jnge": "<",
        "jle": "<=", "jng": "<=",
    }
    loop_mnemonics = ("jmp", "loop", "loope", "loopne", "loopz", "loopnz")
    return_mnemonics = ("ret", "retn", "retf")

    def split_line(line):
        """Return (addr, instruction, mnemonic, operands), or None if unparsable."""
        if ": " not in line:
            return None
        addr, instruction = line.split(": ", 1)
        mnemonic, _, operands = instruction.strip().partition(" ")
        return addr, instruction, mnemonic.lower(), operands.strip()

    def split_operands(operands):
        """Return the two comma-separated operands, or None if there is no comma."""
        if "," not in operands:
            return None
        first, second = operands.split(",", 1)
        return first.strip(), second.strip()

    python_code = [
        "# Python code decompiled from assembly",
        "# This is an approximation of the original code",
        "",
    ]
    # Lists (not sets) so the chosen entry point is deterministic.
    defined_functions = []
    called_functions = []
    indentation = 0
    in_loop = False

    def emit(text):
        python_code.append(f"{' ' * 4 * indentation}{text}")

    total = len(assembly_lines)
    i = 0
    while i < total:
        parsed = split_line(assembly_lines[i])
        if parsed is None:
            i += 1
            continue
        addr, instruction, mnemonic, operands = parsed
        next_line = assembly_lines[i + 1] if i + 1 < total else ""

        # Function start detection: classic 32-bit frame prologue.
        if "push ebp" in instruction and "mov ebp, esp" in next_line:
            function_name = f"function_{addr}"
            if function_name not in defined_functions:
                defined_functions.append(function_name)
            python_code.append(f"\ndef {function_name}():")
            indentation = 1
            in_loop = False
            i += 2  # Skip the function prologue
            continue

        # Function return detection; the instruction itself is still echoed
        # as a comment by the default case below.
        if mnemonic in return_mnemonics and indentation > 0:
            emit("return")
            indentation = 0
            in_loop = False  # loop state belongs to the function just closed

        pair = split_operands(operands)

        if mnemonic == "call":
            if operands.startswith("0x"):
                called = f"function_{operands}"
                if called not in called_functions:
                    called_functions.append(called)
                emit(f"{called}()")
            else:
                emit(f"# Call to external function: {operands}")
        elif mnemonic == "mov" and pair:
            emit(f"{pair[0]} = {pair[1]}")
        elif mnemonic == "cmp" and pair:
            following = split_line(next_line)
            jump = following[2] if following else ""
            if jump.startswith("j") and jump != "jmp":
                operator = comparison_for_jump.get(jump)
                if operator:
                    emit(f"if {pair[0]} {operator} {pair[1]}:")
                    indentation += 1
                else:
                    # Unknown condition: note it, but do not open a block.
                    emit(f"# Comparison: {pair[0]} ? {pair[1]}")
                i += 2  # Consume both the cmp and the conditional jump
                continue
            emit(f"# {instruction}")
        elif mnemonic in loop_mnemonics:
            # Loop detection (simplified): first jump in a function opens a loop.
            if not in_loop:
                emit(f"while True: # Loop at {addr}")
                indentation += 1
                in_loop = True
            else:
                emit(f"# Jump or loop at {addr}")
        elif mnemonic == "add" and pair:
            emit(f"{pair[0]} += {pair[1]}")
        elif mnemonic == "sub" and pair:
            emit(f"{pair[0]} -= {pair[1]}")
        elif mnemonic in ("mul", "imul"):
            emit(f"# Multiply by {operands}")
        elif mnemonic in ("div", "idiv"):
            emit(f"# Divide by {operands}")
        else:
            # Default case - just comment the assembly
            emit(f"# {instruction}")
        i += 1

    # Add main execution: prefer the first function actually defined.
    python_code.append("\nif __name__ == '__main__':")
    entry_candidates = defined_functions or called_functions
    if entry_candidates:
        python_code.append(f" {entry_candidates[0]}()")
    else:
        python_code.append(" pass # No clear entry point found")
    return "\n".join(python_code)
def assembly_to_cpp(assembly_lines):
    """Convert assembly code to C++-like code.

    Each input line is expected to look like ``"<address>: <instruction>"``;
    lines without ``": "`` are ignored.  The output is a rough, line-by-line
    approximation: registers become global ``int`` variables and memory
    accesses are only noted in comments.

    Instructions are classified by mnemonic (first word) rather than by
    substring search, every ``{`` opened for an ``if``/``while``/function is
    closed again at the function's return (or at the end of the listing), and
    forward declarations are emitted once per function in listing order.

    Args:
        assembly_lines: Sequence of ``"address: instruction"`` strings.

    Returns:
        The generated pseudo-C++ source as a single string.
    """
    # Conditional jumps that may follow a ``cmp`` and the operator they imply.
    comparison_for_jump = {
        "je": "==", "jz": "==",
        "jne": "!=", "jnz": "!=",
        "jg": ">", "jnle": ">",
        "jge": ">=", "jnl": ">=",
        "jl": "<", "jnge": "<",
        "jle": "<=", "jng": "<=",
    }
    loop_mnemonics = ("jmp", "loop", "loope", "loopne", "loopz", "loopnz")
    return_mnemonics = ("ret", "retn", "retf")
    total = len(assembly_lines)

    def split_line(line):
        """Return (addr, instruction, mnemonic, operands), or None if unparsable."""
        if ": " not in line:
            return None
        addr, instruction = line.split(": ", 1)
        mnemonic, _, operands = instruction.strip().partition(" ")
        return addr, instruction, mnemonic.lower(), operands.strip()

    def split_operands(operands):
        """Return the two comma-separated operands, or None if there is no comma."""
        if "," not in operands:
            return None
        first, second = operands.split(",", 1)
        return first.strip(), second.strip()

    def next_line_of(index):
        return assembly_lines[index + 1] if index + 1 < total else ""

    def is_prologue(instruction, index):
        """True for the classic 32-bit frame prologue starting at *index*."""
        return "push ebp" in instruction and "mov ebp, esp" in next_line_of(index)

    cpp_code = [
        "// C++ code decompiled from assembly",
        "// This is an approximation of the original code",
        "",
        "#include <iostream>",
        "#include <vector>",
        "#include <string>",
        "",
        "// Forward declarations",
    ]

    # First pass to identify functions (ordered, without duplicates).
    functions = []
    for index, line in enumerate(assembly_lines):
        parsed = split_line(line)
        if parsed is None or not is_prologue(parsed[1], index):
            continue
        func_name = f"function_{parsed[0].replace('0x', '')}"
        if func_name not in functions:
            functions.append(func_name)
            cpp_code.append(f"void {func_name}();")

    cpp_code.append("")
    cpp_code.append("// Variable declarations")
    cpp_code.append("int eax, ebx, ecx, edx, esi, edi, ebp, esp;")
    cpp_code.append("")

    indentation = 0
    in_loop = False

    def emit(text):
        cpp_code.append(f"{' ' * 4 * indentation}{text}")

    def close_open_blocks():
        """Emit one closing brace for every block still open."""
        for level in range(indentation - 1, -1, -1):
            cpp_code.append(' ' * 4 * level + "}")

    # Second pass to generate function code
    i = 0
    while i < total:
        parsed = split_line(assembly_lines[i])
        if parsed is None:
            i += 1
            continue
        addr, instruction, mnemonic, operands = parsed

        # Function start detection
        if is_prologue(instruction, i):
            close_open_blocks()  # a previous function that never returned
            current_function = f"function_{addr.replace('0x', '')}"
            cpp_code.append(f"\nvoid {current_function}() {{")
            indentation = 1
            in_loop = False
            i += 2  # Skip the function prologue
            continue

        # Function return detection; the instruction itself is still echoed
        # as a comment by the default case below.
        if mnemonic in return_mnemonics and indentation > 0:
            emit("return;")
            close_open_blocks()
            indentation = 0
            in_loop = False  # loop state belongs to the function just closed

        pair = split_operands(operands)

        if mnemonic == "call":
            if operands.startswith("0x"):
                emit(f"function_{operands.replace('0x', '')}();")
            else:
                emit(f"// Call to external function: {operands}")
        elif mnemonic == "mov" and pair:
            dest, source = pair
            # Memory operands are only described, not translated.
            if "[" in dest:
                emit(f"// Memory write to {dest}")
            elif "[" in source:
                emit(f"// Memory read from {source}")
            else:
                emit(f"{dest} = {source};")
        elif mnemonic == "cmp" and pair:
            following = split_line(next_line_of(i))
            jump = following[2] if following else ""
            if jump.startswith("j") and jump != "jmp":
                operator = comparison_for_jump.get(jump)
                if operator:
                    emit(f"if ({pair[0]} {operator} {pair[1]}) {{")
                    indentation += 1
                else:
                    # Unknown condition: note it, but do not open a block.
                    emit(f"// Comparison: {pair[0]} ? {pair[1]}")
                i += 2  # Consume both the cmp and the conditional jump
                continue
            emit(f"// {instruction}")
        elif mnemonic in loop_mnemonics:
            # Loop detection (simplified): first jump in a function opens a loop.
            if not in_loop:
                emit(f"while (true) {{ // Loop at {addr}")
                indentation += 1
                in_loop = True
            else:
                emit(f"// Jump or loop at {addr}")
        elif mnemonic == "add" and pair:
            emit(f"{pair[0]} += {pair[1]};")
        elif mnemonic == "sub" and pair:
            emit(f"{pair[0]} -= {pair[1]};")
        elif mnemonic in ("mul", "imul"):
            emit(f"// Multiply by {operands}")
        elif mnemonic in ("div", "idiv"):
            emit(f"// Divide by {operands}")
        else:
            # Default case - just comment the assembly
            emit(f"// {instruction}")
        i += 1

    # The listing may end inside a function that never reached a return.
    close_open_blocks()
    indentation = 0

    # Add main function
    cpp_code.append("\nint main() {")
    if functions:
        cpp_code.append(f" {functions[0]}();")
    else:
        cpp_code.append(" // No clear entry point found")
    cpp_code.append(" return 0;")
    cpp_code.append("}")
    return "\n".join(cpp_code)
def try_pyinstaller_extraction(file_path, output_dir):
    """Attempt to extract Python scripts from PyInstaller executables.

    Runs a PyInstaller extractor on *file_path* with *output_dir* as working
    directory, then tries to decompile every ``.pyc``/``.pyo`` file found in
    the extraction directory (and in any ``.pyz`` archive inside it) with
    ``uncompyle6``.

    The extractor is tried under several command names.  A candidate is
    considered failed when it cannot be launched *or* exits with a non-zero
    status; ``subprocess.run`` does not raise for the latter, so relying on
    exceptions alone would never reach the fallbacks.

    Args:
        file_path: Path of the executable to inspect.
        output_dir: Existing directory in which the extractor is run.

    Returns:
        A dict with ``"success"`` (bool) and ``"message"`` (str).  When at
        least one file was decompiled, ``"success"`` is True and ``"files"``
        maps archive-relative paths to decompiled source text.  Errors are
        reported through the dict; nothing is raised.
    """
    import sys  # local import: only needed to locate the running interpreter

    def run_extractor(target):
        """Run the first extractor command that launches and exits with 0."""
        candidates = (
            [sys.executable, "-m", "pyinstxtractor", target],
            [sys.executable, "-m", "pyinstaller_extractor", target],
            ["pyinstxtractor", target],  # direct command attempt
        )
        for command in candidates:
            try:
                completed = subprocess.run(
                    command, cwd=output_dir, capture_output=True, text=True
                )
            except OSError:
                continue  # command not available on this system
            if completed.returncode == 0:
                return True
        return False

    def decompile_tree(root_dir, key_prefix=""):
        """Decompile all bytecode files below *root_dir*; return {path: source}."""
        sources = {}
        for root, _, files in os.walk(root_dir):
            for name in files:
                if not name.endswith(('.pyc', '.pyo')):
                    continue
                pyc_path = os.path.join(root, name)
                py_path = pyc_path + ".py"
                try:
                    subprocess.run(
                        ["uncompyle6", pyc_path, "-o", py_path], capture_output=True
                    )
                    if not os.path.exists(py_path):
                        continue
                    with open(py_path, 'r', encoding='utf-8', errors='ignore') as f:
                        text = f.read()
                except OSError:
                    # Best effort: skip files that cannot be decompiled/read.
                    continue
                rel_path = os.path.relpath(pyc_path, root_dir)
                if key_prefix:
                    rel_path = os.path.join(key_prefix, rel_path)
                sources[rel_path] = text
        return sources

    try:
        run_extractor(file_path)

        extracted_dir = os.path.join(output_dir, os.path.basename(file_path) + "_extracted")
        if not os.path.exists(extracted_dir):
            # Try with different naming convention
            potential_dirs = sorted(
                d for d in os.listdir(output_dir)
                if os.path.isdir(os.path.join(output_dir, d)) and "_extracted" in d
            )
            if potential_dirs:
                extracted_dir = os.path.join(output_dir, potential_dirs[0])

        if not os.path.exists(extracted_dir):
            return {
                "success": False,
                "message": "Not a PyInstaller executable or extraction failed"
            }

        python_files = decompile_tree(extracted_dir)

        # Collect PYZ archives first: their extraction directories are created
        # inside the tree, which must not happen while it is being walked.
        pyz_paths = [
            os.path.join(root, name)
            for root, _, files in os.walk(extracted_dir)
            for name in files
            if name.endswith('.pyz')
        ]
        for pyz_path in pyz_paths:
            pyz_extract_dir = pyz_path + "_extracted"
            try:
                os.makedirs(pyz_extract_dir, exist_ok=True)
            except OSError:
                continue
            run_extractor(pyz_path)
            python_files.update(decompile_tree(pyz_extract_dir, "PYZ_ARCHIVE"))

        if not python_files:
            # Nothing to show: let the caller fall back to other views rather
            # than announce a success with an empty result.
            return {
                "success": False,
                "message": "PyInstaller archive extracted, but no Python bytecode could be decompiled"
            }
        return {
            "success": True,
            "message": "Successfully extracted Python code",
            "files": python_files
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"PyInstaller extraction error: {str(e)}"
        }
def _r2_function_lines(r2, target):
    """Return ``"0xADDR: instruction"`` lines for the function at *target*.

    Uses radare2's JSON disassembly (``pdfj``) instead of scraping the
    box-drawing text layout.  Both ``offset`` and ``addr`` are accepted as the
    address key.  NOTE(review): key names are taken from radare2's JSON
    output as commonly documented -- confirm against the installed version.
    """
    try:
        info = r2.cmdj(f"pdfj @ {target}")
    except Exception:
        return []
    if not isinstance(info, dict):
        return []
    lines = []
    for op in info.get("ops", []):
        address = op.get("offset", op.get("addr"))
        text = op.get("disasm") or op.get("opcode")
        if isinstance(address, int) and text:
            lines.append(f"0x{address:08x}: {text}")
    return lines


def _disassemble_with_radare2(file_path, max_functions=10):
    """Disassemble via radare2; return [] when radare2 is unusable or finds nothing."""
    try:
        import r2pipe
        r2 = r2pipe.open(file_path)
    except Exception:
        return []
    try:
        r2.cmd("aaa")  # Analyze all
        # Main function first, entry point as a substitute.
        lines = _r2_function_lines(r2, "main") or _r2_function_lines(r2, "entry0")
        seen = set(lines)
        # Function list: the address is the first token of every ``afl`` row.
        listing = r2.cmd("afl") or ""
        addresses = re.findall(r'^\s*(0x[0-9a-fA-F]+)', listing, flags=re.MULTILINE)
        # Limit the number of functions to avoid huge output.
        for addr in addresses[:max_functions]:
            for line in _r2_function_lines(r2, addr):
                if line not in seen:  # main/entry0 is usually listed again
                    seen.add(line)
                    lines.append(line)
        return lines
    except Exception:
        return []
    finally:
        try:
            r2.quit()
        except Exception:
            pass  # best-effort cleanup of the radare2 process


def _disassemble_with_capstone(file_path, max_instructions=500):
    """Disassemble executable PE sections with capstone (at most 500 instructions each)."""
    pe = pefile.PE(file_path)
    try:
        # 0x20b is the optional-header magic of PE32+ (64-bit) images.
        is_64bit = pe.OPTIONAL_HEADER.Magic == 0x20b
        md = Cs(CS_ARCH_X86, CS_MODE_64 if is_64bit else CS_MODE_32)
        assembly_lines = []
        for section in pe.sections:
            # 0x20000000 is IMAGE_SCN_MEM_EXECUTE: the section is executable.
            if not section.Characteristics & 0x20000000:
                continue
            section_data = pe.get_data(section.VirtualAddress, section.SizeOfRawData)
            section_addr = pe.OPTIONAL_HEADER.ImageBase + section.VirtualAddress
            for count, (address, size, mnemonic, op_str) in enumerate(
                md.disasm_lite(section_data, section_addr)
            ):
                if count >= max_instructions:
                    break
                assembly_lines.append(f"0x{address:08x}: {mnemonic} {op_str}")
        return assembly_lines
    finally:
        pe.close()


def disassemble_binary(file_path, is_dll=False):
    """Disassemble a binary file to get assembly code.

    radare2 (through ``r2pipe``) is tried first.  If it is unavailable, fails,
    or yields no instructions, the file is parsed as a PE image and its
    executable sections are disassembled with capstone.

    Args:
        file_path: Path of the binary.
        is_dll: Accepted for interface compatibility; not used.

    Returns:
        A list of ``"0xADDRESS: instruction"`` strings.  On failure a
        one-element list holding a "Disassembly error: ..." message is
        returned instead of raising.
    """
    try:
        assembly_lines = _disassemble_with_radare2(file_path)
        if assembly_lines:
            return assembly_lines
        return _disassemble_with_capstone(file_path)
    except Exception as e:
        return [f"Disassembly error: {str(e)}"]
def analyze_binary(file_path, is_dll=False):
    """Comprehensive analysis and decompilation of a binary file.

    Collects PE header facts, sections, imports and (for DLLs) exports with
    ``pefile``, then adds extracted strings, a disassembly listing and the
    Python-like / C++-like translations of that listing.

    The ``pefile.PE`` object is closed as soon as the header data has been
    read, so the file is not kept open while the slower steps run.

    Args:
        file_path: Path of the PE file.
        is_dll: When True, exported symbols are collected as well.

    Returns:
        A dict with the keys ``basic_info``, ``sections``, ``imports``,
        ``exports``, ``strings``, ``assembly_lines``, ``python_code`` and
        ``cpp_code``.  On failure, ``{"Error": message}`` is returned instead
        of raising.
    """
    pe = None
    try:
        results = {}
        # Basic PE information using pefile
        pe = pefile.PE(file_path)
        file_header = pe.FILE_HEADER
        optional_header = pe.OPTIONAL_HEADER

        # General info
        results["basic_info"] = {
            "Machine": hex(file_header.Machine),
            "TimeDateStamp": file_header.TimeDateStamp,
            "NumberOfSections": file_header.NumberOfSections,
            "SizeOfImage": optional_header.SizeOfImage,
            "EntryPoint": hex(optional_header.AddressOfEntryPoint),
            "ImageBase": hex(optional_header.ImageBase),
            "Subsystem": optional_header.Subsystem
        }

        # Section information
        results["sections"] = [
            {
                "Name": section.Name.decode('utf-8', errors='ignore').strip('\x00'),
                "VirtualAddress": hex(section.VirtualAddress),
                "VirtualSize": section.Misc_VirtualSize,
                "SizeOfRawData": section.SizeOfRawData,
                "Entropy": section.get_entropy(),
                "Characteristics": hex(section.Characteristics)
            }
            for section in pe.sections
        ]

        # Import information (imports by ordinal only have no name and are skipped)
        results["imports"] = []
        if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
            for entry in pe.DIRECTORY_ENTRY_IMPORT:
                results["imports"].append({
                    "DLL": entry.dll.decode('utf-8', errors='ignore'),
                    "Functions": [
                        imp.name.decode('utf-8', errors='ignore')
                        for imp in entry.imports
                        if imp.name
                    ]
                })

        # Export information (for DLLs)
        results["exports"] = []
        if is_dll and hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
            for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols:
                if exp.name:
                    results["exports"].append({
                        "Name": exp.name.decode('utf-8', errors='ignore'),
                        "Address": hex(optional_header.ImageBase + exp.address),
                        "Ordinal": exp.ordinal
                    })

        # Header data is complete; release the file before the slower steps.
        pe.close()
        pe = None

        # Extract strings
        results["strings"] = extract_strings(file_path)
        # Disassemble to get assembly
        assembly_lines = disassemble_binary(file_path, is_dll)
        results["assembly_lines"] = assembly_lines
        # Convert assembly to Python
        results["python_code"] = assembly_to_python(assembly_lines)
        # Convert assembly to C++
        results["cpp_code"] = assembly_to_cpp(assembly_lines)
        return results
    except Exception as e:
        return {"Error": str(e)}
    finally:
        if pe is not None:
            pe.close()
def process_zip_file(file_path, temp_dir):
    """Process a ZIP file and extract its contents.

    The archive is extracted into *temp_dir*; every ``.exe``/``.dll`` found
    below it (extension compared case-insensitively) is analysed, and
    executables are additionally probed for embedded PyInstaller content.

    The list of binaries is collected before any unpack directory is created,
    and each executable gets its own unpack directory next to it
    (``<path>_unpacked``), so same-named executables in different folders do
    not share a directory.

    Args:
        file_path: Path of the ZIP archive.
        temp_dir: Existing directory to extract into.

    Returns:
        ``{'file_list': [...], 'nested_files': {rel_path: info}}`` where
        *info* holds ``'type'`` (``'exe'``/``'dll'``), ``'analysis'`` and, for
        executables, ``'python_extraction'``.  On failure
        ``{'error': message}`` is returned instead of raising.
    """
    try:
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            # Get file list before extraction
            file_list = zip_ref.namelist()
            # Extract to temp directory
            zip_ref.extractall(temp_dir)

        # Snapshot the binaries first so directories created below are never
        # part of the scan.
        binaries = []
        for root, _, files in os.walk(temp_dir):
            for name in files:
                extension = os.path.splitext(name)[1].lower()
                if extension in ('.exe', '.dll'):
                    binaries.append((os.path.join(root, name), extension))

        # Check for nested executables
        nested_files = {}
        for full_path, extension in binaries:
            rel_path = os.path.relpath(full_path, temp_dir)
            if extension == '.exe':
                exe_output_dir = full_path + "_unpacked"
                os.makedirs(exe_output_dir, exist_ok=True)
                nested_files[rel_path] = {
                    'type': 'exe',
                    'analysis': analyze_binary(full_path),
                    'python_extraction': try_pyinstaller_extraction(full_path, exe_output_dir)
                }
            else:
                nested_files[rel_path] = {
                    'type': 'dll',
                    'analysis': analyze_binary(full_path, is_dll=True)
                }
        return {
            'file_list': file_list,
            'nested_files': nested_files
        }
    except Exception as e:
        return {'error': str(e)}
# Main app logic
#
# The four result views (EXE/DLL, uploaded directly or found inside a ZIP)
# differ only in which tabs they show, so each tab has one renderer and
# ``_render_analysis`` assembles the requested set.


def _show_collapsible_code(label, code, nested, **code_kwargs):
    """Show *code* under *label*.

    Outside an expander the code is wrapped in its own expander.  When the
    caller is already inside one (*nested*), a plain text label is used
    instead, because Streamlit versions that forbid expanders inside
    expanders raise an exception otherwise.
    """
    if nested:
        st.text(label)
        st.code(code, **code_kwargs)
    else:
        with st.expander(label):
            st.code(code, **code_kwargs)


def _show_python_tab(analysis, python_extraction, nested):
    """Python tab: recovered sources if available, else the assembly translation.

    *python_extraction* is None for DLLs, which are never probed for
    PyInstaller content.
    """
    if "Error" in analysis:
        st.error(analysis["Error"])
        return
    translated = analysis.get("python_code", "# Failed to generate Python code")
    if python_extraction is None:
        st.subheader("Decompiled Python Code")
        st.code(translated, language="python")
    elif python_extraction.get("success", False):
        st.success("Original Python code extracted successfully!")
        for filename, content in python_extraction.get("files", {}).items():
            _show_collapsible_code(
                f"Python File: {filename}", content, nested, language="python"
            )
    else:
        # Show decompiled Python from assembly
        st.warning("Converting assembly to Python code (not original source)")
        st.code(translated, language="python")


def _show_cpp_tab(analysis):
    st.subheader("Decompiled C++ Code")
    st.code(analysis.get("cpp_code", "// Failed to generate C++ code"), language="cpp")


def _show_assembly_tab(analysis):
    st.subheader("Assembly Code")
    st.code("\n".join(analysis.get("assembly_lines", [])), language="asm")


def _show_summary_tab(analysis):
    st.subheader("Basic Information")
    st.json(analysis.get("basic_info", {}))
    st.subheader("Sections")
    columns = ("Name", "VirtualSize", "SizeOfRawData", "Entropy")
    sections = analysis.get("sections", [])
    st.dataframe({column: [section[column] for section in sections] for column in columns})


def _show_exports_tab(analysis):
    st.subheader("Exported Functions")
    st.json(analysis.get("exports", []))


def _show_imports_tab(analysis, nested):
    st.subheader("Imported Functions")
    for imp in analysis.get("imports", []):
        _show_collapsible_code(f"DLL: {imp['DLL']}", "\n".join(imp["Functions"]), nested)


def _show_strings_tab(analysis):
    st.subheader("Strings Found")
    all_strings = analysis.get("strings", [])
    interesting_strings = [s for s in all_strings if len(s) > 8]  # Filter out very short strings
    st.code("\n".join(interesting_strings[:500]))  # Limit to 500 strings


def _render_analysis(analysis, tab_names, python_extraction=None, nested=False):
    """Create the tabs named in *tab_names* and fill each with its view."""
    renderers = {
        "Python Code": lambda: _show_python_tab(analysis, python_extraction, nested),
        "C++ Code": lambda: _show_cpp_tab(analysis),
        "Assembly": lambda: _show_assembly_tab(analysis),
        "Summary": lambda: _show_summary_tab(analysis),
        "Exports": lambda: _show_exports_tab(analysis),
        "Imports": lambda: _show_imports_tab(analysis, nested),
        "Strings": lambda: _show_strings_tab(analysis),
    }
    for tab, tab_name in zip(st.tabs(tab_names), tab_names):
        with tab:
            renderers[tab_name]()


uploaded_file = st.file_uploader("Upload a file (.zip, .exe, or .dll)", type=["zip", "exe", "dll"])
if uploaded_file is not None:
    with tempfile.TemporaryDirectory() as temp_dir:
        # Save the uploaded file to the temporary directory.  Only the final
        # path component of the client-supplied name is used, so the file
        # cannot be written outside temp_dir.
        upload_name = os.path.basename(uploaded_file.name)
        file_path = os.path.join(temp_dir, upload_name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        st.success(f"File uploaded: {uploaded_file.name}")
        # Display file size
        file_size = os.path.getsize(file_path)
        st.info(f"File size: {file_size/1024:.2f} KB")

        # Process based on file type
        lowered_name = upload_name.lower()
        if lowered_name.endswith('.zip'):
            st.subheader("ZIP File Contents")
            with st.spinner("Analyzing ZIP contents..."):
                output_dir = os.path.join(temp_dir, "extracted")
                os.makedirs(output_dir, exist_ok=True)
                result = process_zip_file(file_path, output_dir)
            if 'error' in result:
                st.error(f"Error processing ZIP file: {result['error']}")
            else:
                with st.expander("ZIP Contents", expanded=True):
                    st.write(f"Total files: {len(result['file_list'])}")
                    st.code("\n".join(result['file_list']))
                if result['nested_files']:
                    st.subheader("Detected Executable Files")
                    for nested_path, file_info in result['nested_files'].items():
                        with st.expander(f"{nested_path} ({file_info['type'].upper()})"):
                            if file_info['type'] == 'exe':
                                _render_analysis(
                                    file_info['analysis'],
                                    ["Python Code", "C++ Code", "Assembly", "Imports", "Strings"],
                                    python_extraction=file_info['python_extraction'],
                                    nested=True,
                                )
                            else:  # DLL
                                _render_analysis(
                                    file_info['analysis'],
                                    ["Python Code", "C++ Code", "Assembly", "Exports", "Imports", "Strings"],
                                    nested=True,
                                )
        elif lowered_name.endswith('.exe'):
            st.subheader("EXE File Analysis")
            with st.spinner("Decompiling executable..."):
                output_dir = os.path.join(temp_dir, "exe_unpacked")
                os.makedirs(output_dir, exist_ok=True)
                # Perform comprehensive analysis
                analysis = analyze_binary(file_path)
                # Try Python extraction
                python_extraction = try_pyinstaller_extraction(file_path, output_dir)
            _render_analysis(
                analysis,
                ["Python Code", "C++ Code", "Assembly", "Summary", "Imports", "Strings"],
                python_extraction=python_extraction,
            )
        elif lowered_name.endswith('.dll'):
            st.subheader("DLL File Analysis")
            with st.spinner("Decompiling DLL..."):
                # Perform comprehensive analysis (with is_dll=True)
                analysis = analyze_binary(file_path, is_dll=True)
            _render_analysis(
                analysis,
                ["Python Code", "C++ Code", "Assembly", "Summary", "Exports", "Imports", "Strings"],
            )