Spaces:
Building
Building
import streamlit as st | |
import os | |
import zipfile | |
import tempfile | |
import pefile | |
import subprocess | |
import binascii | |
import re | |
import json | |
from pathlib import Path | |
# --- Page header ---------------------------------------------------------
st.set_page_config(page_title="Executable Reverse Engineer", page_icon="🔍", layout="wide")
st.title("Executable Reverse Engineering Tool")
st.markdown("""
This tool performs reverse engineering on executables (.exe/.dll) to show their inner workings:
- Disassembles machine code to view assembly instructions
- Attempts to decompile to pseudocode
- Shows strings, imports, and other binary artifacts
- Works with executables from any programming language
""")

# --- Runtime dependency bootstrap ------------------------------------------
# The analysis libraries are pip-installed when the script runs, then imported.
# Any failure (including a failed import) is reported in the page, not raised.
try:
    with st.spinner("Setting up reverse engineering environment..."):
        # Same four packages, same order as before; pip output is captured
        # and discarded, and pip's exit status is not checked.
        for _package in ("pyinstaller-extractor", "uncompyle6", "capstone", "r2pipe"):
            subprocess.run(["pip", "install", _package], capture_output=True)
        # These imports can only succeed once the installs above have run.
        import r2pipe
        from capstone import Cs, CS_ARCH_X86, CS_MODE_32, CS_MODE_64
        st.success("Environment ready")
except Exception as setup_error:
    st.error(f"Setup error: {setup_error}")
def extract_strings(file_path, min_length=4):
    """Collect printable strings embedded in a binary file.

    Two encodings are searched: runs of printable ASCII bytes (0x20-0x7E) and
    runs of the same characters each followed by a NUL byte (the UTF-16LE
    layout of ASCII text).

    Args:
        file_path: Path of the file to scan; it is read fully into memory.
        min_length: Minimum number of characters a run needs to be reported.

    Returns:
        A sorted list of the distinct strings found. If anything goes wrong,
        a one-element list holding an error message is returned instead.
    """
    try:
        with open(file_path, 'rb') as handle:
            blob = handle.read()

        # Shared "{N,}" quantifier suffix for both patterns.
        at_least = b'{' + str(min_length).encode() + b',}'

        # Single-byte printable runs.
        narrow_re = re.compile(b'[\\x20-\\x7E]' + at_least)
        narrow_hits = [match.group().decode('ascii') for match in narrow_re.finditer(blob)]

        # Printable byte + NUL pairs (basic UTF-16LE detection).
        wide_re = re.compile(b'(?:[\x20-\x7E]\x00)' + at_least)
        wide_hits = []
        for match in wide_re.finditer(blob):
            text = match.group().decode('utf-16-le', errors='ignore')
            wide_hits.append(text.rstrip('\x00'))

        unique = set(narrow_hits)
        unique.update(wide_hits)
        return sorted(unique)
    except Exception as exc:
        return [f"Error extracting strings: {str(exc)}"]
def analyze_with_radare2(file_path):
    """Run a radare2 session (through r2pipe) over a binary and collect results.

    Args:
        file_path: Path of the binary to open with r2pipe.

    Returns:
        On success, a dict with the keys "info", "entry_point", "imports",
        "exports", "sections", "main_disasm", "functions" and "pseudocode".
        On any failure, ``{"error": "Radare2 analysis failed: ..."}``.
        This function does not raise.
    """

    def _json_or_empty(raw):
        # radare2 may answer a JSON command with an empty string.
        return json.loads(raw) if raw else []

    r2 = None
    try:
        r2 = r2pipe.open(file_path)

        # Perform initial analysis ("analyze all").
        r2.cmd("aaa")

        # Basic file information; unlike the lists below, an empty answer here
        # is treated as a failure (json.loads raises).
        info = json.loads(r2.cmd("ij"))

        entry_point = r2.cmd("ie")
        imports = _json_or_empty(r2.cmd("iij"))
        exports = _json_or_empty(r2.cmd("iEj"))  # mostly relevant for DLLs
        sections = _json_or_empty(r2.cmd("iSj"))

        # Disassemble main, falling back to the entry point.
        main_disasm = r2.cmd("s main; pdf")
        if not main_disasm or "Cannot find function" in main_disasm:
            main_disasm = r2.cmd("s entry0; pdf")

        functions = _json_or_empty(r2.cmd("aflj"))

        # Pseudocode for main, again falling back to the entry point.
        pseudocode = r2.cmd("s main; pdc")
        if not pseudocode or pseudocode.strip() == "":
            pseudocode = r2.cmd("s entry0; pdc")

        return {
            "info": info,
            "entry_point": entry_point,
            "imports": imports,
            "exports": exports,
            "sections": sections,
            "main_disasm": main_disasm,
            "functions": functions,
            "pseudocode": pseudocode,
        }
    except Exception as exc:
        return {"error": f"Radare2 analysis failed: {str(exc)}"}
    finally:
        # Always end the session: previously a failure in any command or in
        # JSON parsing skipped quit() and left the session open.
        if r2 is not None:
            try:
                r2.quit()
            except Exception:
                # Best-effort cleanup; a failing quit() must not mask the
                # result (or the error dict) being returned.
                pass
def _decompile_bytecode_tree(tree_root, collected, key_prefix=None):
    """Decompile every .pyc/.pyo under ``tree_root`` with uncompyle6.

    Decompiled source is stored in ``collected`` keyed by the bytecode file's
    path relative to ``tree_root`` (joined under ``key_prefix`` when given).
    Files that cannot be decompiled or read are skipped.
    """
    for root, _, names in os.walk(tree_root):
        for name in names:
            if not name.endswith(('.pyc', '.pyo')):
                continue
            pyc_path = os.path.join(root, name)
            py_path = pyc_path + ".py"
            try:
                subprocess.run(["uncompyle6", pyc_path, "-o", py_path], capture_output=True)
                if not os.path.exists(py_path):
                    continue
                with open(py_path, 'r', encoding='utf-8', errors='ignore') as f:
                    source = f.read()
            except (OSError, subprocess.SubprocessError):
                # Decompiler missing or output unreadable: skip this file only.
                continue
            rel_path = os.path.relpath(pyc_path, tree_root)
            if key_prefix:
                rel_path = os.path.join(key_prefix, rel_path)
            collected[rel_path] = source


def try_pyinstaller_extraction(file_path, output_dir):
    """Attempt to extract and decompile Python code from a PyInstaller executable.

    Args:
        file_path: Path of the executable to unpack.
        output_dir: Existing directory used as the extractor's working
            directory; the "<basename>_extracted" folder is looked up in it.

    Returns:
        ``{"success": True, "message": ..., "files": {relative_path: source}}``
        when the "<basename>_extracted" directory exists after the extractor ran,
        otherwise ``{"success": False, "message": ...}``. Does not raise for
        ordinary failures.
    """
    try:
        # The extractor runs with cwd=output_dir, so a relative path would no
        # longer resolve inside the child process.
        file_path = os.path.abspath(file_path)

        # NOTE(review): this call uses the module name "pyinstaller-extractor"
        # while the PYZ step below uses "pyinstxtractor" — confirm which one the
        # installed package actually provides.
        subprocess.run(["python", "-m", "pyinstaller-extractor", file_path],
                       cwd=output_dir, capture_output=True, text=True)

        extracted_dir = os.path.join(output_dir, os.path.basename(file_path) + "_extracted")
        if not os.path.exists(extracted_dir):
            return {
                "success": False,
                "message": "Not a PyInstaller executable or extraction failed"
            }

        python_files = {}

        # Bytecode files found directly in the extracted tree.
        _decompile_bytecode_tree(extracted_dir, python_files)

        # PYZ archives (PyInstaller specific) hold most of the Python modules.
        for root, _, names in os.walk(extracted_dir):
            for name in names:
                if not name.endswith('.pyz'):
                    continue
                pyz_path = os.path.join(root, name)
                pyz_extract_dir = pyz_path + "_extracted"
                os.makedirs(pyz_extract_dir, exist_ok=True)
                try:
                    subprocess.run(["python", "-m", "pyinstxtractor", pyz_path],
                                   cwd=output_dir, capture_output=True)
                except (OSError, subprocess.SubprocessError):
                    # Extractor could not be launched for this archive; the
                    # other archives and already-collected files still count.
                    continue
                _decompile_bytecode_tree(pyz_extract_dir, python_files, key_prefix="PYZ_ARCHIVE")

        return {
            "success": True,
            "message": "Successfully extracted Python code",
            "files": python_files
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"PyInstaller extraction error: {str(e)}"
        }
def _disassemble_entry_with_capstone(pe, byte_count=512, max_instructions=100):
    """Return a Capstone listing of the code at the PE entry point.

    Returns None when no section contains the entry point RVA. Raises
    ImportError when capstone is not installed.
    """
    from capstone import Cs, CS_ARCH_X86, CS_MODE_32, CS_MODE_64

    # Determine if 32-bit or 64-bit from the optional header magic.
    is_64bit = pe.OPTIONAL_HEADER.Magic == 0x20b
    md = Cs(CS_ARCH_X86, CS_MODE_64 if is_64bit else CS_MODE_32)

    entry_rva = pe.OPTIONAL_HEADER.AddressOfEntryPoint
    if not any(section.contains_rva(entry_rva) for section in pe.sections):
        return None

    # Slice the memory-mapped image, which is indexed by RVA.
    entry_data = pe.get_memory_mapped_image()[entry_rva:entry_rva + byte_count]
    start_address = pe.OPTIONAL_HEADER.ImageBase + entry_rva

    listing = []
    for index, (address, _size, mnemonic, op_str) in enumerate(md.disasm_lite(entry_data, start_address)):
        if index >= max_instructions:  # preview only
            break
        listing.append(f"0x{address:08x}: {mnemonic} {op_str}")
    return "\n".join(listing)


def analyze_binary(file_path, is_dll=False):
    """Comprehensive analysis of a PE binary (.exe or .dll).

    Args:
        file_path: Path of the binary to analyse.
        is_dll: When True, the export table is also collected.

    Returns:
        A dict with "basic_info", "sections", "imports", "exports" and
        "strings"; plus "disassembly" (and, when radare2 succeeded,
        "functions" and "pseudocode"). If the file cannot be parsed, or an
        unexpected error occurs, ``{"Error": message}`` is returned instead.
    """
    try:
        pe = pefile.PE(file_path)
    except Exception as e:
        return {"Error": str(e)}

    try:
        results = {}

        # General header information.
        results["basic_info"] = {
            "Machine": hex(pe.FILE_HEADER.Machine),
            "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
            "NumberOfSections": pe.FILE_HEADER.NumberOfSections,
            "SizeOfImage": pe.OPTIONAL_HEADER.SizeOfImage,
            "EntryPoint": hex(pe.OPTIONAL_HEADER.AddressOfEntryPoint),
            "ImageBase": hex(pe.OPTIONAL_HEADER.ImageBase),
            "Subsystem": pe.OPTIONAL_HEADER.Subsystem
        }

        # Section table.
        results["sections"] = [
            {
                "Name": section.Name.decode('utf-8', errors='ignore').strip('\x00'),
                "VirtualAddress": hex(section.VirtualAddress),
                "VirtualSize": section.Misc_VirtualSize,
                "SizeOfRawData": section.SizeOfRawData,
                "Entropy": section.get_entropy(),
                "Characteristics": hex(section.Characteristics)
            }
            for section in pe.sections
        ]

        # Imports, grouped per DLL; imports without a name are skipped.
        results["imports"] = []
        if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
            for entry in pe.DIRECTORY_ENTRY_IMPORT:
                results["imports"].append({
                    "DLL": entry.dll.decode('utf-8', errors='ignore'),
                    "Functions": [
                        imp.name.decode('utf-8', errors='ignore')
                        for imp in entry.imports
                        if imp.name
                    ]
                })

        # Exports (only collected for DLLs); unnamed exports are skipped.
        results["exports"] = []
        if is_dll and hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
            for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols:
                if exp.name:
                    results["exports"].append({
                        "Name": exp.name.decode('utf-8', errors='ignore'),
                        "Address": hex(pe.OPTIONAL_HEADER.ImageBase + exp.address),
                        "Ordinal": exp.ordinal
                    })

        results["strings"] = extract_strings(file_path)

        # Prefer radare2; fall back to a basic Capstone listing when it fails.
        try:
            r2_results = analyze_with_radare2(file_path)
            if "error" not in r2_results:
                results["disassembly"] = r2_results["main_disasm"]
                results["functions"] = r2_results["functions"]
                results["pseudocode"] = r2_results["pseudocode"]
            else:
                listing = _disassemble_entry_with_capstone(pe)
                if listing is not None:
                    results["disassembly"] = listing
        except ImportError:
            # If r2pipe or capstone isn't available.
            results["disassembly"] = "Advanced disassembly not available. Install r2pipe or capstone."
        except Exception as e:
            # A disassembler failure should not throw away the header,
            # import and string results gathered above.
            results["disassembly"] = f"Disassembly failed: {e}"

        return results
    except Exception as e:
        return {"Error": str(e)}
    finally:
        # Release the file handle / mapping held by pefile; it was never
        # closed before.
        pe.close()
def process_zip_file(file_path, temp_dir):
    """Extract a ZIP archive and analyse every executable found inside it.

    Args:
        file_path: Path of the ZIP archive.
        temp_dir: Directory the archive is extracted into; per-executable
            "<name>_unpacked" work directories are also created here.

    Returns:
        ``{"file_list": [...], "nested_files": {relative_path: info}}`` where
        each info dict has "type" ("exe" or "dll") and "analysis", plus
        "python_extraction" for executables. On failure, ``{"error": message}``.
    """
    try:
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            # Archive listing as stored, captured before extraction.
            file_list = zip_ref.namelist()
            zip_ref.extractall(temp_dir)

        nested_files = {}
        for root, _, files in os.walk(temp_dir):
            for file in files:
                full_path = os.path.join(root, file)
                rel_path = os.path.relpath(full_path, temp_dir)
                # Match extensions case-insensitively so "SETUP.EXE" or
                # "LIB.DLL" are not silently skipped.
                lowered = file.lower()
                if lowered.endswith('.exe'):
                    # Key the work directory on the relative path so that
                    # same-named executables in different folders do not share
                    # (and pollute) one extraction directory. For a file at the
                    # archive root this is the same name as before.
                    unpack_name = rel_path.replace(os.sep, "_") + "_unpacked"
                    exe_output_dir = os.path.join(temp_dir, unpack_name)
                    os.makedirs(exe_output_dir, exist_ok=True)
                    nested_files[rel_path] = {
                        'type': 'exe',
                        'analysis': analyze_binary(full_path),
                        'python_extraction': try_pyinstaller_extraction(full_path, exe_output_dir)
                    }
                elif lowered.endswith('.dll'):
                    nested_files[rel_path] = {
                        'type': 'dll',
                        'analysis': analyze_binary(full_path, is_dll=True)
                    }

        return {
            'file_list': file_list,
            'nested_files': nested_files
        }
    except Exception as e:
        return {'error': str(e)}
# --- Report rendering helpers ----------------------------------------------
# Every report (direct upload or file found inside a ZIP) shares the same tabs.
# ``nested`` is True when the report is drawn inside an outer st.expander.
# NOTE(review): Streamlit's st.expander documentation has stated that expanders
# may not be nested inside other expanders — confirm for the deployed version.
# Nested reports therefore use flat labelled code blocks instead of expanders.

def _render_code_group(label, code, nested, **code_kwargs):
    """Show ``code`` under ``label``: an expander normally, flat text when nested."""
    if nested:
        st.text(label)
        st.code(code, **code_kwargs)
    else:
        with st.expander(label):
            st.code(code, **code_kwargs)


def _render_summary(analysis, nested):
    """Summary tab: the analysis error, or header info plus a section table."""
    if "Error" in analysis:
        st.error(analysis["Error"])
        return
    if not nested:
        st.subheader("Basic Information")
    st.json(analysis.get("basic_info", {}))
    st.subheader("Sections")
    sections = analysis.get("sections", [])
    columns = ("Name", "VirtualSize", "SizeOfRawData", "Entropy")
    st.dataframe({column: [section[column] for section in sections] for column in columns})


def _render_exports(analysis):
    """Exports tab (DLL reports only)."""
    st.subheader("Exported Functions")
    st.json(analysis.get("exports", []))


def _render_imports(analysis, nested):
    """Imports tab: one group of function names per imported DLL."""
    if not nested:
        st.subheader("Imported Functions")
    for imp in analysis.get("imports", []):
        _render_code_group(f"DLL: {imp['DLL']}", "\n".join(imp["Functions"]), nested)


def _render_strings(analysis):
    """Strings tab: strings longer than 8 characters, capped at 500 entries."""
    st.subheader("Strings Found")
    all_strings = analysis.get("strings", [])
    interesting_strings = [s for s in all_strings if len(s) > 8]  # Filter out very short strings
    st.code("\n".join(interesting_strings[:500]))  # Limit to 500 strings


def _render_assembly(analysis):
    """Assembly tab: disassembly and, when present, decompiled pseudocode."""
    st.subheader("Disassembly")
    if "disassembly" in analysis:
        st.code(analysis["disassembly"], language="asm")
    else:
        st.warning("Disassembly not available")
    if analysis.get("pseudocode"):
        st.subheader("Decompiled Pseudocode")
        st.code(analysis["pseudocode"], language="c")


def _render_python_code(python_extraction, nested):
    """Python Code tab: decompiled sources, or the extraction failure message."""
    if python_extraction.get("success", False):
        st.success("Python code extracted successfully!")
        for filename, content in python_extraction.get("files", {}).items():
            _render_code_group(f"Python File: {filename}", content, nested, language="python")
    else:
        st.warning(python_extraction.get("message", "Not a Python executable or extraction failed."))


def _render_exe_report(analysis, python_extraction, nested=False):
    """Render the five-tab report for an executable."""
    tabs = st.tabs(["Summary", "Imports", "Strings", "Assembly", "Python Code"])
    with tabs[0]:
        _render_summary(analysis, nested)
    with tabs[1]:
        _render_imports(analysis, nested)
    with tabs[2]:
        _render_strings(analysis)
    with tabs[3]:
        _render_assembly(analysis)
    with tabs[4]:
        _render_python_code(python_extraction, nested)


def _render_dll_report(analysis, nested=False):
    """Render the five-tab report for a DLL."""
    tabs = st.tabs(["Summary", "Exports", "Imports", "Strings", "Assembly"])
    with tabs[0]:
        _render_summary(analysis, nested)
    with tabs[1]:
        _render_exports(analysis)
    with tabs[2]:
        _render_imports(analysis, nested)
    with tabs[3]:
        _render_strings(analysis)
    with tabs[4]:
        _render_assembly(analysis)


# --- Main app logic ----------------------------------------------------------
uploaded_file = st.file_uploader("Upload a file (.zip, .exe, or .dll)", type=["zip", "exe", "dll"])
if uploaded_file is not None:
    with tempfile.TemporaryDirectory() as temp_dir:
        # Save the upload into the temporary directory. The name is supplied by
        # the client, so keep only its final path component.
        file_path = os.path.join(temp_dir, os.path.basename(uploaded_file.name))
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        st.success(f"File uploaded: {uploaded_file.name}")

        file_size = os.path.getsize(file_path)
        st.info(f"File size: {file_size/1024:.2f} KB")

        # Process based on file type.
        upload_name = uploaded_file.name.lower()
        if upload_name.endswith('.zip'):
            st.subheader("ZIP File Contents")
            with st.spinner("Analyzing ZIP contents..."):
                output_dir = os.path.join(temp_dir, "extracted")
                os.makedirs(output_dir, exist_ok=True)
                result = process_zip_file(file_path, output_dir)
            if 'error' in result:
                st.error(f"Error processing ZIP file: {result['error']}")
            else:
                with st.expander("ZIP Contents", expanded=True):
                    st.write(f"Total files: {len(result['file_list'])}")
                    st.code("\n".join(result['file_list']))
                if result['nested_files']:
                    st.subheader("Detected Executable Files")
                    # Distinct loop name: do not shadow the upload's file_path.
                    for nested_path, file_info in result['nested_files'].items():
                        with st.expander(f"{nested_path} ({file_info['type'].upper()})"):
                            if file_info['type'] == 'exe':
                                _render_exe_report(
                                    file_info['analysis'],
                                    file_info['python_extraction'],
                                    nested=True,
                                )
                            else:  # DLL
                                _render_dll_report(file_info['analysis'], nested=True)
        elif upload_name.endswith('.exe'):
            st.subheader("EXE File Analysis")
            with st.spinner("Reverse engineering executable..."):
                output_dir = os.path.join(temp_dir, "exe_unpacked")
                os.makedirs(output_dir, exist_ok=True)
                # Perform comprehensive analysis, then try Python extraction.
                analysis = analyze_binary(file_path)
                python_extraction = try_pyinstaller_extraction(file_path, output_dir)
            _render_exe_report(analysis, python_extraction)
        elif upload_name.endswith('.dll'):
            st.subheader("DLL File Analysis")
            with st.spinner("Reverse engineering DLL..."):
                # Perform comprehensive analysis (with is_dll=True).
                analysis = analyze_binary(file_path, is_dll=True)
            _render_dll_report(analysis)