Exe_unpacker / app.py
euler314's picture
Update app.py
157cf12 verified
raw
history blame
23.5 kB
import streamlit as st
import os
import zipfile
import tempfile
import pefile
import shutil
import subprocess
import re
import struct
from pathlib import Path
from capstone import Cs, CS_ARCH_X86, CS_MODE_32, CS_MODE_64
# Page-level configuration must be the first Streamlit call in the script.
# The icon (U+1F50D, magnifying glass) is written as an escape sequence so it
# survives any re-encoding of this file; the previous literal was mojibake.
st.set_page_config(page_title="Advanced File Analyzer", page_icon="\U0001F50D", layout="wide")
st.title("Advanced File Analysis Tool")
# Short description of the supported file types, shown under the title.
st.markdown("""
This tool allows you to:
- Extract and view contents of .zip files
- Decompile .exe files to Python-like code
- View decompiled code from .dll files
- Automatically analyze nested executables
""")
def try_pyinstaller_extraction(file_path, output_dir):
    """Attempt to extract Python scripts from a PyInstaller executable.

    Runs ``pyinstxtractor`` on *file_path* with *output_dir* as the working
    directory, then tries to decompile every ``.pyc``/``.pyo`` file found in
    the resulting ``<basename>_extracted`` directory with ``uncompyle6``.
    Both tools are installed with pip on demand.

    Args:
        file_path: Path to the executable to inspect.
        output_dir: Directory used as the working directory for extraction.

    Returns:
        A dict with ``"success"`` and ``"message"`` keys. On success it also
        carries ``"files"``, mapping each bytecode file's path (relative to
        the extraction directory) to its decompiled source text. Failures are
        reported through the dict; no exception is propagated.
    """
    # Function-scope import keeps this block self-contained.
    import sys

    try:
        # The extractor runs with cwd=output_dir, so a relative input path
        # would otherwise be resolved against the wrong directory.
        target = os.path.abspath(file_path)

        # Go through the running interpreter so the tools are installed into,
        # and executed from, the same environment as this app rather than
        # whatever "pip"/"python" happen to be first on PATH.
        subprocess.run([sys.executable, "-m", "pip", "install", "pyinstxtractor"],
                       capture_output=True)
        subprocess.run([sys.executable, "-m", "pyinstxtractor", target],
                       cwd=output_dir, capture_output=True, text=True)

        extracted_dir = os.path.join(output_dir, os.path.basename(file_path) + "_extracted")
        if not os.path.exists(extracted_dir):
            return {
                "success": False,
                "message": "Not a PyInstaller executable or extraction failed"
            }

        # Try to decompile the Python bytecode files
        subprocess.run([sys.executable, "-m", "pip", "install", "uncompyle6"],
                       capture_output=True)
        python_files = {}
        for root, _, files in os.walk(extracted_dir):
            for file in files:
                if not file.endswith(('.pyc', '.pyo')):
                    continue
                pyc_path = os.path.join(root, file)
                py_path = pyc_path + ".py"
                try:
                    subprocess.run(["uncompyle6", pyc_path, "-o", py_path], capture_output=True)
                    if os.path.exists(py_path):
                        with open(py_path, 'r', encoding='utf-8', errors='ignore') as f:
                            rel_path = os.path.relpath(pyc_path, extracted_dir)
                            python_files[rel_path] = f.read()
                except (OSError, subprocess.SubprocessError):
                    # Decompilation is best effort: a missing tool or an
                    # unreadable output only skips this one file.
                    continue

        return {
            "success": True,
            "message": "Successfully extracted Python code",
            "files": python_files
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"PyInstaller extraction error: {str(e)}"
        }
def disassemble_binary(file_path, is_dll=False):
    """Disassemble the executable sections of a PE file.

    For each executable section, up to 4096 bytes are disassembled with
    Capstone. For EXEs the section holding the entry point is disassembled
    starting at the entry point, and its first instruction is prefixed with
    ``"ENTRY POINT -> "``. For DLLs the named exports are collected as well.

    Args:
        file_path: Path to the PE file.
        is_dll: When true, skip entry-point handling and collect exports.

    Returns:
        On success, a dict with ``"success": True``, ``"code_sections"`` (a
        list of ``{"name", "disassembly"}`` dicts), ``"exports"`` (list of
        names) and ``"pseudo_python"`` (text from ``generate_pseudo_python``).
        On failure, ``{"success": False, "message": ...}``.
    """
    # Section flag tested below. 0x20000000 is IMAGE_SCN_MEM_EXECUTE
    # (IMAGE_SCN_CNT_CODE would be 0x00000020).
    executable_flag = 0x20000000
    preview_bytes = 4096  # Limit to a reasonable size for preview

    try:
        pe = pefile.PE(file_path)
        try:
            # Determine if 32-bit or 64-bit (0x20b is the PE32+ magic)
            is_64bit = pe.OPTIONAL_HEADER.Magic == 0x20b
            # disasm_lite only yields (address, size, mnemonic, op_str), so
            # instruction detail is not enabled on the disassembler.
            md = Cs(CS_ARCH_X86, CS_MODE_64 if is_64bit else CS_MODE_32)

            entry_rva = pe.OPTIONAL_HEADER.AddressOfEntryPoint
            code_sections = []

            for section in pe.sections:
                if not section.Characteristics & executable_flag:
                    continue
                section_name = section.Name.decode('utf-8', errors='ignore').strip('\x00')
                section_data = pe.get_data(section.VirtualAddress, section.SizeOfRawData)

                # Start disassembly from the entry point if it is in this section
                is_entry = not is_dll and section.contains_rva(entry_rva)
                start_offset = entry_rva - section.VirtualAddress if is_entry else 0

                # Slicing clamps to the available data, so an entry point that
                # lies past the raw data simply yields no instructions.
                window = section_data[start_offset:start_offset + preview_bytes]

                disassembly = []
                instructions = md.disasm_lite(window, section.VirtualAddress + start_offset)
                for i, (address, size, mnemonic, op_str) in enumerate(instructions):
                    entry_marker = "ENTRY POINT -> " if is_entry and i == 0 else ""
                    disassembly.append(f"{entry_marker}0x{address:08x}: {mnemonic} {op_str}")

                code_sections.append({
                    "name": section_name,
                    "disassembly": disassembly
                })

            # Get exports for DLLs
            exports = []
            if is_dll and hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
                exports = [
                    exp.name.decode('utf-8', errors='ignore')
                    for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols
                    if exp.name
                ]
        finally:
            # Release the file handle/mapping held by pefile so the temporary
            # file can be removed afterwards.
            pe.close()

        # Convert to pseudo-Python
        pseudo_python = generate_pseudo_python(code_sections, exports, is_dll)
        return {
            "success": True,
            "code_sections": code_sections,
            "exports": exports,
            "pseudo_python": pseudo_python
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"Disassembly error: {str(e)}"
        }
def generate_pseudo_python(code_sections, exports, is_dll):
    """Generate pseudo-Python text from disassembly listings.

    The result is an illustration only, not a decompilation. Each listing
    line is expected in the form ``"[ENTRY POINT -> ]0x<address>: <mnemonic>
    <operands>"``. Only the first 100 instructions of each section are used.

    Translation rules, applied to the instruction part of each line:
      * ``call 0x<target>`` becomes ``call_function_0x<target>()``;
      * ``mov <reg>, <src>`` becomes ``<reg> = <src>``;
      * ``cmp <reg>, <other>`` becomes an ``if <reg> == <other>:`` stub;
      * anything else is kept as a comment, so no instruction is dropped.

    Args:
        code_sections: List of dicts with ``"name"`` and ``"disassembly"``
            (list of listing lines).
        exports: Exported function names; used only when *is_dll* is true.
        is_dll: When true, emit a stub function per export.

    Returns:
        The generated text as a single newline-joined string.
    """
    # Patterns are matched against the instruction only (address prefix
    # removed). Searching the whole line would pick up the line's own
    # address as the "call target".
    call_re = re.compile(r'call\s+(0x[0-9a-f]+)$')
    mov_re = re.compile(r'mov\s+(\w+),\s+(.+)')
    cmp_re = re.compile(r'cmp\s+(\w+),\s+(.+)')

    pseudo_code = [
        "# This is an approximation generated from binary code",
        "# It is NOT the original source code",
        "",
    ]

    if is_dll:
        pseudo_code.append("# DLL Export Functions")
        for export in exports:
            pseudo_code.append(f"def {export}():")
            pseudo_code.append("    # Implementation not recoverable from binary")
            pseudo_code.append("    pass")
            pseudo_code.append("")

    for section in code_sections:
        pseudo_code.append(f"# Code Section: {section['name']}")
        pseudo_code.append("def main():")
        has_statement = False

        for line in section['disassembly'][:100]:  # Limit to first 100 instructions for preview
            _, separator, remainder = line.partition(": ")
            instruction = (remainder if separator else line).strip()

            found = call_re.match(instruction)
            if found:
                pseudo_code.append(f"    call_function_{found.group(1)}()")
                has_statement = True
                continue

            found = mov_re.match(instruction)
            if found:
                dest, source = found.groups()
                pseudo_code.append(f"    {dest} = {source}  # {line}")
                has_statement = True
                continue

            found = cmp_re.match(instruction)
            if found:
                a, b = found.groups()
                pseudo_code.append(f"    if {a} == {b}:  # {line}")
                pseudo_code.append("        pass")
                has_statement = True
                continue

            pseudo_code.append(f"    # {line}")

        if not has_statement:
            # A body made only of comments (or nothing) is not a valid block.
            pseudo_code.append("    pass")
        pseudo_code.append("")

    if code_sections:
        # Emit the entry guard once, and only when main() was defined.
        pseudo_code.append("if __name__ == '__main__':")
        pseudo_code.append("    main()")
        pseudo_code.append("")

    return "\n".join(pseudo_code)
def _resource_label(entry):
    """Return a filename-safe label for a resource directory entry.

    Entries identified by number use that number. Entries identified by name
    have no numeric id, so the name is used with unsafe characters replaced.
    """
    if entry.id is not None:
        return str(entry.id)
    return re.sub(r'[^\w.-]', '_', str(entry.name))


def unpack_exe(file_path, output_dir):
    """Extract information from an EXE file and attempt to convert to Python.

    Collects header, section and import information, writes every resource
    found to ``<output_dir>/resources``, then runs ``disassemble_binary`` and
    ``try_pyinstaller_extraction`` on the file.

    Args:
        file_path: Path to the executable.
        output_dir: Directory that receives extracted resources and the
            PyInstaller extraction output.

    Returns:
        A dict with ``"basic_info"``, ``"disassembly"`` and ``"pyinstaller"``
        keys, or ``{"Error": <message>}`` when the file cannot be analysed.
    """
    try:
        pe = pefile.PE(file_path)
        try:
            # Basic PE information
            info = {
                "Machine": hex(pe.FILE_HEADER.Machine),
                "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
                "NumberOfSections": pe.FILE_HEADER.NumberOfSections,
                "Sections": [
                    {
                        "Name": section.Name.decode('utf-8', errors='ignore').strip('\x00'),
                        "VirtualAddress": hex(section.VirtualAddress),
                        "SizeOfRawData": section.SizeOfRawData,
                        "Entropy": section.get_entropy()
                    }
                    for section in pe.sections
                ]
            }

            # Get imports (imports by ordinal have no name and are skipped)
            if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
                info["Imports"] = [
                    {
                        "DLL": entry.dll.decode('utf-8', errors='ignore'),
                        "Functions": [
                            imp.name.decode('utf-8', errors='ignore')
                            for imp in entry.imports
                            if imp.name
                        ]
                    }
                    for entry in pe.DIRECTORY_ENTRY_IMPORT
                ]

            # Extract resources if present. The tree is walked three levels
            # deep: type -> id -> language.
            if hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
                resource_dir = os.path.join(output_dir, "resources")
                os.makedirs(resource_dir, exist_ok=True)
                extracted_resources = []
                for resource_type in pe.DIRECTORY_ENTRY_RESOURCE.entries:
                    if not hasattr(resource_type, 'directory'):
                        continue
                    for resource_id in resource_type.directory.entries:
                        if not hasattr(resource_id, 'directory'):
                            continue
                        for resource_lang in resource_id.directory.entries:
                            if not hasattr(resource_lang, 'data'):
                                # Not a leaf entry; nothing to dump here.
                                continue
                            data = pe.get_data(resource_lang.data.struct.OffsetToData,
                                               resource_lang.data.struct.Size)
                            resource_filename = "resource_{}_{}_{}".format(
                                _resource_label(resource_type),
                                _resource_label(resource_id),
                                _resource_label(resource_lang),
                            )
                            resource_path = os.path.join(resource_dir, resource_filename)
                            with open(resource_path, 'wb') as f:
                                f.write(data)
                            extracted_resources.append(resource_filename)
                info["ExtractedResources"] = extracted_resources
        finally:
            # Release the file before other tools open it and before the
            # temporary directory is cleaned up.
            pe.close()

        # Try to disassemble and convert to pseudo-Python
        disassembly_result = disassemble_binary(file_path)
        # Try PyInstaller extraction for Python executables
        pyinstaller_result = try_pyinstaller_extraction(file_path, output_dir)
        return {
            "basic_info": info,
            "disassembly": disassembly_result,
            "pyinstaller": pyinstaller_result
        }
    except Exception as e:
        return {"Error": str(e)}
def analyze_dll(file_path):
    """Extract information and code from a DLL file.

    Collects header fields, sections, named exports and named imports, then
    runs ``disassemble_binary`` in DLL mode.

    Args:
        file_path: Path to the DLL.

    Returns:
        A dict with ``"basic_info"`` and ``"disassembly"`` keys, or
        ``{"Error": <message>}`` when the file cannot be analysed.
    """
    try:
        pe = pefile.PE(file_path)
        try:
            # Basic information
            info = {
                "Machine": hex(pe.FILE_HEADER.Machine),
                "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
                "NumberOfSections": pe.FILE_HEADER.NumberOfSections,
                "Characteristics": hex(pe.FILE_HEADER.Characteristics),
                "DllCharacteristics": hex(pe.OPTIONAL_HEADER.DllCharacteristics),
                "Sections": [
                    {
                        "Name": section.Name.decode('utf-8', errors='ignore').strip('\x00'),
                        "VirtualAddress": hex(section.VirtualAddress),
                        "SizeOfRawData": section.SizeOfRawData,
                        "Entropy": section.get_entropy()
                    }
                    for section in pe.sections
                ]
            }

            # Get exports (exports by ordinal have no name and are skipped)
            if hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
                info["Exports"] = [
                    exp.name.decode('utf-8', errors='ignore')
                    for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols
                    if exp.name
                ]

            # Get imports (imports by ordinal have no name and are skipped)
            if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
                info["Imports"] = [
                    {
                        "DLL": entry.dll.decode('utf-8', errors='ignore'),
                        "Functions": [
                            imp.name.decode('utf-8', errors='ignore')
                            for imp in entry.imports
                            if imp.name
                        ]
                    }
                    for entry in pe.DIRECTORY_ENTRY_IMPORT
                ]
        finally:
            # Release the file handle/mapping held by pefile so the temporary
            # file can be removed afterwards.
            pe.close()

        # Try to disassemble and convert to pseudo-Python
        disassembly_result = disassemble_binary(file_path, is_dll=True)
        return {
            "basic_info": info,
            "disassembly": disassembly_result
        }
    except Exception as e:
        return {"Error": str(e)}
def process_zip_file(file_path, temp_dir):
    """Extract a ZIP archive and analyse any executables it contains.

    The archive is extracted into *temp_dir*. Every extracted file whose
    name ends in ``.exe`` or ``.dll`` (case-insensitively) is then analysed
    with ``unpack_exe`` or ``analyze_dll``.

    Args:
        file_path: Path to the ZIP archive.
        temp_dir: Directory that receives the extracted files and the
            per-executable ``*_unpacked`` output directories.

    Returns:
        ``{"file_list": [...], "nested_files": {...}}`` where ``nested_files``
        maps each executable's path (relative to *temp_dir*) to
        ``{"type": "exe" | "dll", "info": <analysis result>}``; or
        ``{"error": <message>}`` if the archive cannot be processed.
    """
    try:
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            # Get file list before extraction
            file_list = zip_ref.namelist()
            # Extract to temp directory
            zip_ref.extractall(temp_dir)

        # Check for nested executables
        nested_files = {}
        for root, _, files in os.walk(temp_dir):
            for file in files:
                full_path = os.path.join(root, file)
                rel_path = os.path.relpath(full_path, temp_dir)
                # Windows file names are case-insensitive, so "SETUP.EXE"
                # must be treated like "setup.exe".
                lowered = file.lower()
                if lowered.endswith('.exe'):
                    # Derive the output directory from the relative path so
                    # same-named executables in different folders do not
                    # share (and overwrite) one directory.
                    unique_name = rel_path.replace(os.sep, "_")
                    exe_output_dir = os.path.join(temp_dir, f"{unique_name}_unpacked")
                    os.makedirs(exe_output_dir, exist_ok=True)
                    nested_files[rel_path] = {
                        'type': 'exe',
                        'info': unpack_exe(full_path, exe_output_dir)
                    }
                elif lowered.endswith('.dll'):
                    nested_files[rel_path] = {
                        'type': 'dll',
                        'info': analyze_dll(full_path)
                    }

        return {
            'file_list': file_list,
            'nested_files': nested_files
        }
    except Exception as e:
        return {'error': str(e)}
# Main app logic
#
# NOTE(review): the render helpers below create expanders and are also called
# from inside the per-file expander of the ZIP view (as the original inline
# code did). Some Streamlit releases reject nested expanders - confirm against
# the deployed version.


def _show_analysis_error(info, label):
    """Show the error recorded by a failed analysis; return True if shown.

    ``unpack_exe`` and ``analyze_dll`` report failure as ``{"Error": msg}``.
    Without this check a failed analysis would render as empty tabs.
    """
    if "Error" in info:
        st.error(f"Error analyzing {label} file: {info['Error']}")
        return True
    return False


def _show_code_sections(disassembly):
    """Render one expander of assembly text per disassembled section."""
    for section in disassembly.get('code_sections', []):
        with st.expander(f"Section: {section['name']}"):
            st.code("\n".join(section['disassembly']), language="asm")


def _render_exe_analysis(exe_info):
    """Render the result dict of ``unpack_exe`` as a set of tabs."""
    if _show_analysis_error(exe_info, "EXE"):
        return

    basic_info = exe_info.get('basic_info', {})
    pyinstaller_result = exe_info.get('pyinstaller', {})
    disassembly = exe_info.get('disassembly', {})

    tabs = st.tabs(["Basic Info", "Python Code", "Disassembly", "Resources"])
    with tabs[0]:
        st.json(basic_info)
    with tabs[1]:
        if pyinstaller_result.get('success', False):
            st.success("Python code extracted successfully!")
            for filename, content in pyinstaller_result.get('files', {}).items():
                with st.expander(f"Python File: {filename}"):
                    st.code(content, language="python")
        else:
            st.warning(pyinstaller_result.get('message', "Not a Python executable or extraction failed."))
            st.subheader("Generated Python-like Code")
            st.code(disassembly.get('pseudo_python', "# No code could be generated"), language="python")
    with tabs[2]:
        if disassembly.get('success', False):
            _show_code_sections(disassembly)
        else:
            st.error(disassembly.get('message', "Disassembly failed"))
    with tabs[3]:
        if 'ExtractedResources' in basic_info:
            st.write(f"Found {len(basic_info['ExtractedResources'])} resources")
            for resource in basic_info['ExtractedResources']:
                st.text(f"Resource: {resource}")


def _render_dll_analysis(dll_info):
    """Render the result dict of ``analyze_dll`` as a set of tabs."""
    if _show_analysis_error(dll_info, "DLL"):
        return

    basic_info = dll_info.get('basic_info', {})
    disassembly = dll_info.get('disassembly', {})

    tabs = st.tabs(["Basic Info", "DLL Code", "Exports/Imports"])
    with tabs[0]:
        st.json(basic_info)
    with tabs[1]:
        st.subheader("Generated Python-like Code")
        st.code(disassembly.get('pseudo_python', "# No code could be generated"), language="python")
        if disassembly.get('success', False):
            st.subheader("Assembly Code")
            _show_code_sections(disassembly)
    with tabs[2]:
        if 'Exports' in basic_info:
            st.subheader("Exported Functions")
            st.json(basic_info['Exports'])
        if 'Imports' in basic_info:
            st.subheader("Imported DLLs and Functions")
            st.json(basic_info['Imports'])


uploaded_file = st.file_uploader("Upload a file (.zip, .exe, or .dll)", type=["zip", "exe", "dll"])
if uploaded_file is not None:
    with tempfile.TemporaryDirectory() as temp_dir:
        # Save the uploaded file to the temporary directory. Only the final
        # path component of the client-supplied name is used, so the file
        # cannot be written outside temp_dir.
        file_path = os.path.join(temp_dir, os.path.basename(uploaded_file.name))
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        st.success(f"File uploaded: {uploaded_file.name}")

        # Process based on file type
        lowered_name = uploaded_file.name.lower()
        if lowered_name.endswith('.zip'):
            st.subheader("ZIP File Contents")
            output_dir = os.path.join(temp_dir, "extracted")
            os.makedirs(output_dir, exist_ok=True)
            result = process_zip_file(file_path, output_dir)
            if 'error' in result:
                st.error(f"Error processing ZIP file: {result['error']}")
            else:
                with st.expander("ZIP Contents", expanded=True):
                    st.write(f"Total files: {len(result['file_list'])}")
                    st.json(result['file_list'])
                if result['nested_files']:
                    st.subheader("Detected Executable Files")
                    # A distinct loop name keeps file_path (the upload) intact.
                    for nested_path, file_info in result['nested_files'].items():
                        with st.expander(f"{nested_path} ({file_info['type'].upper()})"):
                            if file_info['type'] == 'exe':
                                _render_exe_analysis(file_info['info'])
                            else:  # DLL
                                _render_dll_analysis(file_info['info'])
        elif lowered_name.endswith('.exe'):
            st.subheader("EXE File Analysis and Decompilation")
            output_dir = os.path.join(temp_dir, "exe_unpacked")
            os.makedirs(output_dir, exist_ok=True)
            try:
                _render_exe_analysis(unpack_exe(file_path, output_dir))
            except Exception as e:
                st.error(f"Error analyzing EXE file: {str(e)}")
        elif lowered_name.endswith('.dll'):
            st.subheader("DLL File Analysis and Decompilation")
            try:
                _render_dll_analysis(analyze_dll(file_path))
            except Exception as e:
                st.error(f"Error analyzing DLL file: {str(e)}")