import importlib.util
import os
import re
import shutil
import struct
import subprocess
import sys
import tempfile
import zipfile
from pathlib import Path

import pefile
import streamlit as st
from capstone import Cs, CS_ARCH_X86, CS_MODE_32, CS_MODE_64
|
|
|
# Page chrome: must run before any other Streamlit call renders output.
st.set_page_config(page_title="Advanced File Analyzer", page_icon="🔍", layout="wide")

st.title("Advanced File Analysis Tool")

# Short feature list shown under the title (rendered as a Markdown bullet list).
st.markdown("""
This tool allows you to:
- Extract and view contents of .zip files
- Decompile .exe files to Python-like code
- View decompiled code from .dll files
- Automatically analyze nested executables
""")
|
|
|
def try_pyinstaller_extraction(file_path, output_dir):
    """Try to recover Python sources from a PyInstaller-built executable.

    Runs the ``pyinstxtractor`` module on *file_path* with *output_dir* as the
    working directory, then runs ``uncompyle6`` on every ``.pyc``/``.pyo``
    file found under ``<output_dir>/<basename of file_path>_extracted``.

    Args:
        file_path: Path to the executable to inspect.
        output_dir: Existing directory that receives the extraction output.

    Returns:
        A dict with ``"success"`` (bool) and ``"message"`` (str). On success
        it also carries ``"files"``, mapping each bytecode file's path
        (relative to the extracted directory) to the decompiled text. Any
        unexpected exception is reported as a failure dict, never raised.
    """
    try:
        # The extractor runs with cwd=output_dir, so a relative input path
        # would stop resolving once the working directory changes.
        exe_path = os.path.abspath(file_path)

        # Install the helper only when it is missing, and always through the
        # interpreter running this app so it lands in the same environment
        # that the "-m pyinstxtractor" call below uses.
        if importlib.util.find_spec("pyinstxtractor") is None:
            subprocess.run(
                [sys.executable, "-m", "pip", "install", "pyinstxtractor"],
                capture_output=True,
            )

        subprocess.run(
            [sys.executable, "-m", "pyinstxtractor", exe_path],
            cwd=output_dir,
            capture_output=True,
            text=True,
        )

        extracted_dir = os.path.join(output_dir, os.path.basename(exe_path) + "_extracted")
        if not os.path.exists(extracted_dir):
            return {
                "success": False,
                "message": "Not a PyInstaller executable or extraction failed"
            }

        if shutil.which("uncompyle6") is None:
            subprocess.run(
                [sys.executable, "-m", "pip", "install", "uncompyle6"],
                capture_output=True,
            )

        python_files = {}
        for root, _, files in os.walk(extracted_dir):
            for name in files:
                if not name.endswith(('.pyc', '.pyo')):
                    continue
                pyc_path = os.path.join(root, name)
                py_path = pyc_path + ".py"
                try:
                    subprocess.run(["uncompyle6", pyc_path, "-o", py_path], capture_output=True)
                    with open(py_path, 'r', encoding='utf-8', errors='ignore') as f:
                        rel_path = os.path.relpath(pyc_path, extracted_dir)
                        python_files[rel_path] = f.read()
                except OSError:
                    # Best effort: the decompiler is unavailable or produced
                    # no output for this file; move on to the next one.
                    continue

        return {
            "success": True,
            "message": "Successfully extracted Python code",
            "files": python_files
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"PyInstaller extraction error: {str(e)}"
        }
|
|
|
def disassemble_binary(file_path, is_dll=False):
    """Disassemble the executable sections of a PE file.

    For every section flagged executable, up to 4096 bytes are disassembled
    with Capstone. For non-DLL files the section holding the entry point is
    disassembled starting at the entry point, and its first line is prefixed
    with ``"ENTRY POINT -> "``. Disassembly of a section ends at the first
    byte sequence Capstone cannot decode.

    Args:
        file_path: Path to the PE file.
        is_dll: When true, the entry point is ignored and named exports are
            collected instead.

    Returns:
        On success: ``{"success": True, "code_sections": [...], "exports":
        [...], "pseudo_python": str}`` where each code section is
        ``{"name": str, "disassembly": [str, ...]}``. On any exception:
        ``{"success": False, "message": str}``.
    """
    pe = None
    try:
        pe = pefile.PE(file_path)

        # 0x20b is the optional-header magic of PE32+ (64-bit) images.
        is_64bit = pe.OPTIONAL_HEADER.Magic == 0x20b
        md = Cs(CS_ARCH_X86, CS_MODE_64 if is_64bit else CS_MODE_32)

        entry_rva = None if is_dll else pe.OPTIONAL_HEADER.AddressOfEntryPoint

        code_sections = []
        for section in pe.sections:
            # 0x20000000 is the "section is executable" characteristic bit.
            if not section.Characteristics & 0x20000000:
                continue

            section_name = section.Name.decode('utf-8', errors='ignore').strip('\x00')
            section_data = pe.get_data(section.VirtualAddress, section.SizeOfRawData)

            # Offset of the entry point inside *this* section, computed per
            # section so overlapping sections cannot borrow each other's base.
            is_entry = entry_rva is not None and section.contains_rva(entry_rva)
            start_offset = entry_rva - section.VirtualAddress if is_entry else 0

            # Slicing clamps to the available data, so an entry point lying
            # past the raw data simply yields an empty preview.
            preview = section_data[start_offset:start_offset + 4096]

            disassembly = []
            instructions = md.disasm_lite(preview, section.VirtualAddress + start_offset)
            for i, (address, size, mnemonic, op_str) in enumerate(instructions):
                entry_marker = "ENTRY POINT -> " if is_entry and i == 0 else ""
                disassembly.append(f"{entry_marker}0x{address:08x}: {mnemonic} {op_str}")

            code_sections.append({
                "name": section_name,
                "disassembly": disassembly
            })

        exports = []
        if is_dll and hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
            exports = [
                exp.name.decode('utf-8', errors='ignore')
                for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols
                if exp.name
            ]

        pseudo_python = generate_pseudo_python(code_sections, exports, is_dll)

        return {
            "success": True,
            "code_sections": code_sections,
            "exports": exports,
            "pseudo_python": pseudo_python
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"Disassembly error: {str(e)}"
        }
    finally:
        # Release the parsed file so the caller can delete or reopen it.
        if pe is not None:
            pe.close()
|
|
|
def generate_pseudo_python(code_sections, exports, is_dll):
    """Render disassembly as rough, Python-looking pseudo-code.

    Only the first 100 lines of each section are translated:

    * ``call 0x...``  -> ``call_function_0x...()`` using the call *target*
    * ``mov a, b``    -> ``a = b``
    * ``cmp a, b``    -> ``if a == b:`` followed by ``pass``
    * anything else (including operands that do not fit the simple forms
      above) is kept as a comment so no instruction silently disappears.

    Every section is emitted as its own ``def main():``.

    Args:
        code_sections: Iterable of ``{"name": str, "disassembly": [str]}``.
            Lines are expected to look like
            ``"[ENTRY POINT -> ]0x00401000: mnemonic operands"``; the address
            prefix is optional.
        exports: Export names, emitted as stub functions when *is_dll*.
        is_dll: Whether to emit the export stubs.

    Returns:
        The generated text, lines joined with ``"\\n"``.
    """
    # Splits a disassembly line into mnemonic and operand text, skipping the
    # optional entry marker and address prefix.
    line_pattern = re.compile(r'^(?:ENTRY POINT -> )?(?:0x[0-9a-fA-F]+:\s*)?(\S+)\s*(.*)$')
    direct_target = re.compile(r'0x[0-9a-fA-F]+')
    operand_pair = re.compile(r'(\w+),\s+(.+)')

    pseudo_code = [
        "# This is an approximation generated from binary code",
        "# It is NOT the original source code",
        "",
    ]

    if is_dll:
        pseudo_code.append("# DLL Export Functions")
        for export in exports:
            pseudo_code.append(f"def {export}():")
            pseudo_code.append("    # Implementation not recoverable from binary")
            pseudo_code.append("    pass")
            pseudo_code.append("")

    emitted_main = False
    for section in code_sections:
        pseudo_code.append(f"# Code Section: {section['name']}")
        pseudo_code.append("def main():")
        emitted_main = True

        body = []
        for line in section['disassembly'][:100]:
            parsed = line_pattern.match(line)
            mnemonic, operands = parsed.groups() if parsed else ("", "")
            operands = operands.strip()

            translated = None
            if mnemonic == "call":
                # Only direct calls have an address operand to name.
                if direct_target.fullmatch(operands):
                    translated = [f"    call_function_{operands}()"]
            elif mnemonic == "mov":
                pair = operand_pair.fullmatch(operands)
                if pair:
                    dest, source = pair.groups()
                    translated = [f"    {dest} = {source}  # {line}"]
            elif mnemonic == "cmp":
                pair = operand_pair.fullmatch(operands)
                if pair:
                    left, right = pair.groups()
                    translated = [f"    if {left} == {right}:  # {line}", "        pass"]

            body.extend(translated or [f"    # {line}"])

        # Keep the function syntactically complete when nothing was decoded.
        pseudo_code.extend(body or ["    pass"])
        pseudo_code.append("")

    # Only call main() when at least one section actually defined it.
    if emitted_main:
        pseudo_code.append("if __name__ == '__main__':")
        pseudo_code.append("    main()")
        pseudo_code.append("")

    return "\n".join(pseudo_code)
|
|
|
def unpack_exe(file_path, output_dir):
    """Collect PE metadata, resources, disassembly and PyInstaller output for an EXE.

    Args:
        file_path: Path to the executable.
        output_dir: Existing directory. Resources are written to
            ``<output_dir>/resources`` and the directory is also handed to
            ``try_pyinstaller_extraction``.

    Returns:
        ``{"basic_info": dict, "disassembly": dict, "pyinstaller": dict}`` on
        success, or ``{"Error": message}`` if anything raises.
        ``basic_info`` holds header fields and ``"Sections"``, plus
        ``"Imports"`` and ``"ExtractedResources"`` when the file has them.
    """
    try:
        pe = pefile.PE(file_path)
        try:
            info = {
                "Machine": hex(pe.FILE_HEADER.Machine),
                "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
                "NumberOfSections": pe.FILE_HEADER.NumberOfSections,
                "Sections": [
                    {
                        "Name": section.Name.decode('utf-8', errors='ignore').strip('\x00'),
                        "VirtualAddress": hex(section.VirtualAddress),
                        "SizeOfRawData": section.SizeOfRawData,
                        "Entropy": section.get_entropy()
                    }
                    for section in pe.sections
                ],
            }

            if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
                info["Imports"] = [
                    {
                        "DLL": entry.dll.decode('utf-8', errors='ignore'),
                        "Functions": [
                            imp.name.decode('utf-8', errors='ignore')
                            for imp in entry.imports
                            if imp.name  # ordinal-only imports have no name
                        ]
                    }
                    for entry in pe.DIRECTORY_ENTRY_IMPORT
                ]

            if hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
                resource_dir = os.path.join(output_dir, "resources")
                os.makedirs(resource_dir, exist_ok=True)
                extracted_resources = []

                # Resources are walked three levels deep: type -> id -> language.
                for resource_type in pe.DIRECTORY_ENTRY_RESOURCE.entries:
                    if not hasattr(resource_type, 'directory'):
                        continue
                    for resource_id in resource_type.directory.entries:
                        if not hasattr(resource_id, 'directory'):
                            continue
                        for resource_lang in resource_id.directory.entries:
                            # Skip entries that are not data leaves.
                            if not hasattr(resource_lang, 'data'):
                                continue
                            data = pe.get_data(
                                resource_lang.data.struct.OffsetToData,
                                resource_lang.data.struct.Size,
                            )
                            resource_filename = (
                                f"resource_{resource_type.id}_{resource_id.id}_{resource_lang.id}"
                            )
                            # Entries sharing the same ids would otherwise
                            # overwrite each other on disk.
                            if resource_filename in extracted_resources:
                                resource_filename = f"{resource_filename}_{len(extracted_resources)}"
                            resource_path = os.path.join(resource_dir, resource_filename)
                            with open(resource_path, 'wb') as f:
                                f.write(data)
                            extracted_resources.append(resource_filename)

                info["ExtractedResources"] = extracted_resources
        finally:
            # Release the file before the helpers below reopen it.
            pe.close()

        return {
            "basic_info": info,
            "disassembly": disassemble_binary(file_path),
            "pyinstaller": try_pyinstaller_extraction(file_path, output_dir)
        }
    except Exception as e:
        return {"Error": str(e)}
|
|
|
def analyze_dll(file_path):
    """Collect PE metadata, exports, imports and disassembly for a DLL.

    Args:
        file_path: Path to the DLL.

    Returns:
        ``{"basic_info": dict, "disassembly": dict}`` on success, or
        ``{"Error": message}`` if anything raises. ``basic_info`` holds header
        fields and ``"Sections"``, plus ``"Exports"`` / ``"Imports"`` when the
        file has the corresponding directories.
    """
    try:
        pe = pefile.PE(file_path)
        try:
            info = {
                "Machine": hex(pe.FILE_HEADER.Machine),
                "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
                "NumberOfSections": pe.FILE_HEADER.NumberOfSections,
                "Characteristics": hex(pe.FILE_HEADER.Characteristics),
                "DllCharacteristics": hex(pe.OPTIONAL_HEADER.DllCharacteristics),
                "Sections": [
                    {
                        "Name": section.Name.decode('utf-8', errors='ignore').strip('\x00'),
                        "VirtualAddress": hex(section.VirtualAddress),
                        "SizeOfRawData": section.SizeOfRawData,
                        "Entropy": section.get_entropy()
                    }
                    for section in pe.sections
                ],
            }

            if hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
                info["Exports"] = [
                    exp.name.decode('utf-8', errors='ignore')
                    for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols
                    if exp.name  # ordinal-only exports have no name
                ]

            if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
                info["Imports"] = [
                    {
                        "DLL": entry.dll.decode('utf-8', errors='ignore'),
                        "Functions": [
                            imp.name.decode('utf-8', errors='ignore')
                            for imp in entry.imports
                            if imp.name
                        ]
                    }
                    for entry in pe.DIRECTORY_ENTRY_IMPORT
                ]
        finally:
            # Release the file before disassemble_binary reopens it.
            pe.close()

        return {
            "basic_info": info,
            "disassembly": disassemble_binary(file_path, is_dll=True)
        }
    except Exception as e:
        return {"Error": str(e)}
|
|
|
def process_zip_file(file_path, temp_dir):
    """Extract a ZIP archive and analyze any executables found inside it.

    Args:
        file_path: Path to the ZIP archive.
        temp_dir: Existing directory that receives the extracted files and
            the per-EXE ``*_unpacked`` output directories.

    Returns:
        ``{"file_list": [...], "nested_files": {...}}`` on success, where
        ``file_list`` is the archive's member names and ``nested_files`` maps
        each extracted ``.exe``/``.dll`` path (relative to *temp_dir*) to
        ``{"type": "exe" | "dll", "info": <analysis result>}``.
        ``{"error": message}`` if anything raises.
    """
    try:
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            file_list = zip_ref.namelist()
            zip_ref.extractall(temp_dir)

        # Snapshot the candidates first: analysis creates new directories
        # under temp_dir, and those must not be picked up by the walk.
        candidates = []
        for root, _, files in os.walk(temp_dir):
            for name in files:
                lowered = name.lower()  # extensions are matched case-insensitively
                if lowered.endswith('.exe'):
                    candidates.append((os.path.join(root, name), 'exe'))
                elif lowered.endswith('.dll'):
                    candidates.append((os.path.join(root, name), 'dll'))

        nested_files = {}
        for full_path, kind in candidates:
            rel_path = os.path.relpath(full_path, temp_dir)
            if kind == 'exe':
                # Derive the directory name from the relative path so two
                # same-named EXEs in different folders do not share output.
                unpack_name = rel_path.replace(os.sep, "_") + "_unpacked"
                exe_output_dir = os.path.join(temp_dir, unpack_name)
                os.makedirs(exe_output_dir, exist_ok=True)
                info = unpack_exe(full_path, exe_output_dir)
            else:
                info = analyze_dll(full_path)
            nested_files[rel_path] = {'type': kind, 'info': info}

        return {
            'file_list': file_list,
            'nested_files': nested_files
        }
    except Exception as e:
        return {'error': str(e)}
|
|
|
|
|
def _render_disassembly_sections(disassembly):
    """Show every disassembled code section in its own expander."""
    for section in disassembly.get('code_sections', []):
        with st.expander(f"Section: {section['name']}"):
            st.code("\n".join(section['disassembly']), language="asm")


def _render_exe_report(exe_info):
    """Render the tabbed report for one ``unpack_exe`` result."""
    # unpack_exe reports failures as {"Error": ...}; surface them instead of
    # showing empty tabs.
    if 'Error' in exe_info:
        st.error(f"Error analyzing EXE file: {exe_info['Error']}")
        return

    basic_info = exe_info.get('basic_info', {})
    pyinstaller_result = exe_info.get('pyinstaller', {})
    disassembly = exe_info.get('disassembly', {})

    tabs = st.tabs(["Basic Info", "Python Code", "Disassembly", "Resources"])
    with tabs[0]:
        st.json(basic_info)
    with tabs[1]:
        if pyinstaller_result.get('success', False):
            st.success("Python code extracted successfully!")
            for filename, content in pyinstaller_result.get('files', {}).items():
                with st.expander(f"Python File: {filename}"):
                    st.code(content, language="python")
        else:
            st.warning(pyinstaller_result.get('message', "Not a Python executable or extraction failed."))
            st.subheader("Generated Python-like Code")
            st.code(disassembly.get('pseudo_python', "# No code could be generated"), language="python")
    with tabs[2]:
        if disassembly.get('success', False):
            _render_disassembly_sections(disassembly)
        else:
            st.error(disassembly.get('message', "Disassembly failed"))
    with tabs[3]:
        if 'ExtractedResources' in basic_info:
            st.write(f"Found {len(basic_info['ExtractedResources'])} resources")
            for resource in basic_info['ExtractedResources']:
                st.text(f"Resource: {resource}")


def _render_dll_report(dll_info):
    """Render the tabbed report for one ``analyze_dll`` result."""
    if 'Error' in dll_info:
        st.error(f"Error analyzing DLL file: {dll_info['Error']}")
        return

    basic_info = dll_info.get('basic_info', {})
    disassembly = dll_info.get('disassembly', {})

    tabs = st.tabs(["Basic Info", "DLL Code", "Exports/Imports"])
    with tabs[0]:
        st.json(basic_info)
    with tabs[1]:
        st.subheader("Generated Python-like Code")
        st.code(disassembly.get('pseudo_python', "# No code could be generated"), language="python")
        if disassembly.get('success', False):
            st.subheader("Assembly Code")
            _render_disassembly_sections(disassembly)
        else:
            st.error(disassembly.get('message', "Disassembly failed"))
    with tabs[2]:
        if 'Exports' in basic_info:
            st.subheader("Exported Functions")
            st.json(basic_info['Exports'])
        if 'Imports' in basic_info:
            st.subheader("Imported DLLs and Functions")
            st.json(basic_info['Imports'])


uploaded_file = st.file_uploader("Upload a file (.zip, .exe, or .dll)", type=["zip", "exe", "dll"])

if uploaded_file is not None:
    # Everything is analyzed inside a throwaway directory that is removed
    # when the block exits.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Keep only the final path component of the client-supplied name so
        # the upload can never be written outside temp_dir.
        safe_name = os.path.basename(uploaded_file.name) or "upload"
        file_path = os.path.join(temp_dir, safe_name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        st.success(f"File uploaded: {uploaded_file.name}")

        lowered_name = uploaded_file.name.lower()

        if lowered_name.endswith('.zip'):
            st.subheader("ZIP File Contents")
            output_dir = os.path.join(temp_dir, "extracted")
            os.makedirs(output_dir, exist_ok=True)

            result = process_zip_file(file_path, output_dir)

            if 'error' in result:
                st.error(f"Error processing ZIP file: {result['error']}")
            else:
                with st.expander("ZIP Contents", expanded=True):
                    st.write(f"Total files: {len(result['file_list'])}")
                    st.json(result['file_list'])

                if result['nested_files']:
                    st.subheader("Detected Executable Files")
                    for nested_path, file_info in result['nested_files'].items():
                        # A heading rather than an expander: the reports
                        # below contain expanders of their own, and those
                        # must not be nested inside another expander.
                        st.markdown(f"#### {nested_path} ({file_info['type'].upper()})")
                        if file_info['type'] == 'exe':
                            _render_exe_report(file_info['info'])
                        else:
                            _render_dll_report(file_info['info'])
                        st.markdown("---")

        elif lowered_name.endswith('.exe'):
            st.subheader("EXE File Analysis and Decompilation")
            output_dir = os.path.join(temp_dir, "exe_unpacked")
            os.makedirs(output_dir, exist_ok=True)

            try:
                _render_exe_report(unpack_exe(file_path, output_dir))
            except Exception as e:
                st.error(f"Error analyzing EXE file: {str(e)}")

        elif lowered_name.endswith('.dll'):
            st.subheader("DLL File Analysis and Decompilation")
            try:
                _render_dll_report(analyze_dll(file_path))
            except Exception as e:
                st.error(f"Error analyzing DLL file: {str(e)}")