"""Streamlit app that inspects uploaded .zip, .exe and .dll files.

For PE files it reports header, section, import and export information,
disassembles a preview of the executable sections with Capstone, renders a
rough "pseudo-Python" approximation of that disassembly, and attempts a
PyInstaller extraction.  ZIP uploads are extracted and every contained
.exe/.dll is analyzed the same way.
"""

import contextlib
import os
import re
import shutil
import struct
import subprocess
import sys
import tempfile
import zipfile
from pathlib import Path

import pefile
import streamlit as st
from capstone import Cs, CS_ARCH_X86, CS_MODE_32, CS_MODE_64

# Optional-header magic identifying a PE32+ (64-bit) image.
PE32_PLUS_MAGIC = 0x20B
# Section characteristic flag: the section can be executed as code.
IMAGE_SCN_MEM_EXECUTE = 0x20000000
# Number of bytes of each executable section that is disassembled for preview.
DISASSEMBLY_PREVIEW_BYTES = 4096
# Number of instructions per section that is translated to pseudo-Python.
PSEUDO_PYTHON_MAX_INSTRUCTIONS = 100

# "<optional entry marker>0x<address>: <mnemonic> <operands>" as produced by
# disassemble_binary().
_DISASM_LINE_RE = re.compile(r'^(?:ENTRY POINT -> )?0x[0-9a-f]+:\s+(\S+)\s*(.*)$')
_HEX_LITERAL_RE = re.compile(r'0x[0-9a-f]+')
_TWO_OPERAND_RE = re.compile(r'^(\w+),\s+(.+)$')

st.set_page_config(page_title="Advanced File Analyzer", page_icon="🔍", layout="wide")

st.title("Advanced File Analysis Tool")
st.markdown("""
This tool allows you to:
- Extract and view contents of .zip files
- Decompile .exe files to Python-like code
- View decompiled code from .dll files
- Automatically analyze nested executables
""")


def _decode(raw):
    """Decode a bytes value from a PE structure, dropping undecodable bytes."""
    return raw.decode('utf-8', errors='ignore')


def _section_summaries(pe):
    """Return one summary dict (name, address, size, entropy) per PE section."""
    return [
        {
            "Name": _decode(section.Name).strip('\x00'),
            "VirtualAddress": hex(section.VirtualAddress),
            "SizeOfRawData": section.SizeOfRawData,
            "Entropy": section.get_entropy(),
        }
        for section in pe.sections
    ]


def _import_table(pe):
    """Return the named imports of *pe*, grouped by the DLL they come from."""
    return [
        {
            "DLL": _decode(entry.dll),
            "Functions": [_decode(imp.name) for imp in entry.imports if imp.name],
        }
        for entry in pe.DIRECTORY_ENTRY_IMPORT
    ]


def _export_names(pe):
    """Return the names of all named exports of *pe*."""
    return [_decode(exp.name) for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols if exp.name]


def try_pyinstaller_extraction(file_path, output_dir):
    """Attempt to extract Python scripts from a PyInstaller executable.

    Runs ``pyinstxtractor`` on *file_path* with *output_dir* as the working
    directory and then tries to decompile every extracted ``.pyc``/``.pyo``
    file with ``uncompyle6``.  Both tools are pip-installed on demand.

    Returns a dict with ``success`` and ``message`` keys; on success it also
    has ``files``, mapping each bytecode file's path (relative to the
    extraction directory) to its decompiled source.  Never raises.
    """
    try:
        # Use the running interpreter so the tools are installed into, and run
        # from, the same environment as this app rather than whatever "pip" or
        # "python" happens to be first on PATH.
        subprocess.run([sys.executable, "-m", "pip", "install", "pyinstxtractor"],
                       capture_output=True)
        subprocess.run([sys.executable, "-m", "pyinstxtractor", file_path],
                       cwd=output_dir, capture_output=True, text=True)

        extracted_dir = os.path.join(output_dir, os.path.basename(file_path) + "_extracted")
        if not os.path.exists(extracted_dir):
            return {
                "success": False,
                "message": "Not a PyInstaller executable or extraction failed"
            }

        subprocess.run([sys.executable, "-m", "pip", "install", "uncompyle6"],
                       capture_output=True)

        python_files = {}
        for root, _, files in os.walk(extracted_dir):
            for name in files:
                if not name.endswith(('.pyc', '.pyo')):
                    continue
                pyc_path = os.path.join(root, name)
                py_path = pyc_path + ".py"
                try:
                    subprocess.run(["uncompyle6", pyc_path, "-o", py_path],
                                   capture_output=True)
                    with open(py_path, 'r', encoding='utf-8', errors='ignore') as f:
                        source = f.read()
                except (OSError, subprocess.SubprocessError):
                    # Best effort: the decompiler is missing or produced no
                    # output for this file, so skip it and keep going.
                    continue
                python_files[os.path.relpath(pyc_path, extracted_dir)] = source

        return {
            "success": True,
            "message": "Successfully extracted Python code",
            "files": python_files
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"PyInstaller extraction error: {str(e)}"
        }


def disassemble_binary(file_path, is_dll=False):
    """Disassemble a preview of every executable section of a PE file.

    Args:
        file_path: Path of the PE file.
        is_dll: When true, export names are collected and no entry point is
            marked; when false, disassembly of the section that contains the
            entry point starts at the entry point.

    Returns a dict.  On success: ``success`` (True), ``code_sections`` (list
    of ``{"name", "disassembly"}``), ``exports`` and ``pseudo_python``.  On
    failure: ``success`` (False) and ``message``.  Never raises.
    """
    try:
        # closing() releases pefile's file mapping as soon as we are done, so
        # the caller can delete the temporary file afterwards.
        with contextlib.closing(pefile.PE(file_path)) as pe:
            is_64bit = pe.OPTIONAL_HEADER.Magic == PE32_PLUS_MAGIC
            # disasm_lite() only yields plain tuples, so detail mode is not needed.
            md = Cs(CS_ARCH_X86, CS_MODE_64 if is_64bit else CS_MODE_32)

            entry_rva = None if is_dll else pe.OPTIONAL_HEADER.AddressOfEntryPoint

            code_sections = []
            for section in pe.sections:
                if not section.Characteristics & IMAGE_SCN_MEM_EXECUTE:
                    continue

                section_data = pe.get_data(section.VirtualAddress, section.SizeOfRawData)

                is_entry = entry_rva is not None and section.contains_rva(entry_rva)
                start_offset = entry_rva - section.VirtualAddress if is_entry else 0
                # Slicing clamps to the available data, including the case
                # where the entry point lies beyond the section's raw bytes.
                preview = section_data[start_offset:start_offset + DISASSEMBLY_PREVIEW_BYTES]

                disassembly = []
                instructions = md.disasm_lite(preview, section.VirtualAddress + start_offset)
                for index, (address, _size, mnemonic, op_str) in enumerate(instructions):
                    entry_marker = "ENTRY POINT -> " if is_entry and index == 0 else ""
                    disassembly.append(f"{entry_marker}0x{address:08x}: {mnemonic} {op_str}")

                code_sections.append({
                    "name": _decode(section.Name).strip('\x00'),
                    "disassembly": disassembly
                })

            exports = []
            if is_dll and hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
                exports = _export_names(pe)

        pseudo_python = generate_pseudo_python(code_sections, exports, is_dll)

        return {
            "success": True,
            "code_sections": code_sections,
            "exports": exports,
            "pseudo_python": pseudo_python
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"Disassembly error: {str(e)}"
        }


def _python_identifier(name):
    """Turn an export name into something usable after ``def``."""
    identifier = re.sub(r'\W', '_', name)
    if not identifier or identifier[0].isdigit():
        identifier = "_" + identifier
    return identifier


def _translate_instruction(line):
    """Return the pseudo-Python lines for one line of disassembly.

    Only exact ``call``, ``mov`` and ``cmp`` mnemonics are translated; every
    other instruction (and any of those three whose operands do not fit the
    simple pattern) is kept as a comment so that nothing is silently dropped.
    """
    parsed = _DISASM_LINE_RE.match(line)
    if parsed:
        mnemonic, operands = parsed.groups()
        if mnemonic == "call":
            # Search the operands only: the line itself always starts with the
            # instruction's own address, which is not the call target.
            target = _HEX_LITERAL_RE.search(operands)
            if target:
                return [f"    call_function_{target.group(0)}()"]
        elif mnemonic in ("mov", "cmp"):
            pair = _TWO_OPERAND_RE.match(operands)
            if pair:
                left, right = pair.groups()
                if mnemonic == "mov":
                    return [f"    {left} = {right}  # {line}"]
                return [f"    if {left} == {right}:  # {line}", "        pass"]
    return [f"    # {line}"]


def generate_pseudo_python(code_sections, exports, is_dll):
    """Generate pseudo-Python text from disassembly.

    Args:
        code_sections: List of ``{"name", "disassembly"}`` dicts as built by
            disassemble_binary().
        exports: Export names; only used when *is_dll* is true.
        is_dll: Whether to emit a stub function per export.

    Returns:
        The pseudo-Python source as a single newline-joined string.  It is a
        loose approximation for display, not runnable code.
    """
    pseudo_code = [
        "# This is an approximation generated from binary code",
        "# It is NOT the original source code",
        "",
    ]

    if is_dll:
        pseudo_code.append("# DLL Export Functions")
        for export in exports:
            identifier = _python_identifier(export)
            pseudo_code.append(f"def {identifier}():")
            if identifier != export:
                pseudo_code.append(f"    # Exported as: {export}")
            pseudo_code.append("    # Implementation not recoverable from binary")
            pseudo_code.append("    pass")
            pseudo_code.append("")

    for section in code_sections:
        pseudo_code.append(f"# Code Section: {section['name']}")
        pseudo_code.append("def main():")

        body = []
        for line in section['disassembly'][:PSEUDO_PYTHON_MAX_INSTRUCTIONS]:
            body.extend(_translate_instruction(line))
        # A function body made only of comments (or nothing) needs a statement.
        if all(entry.lstrip().startswith("#") for entry in body):
            body.append("    pass")

        pseudo_code.extend(body)
        pseudo_code.append("")

    if code_sections:
        pseudo_code.append("if __name__ == '__main__':")
        pseudo_code.append("    main()")
        pseudo_code.append("")

    return "\n".join(pseudo_code)


def _extract_resources(pe, resource_dir):
    """Write every resource leaf of *pe* into *resource_dir*; return the file names."""
    os.makedirs(resource_dir, exist_ok=True)
    extracted = []
    for resource_type in pe.DIRECTORY_ENTRY_RESOURCE.entries:
        if not hasattr(resource_type, 'directory'):
            continue
        for resource_id in resource_type.directory.entries:
            if not hasattr(resource_id, 'directory'):
                continue
            for resource_lang in resource_id.directory.entries:
                if not hasattr(resource_lang, 'data'):
                    # Malformed leaf without a data entry: skip it instead of
                    # failing the whole analysis.
                    continue
                data = pe.get_data(resource_lang.data.struct.OffsetToData,
                                   resource_lang.data.struct.Size)
                resource_filename = (
                    f"resource_{resource_type.id}_{resource_id.id}_{resource_lang.id}"
                )
                with open(os.path.join(resource_dir, resource_filename), 'wb') as f:
                    f.write(data)
                extracted.append(resource_filename)
    return extracted


def unpack_exe(file_path, output_dir):
    """Extract information from an EXE file and attempt to convert it to Python.

    Resources are written to ``<output_dir>/resources`` and the PyInstaller
    extraction also works inside *output_dir*.

    Returns ``{"basic_info", "disassembly", "pyinstaller"}`` on success or
    ``{"Error": message}`` if the file cannot be analyzed.  Never raises.
    """
    try:
        with contextlib.closing(pefile.PE(file_path)) as pe:
            info = {
                "Machine": hex(pe.FILE_HEADER.Machine),
                "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
                "NumberOfSections": pe.FILE_HEADER.NumberOfSections,
                "Sections": _section_summaries(pe)
            }

            if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
                info["Imports"] = _import_table(pe)

            if hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
                info["ExtractedResources"] = _extract_resources(
                    pe, os.path.join(output_dir, "resources"))

        return {
            "basic_info": info,
            "disassembly": disassemble_binary(file_path),
            "pyinstaller": try_pyinstaller_extraction(file_path, output_dir)
        }
    except Exception as e:
        return {"Error": str(e)}


def analyze_dll(file_path):
    """Extract information and code from a DLL file.

    Returns ``{"basic_info", "disassembly"}`` on success or
    ``{"Error": message}`` if the file cannot be analyzed.  Never raises.
    """
    try:
        with contextlib.closing(pefile.PE(file_path)) as pe:
            info = {
                "Machine": hex(pe.FILE_HEADER.Machine),
                "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
                "NumberOfSections": pe.FILE_HEADER.NumberOfSections,
                "Characteristics": hex(pe.FILE_HEADER.Characteristics),
                "DllCharacteristics": hex(pe.OPTIONAL_HEADER.DllCharacteristics),
                "Sections": _section_summaries(pe)
            }

            if hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
                info["Exports"] = _export_names(pe)

            if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
                info["Imports"] = _import_table(pe)

        return {
            "basic_info": info,
            "disassembly": disassemble_binary(file_path, is_dll=True)
        }
    except Exception as e:
        return {"Error": str(e)}


def process_zip_file(file_path, temp_dir):
    """Extract a ZIP file into *temp_dir* and analyze contained executables.

    Returns ``{"file_list", "nested_files"}`` where ``nested_files`` maps each
    .exe/.dll path (relative to *temp_dir*) to ``{"type", "info"}``, or
    ``{"error": message}`` on failure.  Never raises.
    """
    try:
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            file_list = zip_ref.namelist()
            zip_ref.extractall(temp_dir)

        # Collect the targets before analyzing anything: analysis creates new
        # directories and files under temp_dir that must not be picked up.
        targets = []
        for root, _, files in os.walk(temp_dir):
            for name in files:
                lowered = name.lower()
                if lowered.endswith('.exe'):
                    targets.append(('exe', os.path.join(root, name)))
                elif lowered.endswith('.dll'):
                    targets.append(('dll', os.path.join(root, name)))

        nested_files = {}
        for kind, full_path in targets:
            rel_path = os.path.relpath(full_path, temp_dir)
            if kind == 'exe':
                # Keyed on the full path so that equally named executables in
                # different folders do not share an output directory.
                exe_output_dir = full_path + "_unpacked"
                os.makedirs(exe_output_dir, exist_ok=True)
                info = unpack_exe(full_path, exe_output_dir)
            else:
                info = analyze_dll(full_path)
            nested_files[rel_path] = {'type': kind, 'info': info}

        return {
            'file_list': file_list,
            'nested_files': nested_files
        }
    except Exception as e:
        return {'error': str(e)}


def _render_pseudo_python(disassembly):
    """Show the generated pseudo-Python of a disassembly result."""
    st.subheader("Generated Python-like Code")
    st.code(disassembly.get('pseudo_python', "# No code could be generated"),
            language="python")


def _render_code_sections(disassembly):
    """Show one collapsible assembly listing per disassembled section."""
    for section in disassembly.get('code_sections', []):
        with st.expander(f"Section: {section['name']}"):
            st.code("\n".join(section['disassembly']), language="asm")


def _render_exports_imports(basic_info):
    """Show the export and import tables when they are present."""
    if 'Exports' in basic_info:
        st.subheader("Exported Functions")
        st.json(basic_info['Exports'])
    if 'Imports' in basic_info:
        st.subheader("Imported DLLs and Functions")
        st.json(basic_info['Imports'])


def _render_exe_analysis(exe_info, show_resources):
    """Render the result of unpack_exe() as a set of tabs."""
    if 'Error' in exe_info:
        st.error(f"Error analyzing EXE file: {exe_info['Error']}")
        return

    labels = ["Basic Info", "Python Code", "Disassembly"]
    if show_resources:
        labels.append("Resources")
    tabs = st.tabs(labels)

    basic_info = exe_info.get('basic_info', {})
    disassembly = exe_info.get('disassembly', {})
    pyinstaller_result = exe_info.get('pyinstaller', {})

    with tabs[0]:
        st.json(basic_info)

    with tabs[1]:
        if pyinstaller_result.get('success', False):
            st.success("Python code extracted successfully!")
            for filename, content in pyinstaller_result.get('files', {}).items():
                with st.expander(f"Python File: {filename}"):
                    st.code(content, language="python")
        else:
            st.warning(pyinstaller_result.get(
                'message', "Not a Python executable or extraction failed."))
            _render_pseudo_python(disassembly)

    with tabs[2]:
        if disassembly.get('success', False):
            _render_code_sections(disassembly)
        else:
            st.error(disassembly.get('message', "Disassembly failed"))

    if show_resources:
        with tabs[3]:
            if 'ExtractedResources' in basic_info:
                st.write(f"Found {len(basic_info['ExtractedResources'])} resources")
                for resource in basic_info['ExtractedResources']:
                    st.text(f"Resource: {resource}")


def _render_dll_analysis(dll_info, show_assembly):
    """Render the result of analyze_dll() as a set of tabs."""
    if 'Error' in dll_info:
        st.error(f"Error analyzing DLL file: {dll_info['Error']}")
        return

    tabs = st.tabs(["Basic Info", "DLL Code", "Exports/Imports"])
    basic_info = dll_info.get('basic_info', {})
    disassembly = dll_info.get('disassembly', {})

    with tabs[0]:
        st.json(basic_info)

    with tabs[1]:
        _render_pseudo_python(disassembly)
        if show_assembly and disassembly.get('success', False):
            st.subheader("Assembly Code")
            _render_code_sections(disassembly)

    with tabs[2]:
        _render_exports_imports(basic_info)


# Main app logic
uploaded_file = st.file_uploader("Upload a file (.zip, .exe, or .dll)",
                                 type=["zip", "exe", "dll"])

if uploaded_file is not None:
    with tempfile.TemporaryDirectory() as temp_dir:
        # Only the final path component is used, so a crafted upload name
        # cannot place the file outside the temporary directory.
        upload_name = os.path.basename(uploaded_file.name)
        file_path = os.path.join(temp_dir, upload_name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        st.success(f"File uploaded: {uploaded_file.name}")

        lowered_name = upload_name.lower()
        if lowered_name.endswith('.zip'):
            st.subheader("ZIP File Contents")
            output_dir = os.path.join(temp_dir, "extracted")
            os.makedirs(output_dir, exist_ok=True)

            result = process_zip_file(file_path, output_dir)

            if 'error' in result:
                st.error(f"Error processing ZIP file: {result['error']}")
            else:
                with st.expander("ZIP Contents", expanded=True):
                    st.write(f"Total files: {len(result['file_list'])}")
                    st.json(result['file_list'])

                if result['nested_files']:
                    st.subheader("Detected Executable Files")
                    for nested_path, file_info in result['nested_files'].items():
                        # Each file gets a plain heading rather than its own
                        # expander: the per-file views contain expanders, and
                        # Streamlit has not always allowed nesting them.
                        st.markdown("---")
                        st.text(f"{nested_path} ({file_info['type'].upper()})")
                        if file_info['type'] == 'exe':
                            _render_exe_analysis(file_info['info'], show_resources=False)
                        else:
                            _render_dll_analysis(file_info['info'], show_assembly=False)

        elif lowered_name.endswith('.exe'):
            st.subheader("EXE File Analysis and Decompilation")
            output_dir = os.path.join(temp_dir, "exe_unpacked")
            os.makedirs(output_dir, exist_ok=True)

            try:
                _render_exe_analysis(unpack_exe(file_path, output_dir), show_resources=True)
            except Exception as e:
                st.error(f"Error analyzing EXE file: {str(e)}")

        elif lowered_name.endswith('.dll'):
            st.subheader("DLL File Analysis and Decompilation")

            try:
                _render_dll_analysis(analyze_dll(file_path), show_assembly=True)
            except Exception as e:
                st.error(f"Error analyzing DLL file: {str(e)}")