# Spaces: euler314 / Exe_unpacker
# Building
# File size: 29,812 Bytes

import streamlit as st
import os
import zipfile
import tempfile
import pefile
import subprocess
import binascii
import re
import json
from pathlib import Path

st.set_page_config(
    page_title="Executable Reverse Engineer",
    page_icon="🔍",
    layout="wide",
)

st.title("Executable Reverse Engineering Tool")
st.markdown("""
This tool performs reverse engineering on executables (.exe/.dll) to show their inner workings:
- Disassembles machine code to view assembly instructions
- Attempts to decompile to pseudocode
- Shows strings, imports, and other binary artifacts
- Works with executables from any programming language
""")

# Runtime bootstrap: pip-install the analysis tooling, then import it.
# The pip return codes are not inspected; a failed install only surfaces
# when the imports below raise, which is reported through st.error.
try:
    with st.spinner("Setting up reverse engineering environment..."):
        for package_name in ("pyinstaller-extractor", "uncompyle6", "capstone", "r2pipe"):
            subprocess.run(["pip", "install", package_name], capture_output=True)

        # These imports must come after the installs above.
        import r2pipe
        from capstone import Cs, CS_ARCH_X86, CS_MODE_32, CS_MODE_64
        st.success("Environment ready")
except Exception as setup_error:
    st.error(f"Setup error: {setup_error!s}")
    
def extract_strings(file_path, min_length=4):
    """Collect printable strings embedded in a binary file.

    The raw bytes are scanned twice: once for runs of printable ASCII
    bytes (0x20-0x7E), and once for runs where each printable byte is
    followed by a NUL byte, which are decoded as UTF-16-LE.

    Args:
        file_path: Path of the file to scan.
        min_length: Minimum number of characters a run needs to be kept.

    Returns:
        A sorted list of the unique strings found. If anything raises,
        a one-element list holding the error message is returned instead.
    """
    try:
        with open(file_path, 'rb') as handle:
            raw = handle.read()

        # Shared "{N,}" quantifier tail for both patterns.
        repeat = str(min_length).encode() + b',}'

        narrow_hits = re.findall(b'[\\x20-\\x7E]{' + repeat, raw)
        wide_hits = re.findall(b'(?:[\x20-\x7E]\x00){' + repeat, raw)

        found = set()
        for hit in narrow_hits:
            found.add(hit.decode('ascii'))
        for hit in wide_hits:
            found.add(hit.decode('utf-16-le', errors='ignore').rstrip('\x00'))

        return sorted(found)
    except Exception as exc:
        return [f"Error extracting strings: {exc!s}"]

def analyze_with_radare2(file_path):
    """Run a radare2 analysis of a binary through r2pipe.

    Opens the file, runs the "aaa" analysis, and collects binary info,
    entry point, imports, exports, sections, the function list, a
    disassembly listing and pseudocode ("main" first, falling back to
    "entry0").

    Args:
        file_path: Path of the binary to analyse.

    Returns:
        On success, a dict with the keys "info", "entry_point", "imports",
        "exports", "sections", "main_disasm", "functions" and "pseudocode".
        On any failure, a dict with the single key "error".
    """
    def _json_or_empty(raw):
        # radare2 may return an empty string for a JSON command; treat as "nothing".
        return json.loads(raw) if raw else []

    r2 = None
    try:
        r2 = r2pipe.open(file_path)

        # Analyze all
        r2.cmd("aaa")

        # Unlike the list-valued commands below, an empty reply here is an error.
        info = json.loads(r2.cmd("ij"))
        entry_point = r2.cmd("ie")
        imports = _json_or_empty(r2.cmd("iij"))
        exports = _json_or_empty(r2.cmd("iEj"))  # relevant for DLLs
        sections = _json_or_empty(r2.cmd("iSj"))

        # Disassemble main; fall back to the entry point when main is unknown.
        main_disasm = r2.cmd("s main; pdf")
        if not main_disasm or "Cannot find function" in main_disasm:
            main_disasm = r2.cmd("s entry0; pdf")

        functions = _json_or_empty(r2.cmd("aflj"))

        # Pseudocode, with the same main -> entry0 fallback.
        pseudocode = r2.cmd("s main; pdc")
        if not pseudocode or pseudocode.strip() == "":
            pseudocode = r2.cmd("s entry0; pdc")

        return {
            "info": info,
            "entry_point": entry_point,
            "imports": imports,
            "exports": exports,
            "sections": sections,
            "main_disasm": main_disasm,
            "functions": functions,
            "pseudocode": pseudocode
        }
    except Exception as e:
        return {"error": f"Radare2 analysis failed: {str(e)}"}
    finally:
        # Always shut the radare2 session down, including when a command or
        # a JSON parse failed part-way through.
        if r2 is not None:
            try:
                r2.quit()
            except Exception:
                # Shutdown is best effort; results (or the original error)
                # have already been determined at this point.
                pass

def _decompile_bytecode_tree(search_dir, python_files, key_prefix=None):
    """Decompile every .pyc/.pyo below search_dir with uncompyle6 into python_files."""
    for root, _, files in os.walk(search_dir):
        for name in files:
            if not name.endswith(('.pyc', '.pyo')):
                continue
            pyc_path = os.path.join(root, name)
            py_path = pyc_path + ".py"
            try:
                subprocess.run(["uncompyle6", pyc_path, "-o", py_path], capture_output=True)
                if not os.path.exists(py_path):
                    continue
                rel_path = os.path.relpath(pyc_path, search_dir)
                if key_prefix is not None:
                    rel_path = os.path.join(key_prefix, rel_path)
                with open(py_path, 'r', encoding='utf-8', errors='ignore') as f:
                    python_files[rel_path] = f.read()
            except Exception:
                # Decompilation is best effort: one unreadable or
                # undecompilable file must not abort the whole scan.
                # (Exception, not a bare except, so Ctrl-C still works.)
                continue


def try_pyinstaller_extraction(file_path, output_dir):
    """Attempt to extract Python scripts from a PyInstaller executable.

    Runs the extractor as a subprocess with ``output_dir`` as working
    directory, then looks for ``<basename>_extracted`` inside it. Bytecode
    files found there (and inside any ``.pyz`` archive) are decompiled
    with uncompyle6.

    Args:
        file_path: Path of the executable to unpack.
        output_dir: Existing directory used as the extractor's working dir.

    Returns:
        A dict with "success" (bool) and "message" (str). On success it
        also has "files", mapping relative bytecode paths to decompiled
        source; entries from a PYZ archive are prefixed with "PYZ_ARCHIVE".
    """
    try:
        # NOTE(review): this call uses the module name "pyinstaller-extractor"
        # while the PYZ step below uses "pyinstxtractor" — confirm which one
        # the installed package actually provides.
        subprocess.run(["python", "-m", "pyinstaller-extractor", file_path],
                       cwd=output_dir, capture_output=True, text=True)

        extracted_dir = os.path.join(output_dir, os.path.basename(file_path) + "_extracted")

        if not os.path.exists(extracted_dir):
            return {
                "success": False,
                "message": "Not a PyInstaller executable or extraction failed"
            }

        python_files = {}

        # Bytecode files unpacked directly from the executable.
        _decompile_bytecode_tree(extracted_dir, python_files)

        # PYZ archives (PyInstaller specific) hold most of the Python modules.
        for root, _, files in os.walk(extracted_dir):
            for name in files:
                if not name.endswith('.pyz'):
                    continue
                pyz_path = os.path.join(root, name)
                pyz_extract_dir = pyz_path + "_extracted"
                try:
                    os.makedirs(pyz_extract_dir, exist_ok=True)
                    subprocess.run(["python", "-m", "pyinstxtractor", pyz_path],
                                   cwd=output_dir, capture_output=True)
                except Exception:
                    # Best effort, same as the per-file decompilation.
                    continue
                _decompile_bytecode_tree(pyz_extract_dir, python_files, key_prefix="PYZ_ARCHIVE")

        return {
            "success": True,
            "message": "Successfully extracted Python code",
            "files": python_files
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"PyInstaller extraction error: {str(e)}"
        }

def analyze_binary(file_path, is_dll=False):
    """Comprehensive analysis of a PE binary.

    Parses the file with pefile for header, section, import and export
    data, extracts embedded strings, and then tries radare2 for
    disassembly/pseudocode. If radare2 reports an error, the first
    instructions at the entry point are disassembled with Capstone instead.

    Args:
        file_path: Path of the .exe/.dll to analyse.
        is_dll: When true, the export table is collected as well.

    Returns:
        A dict with "basic_info", "sections", "imports", "exports" and
        "strings", plus "disassembly", "functions" and "pseudocode" when
        available. On failure, a dict with the single key "Error".
    """
    try:
        results = {}

        # Basic PE information using pefile
        pe = pefile.PE(file_path)
        try:
            # General info
            results["basic_info"] = {
                "Machine": hex(pe.FILE_HEADER.Machine),
                "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
                "NumberOfSections": pe.FILE_HEADER.NumberOfSections,
                "SizeOfImage": pe.OPTIONAL_HEADER.SizeOfImage,
                "EntryPoint": hex(pe.OPTIONAL_HEADER.AddressOfEntryPoint),
                "ImageBase": hex(pe.OPTIONAL_HEADER.ImageBase),
                "Subsystem": pe.OPTIONAL_HEADER.Subsystem
            }

            # Section information
            results["sections"] = []
            for section in pe.sections:
                section_name = section.Name.decode('utf-8', errors='ignore').strip('\x00')
                results["sections"].append({
                    "Name": section_name,
                    "VirtualAddress": hex(section.VirtualAddress),
                    "VirtualSize": section.Misc_VirtualSize,
                    "SizeOfRawData": section.SizeOfRawData,
                    "Entropy": section.get_entropy(),
                    "Characteristics": hex(section.Characteristics)
                })

            # Import information (imports without a name are skipped)
            results["imports"] = []
            if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
                for entry in pe.DIRECTORY_ENTRY_IMPORT:
                    dll_name = entry.dll.decode('utf-8', errors='ignore')
                    imports = [
                        imp.name.decode('utf-8', errors='ignore')
                        for imp in entry.imports
                        if imp.name
                    ]
                    results["imports"].append({
                        "DLL": dll_name,
                        "Functions": imports
                    })

            # Export information (for DLLs)
            results["exports"] = []
            if is_dll and hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
                for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols:
                    if exp.name:
                        results["exports"].append({
                            "Name": exp.name.decode('utf-8', errors='ignore'),
                            "Address": hex(pe.OPTIONAL_HEADER.ImageBase + exp.address),
                            "Ordinal": exp.ordinal
                        })

            # Extract strings
            results["strings"] = extract_strings(file_path)

            # Use Radare2 for deeper analysis if available
            try:
                r2_results = analyze_with_radare2(file_path)
                if "error" not in r2_results:
                    results["disassembly"] = r2_results["main_disasm"]
                    results["functions"] = r2_results["functions"]
                    results["pseudocode"] = r2_results["pseudocode"]
                else:
                    # Fallback to basic disassembly with Capstone
                    from capstone import Cs, CS_ARCH_X86, CS_MODE_32, CS_MODE_64

                    # Optional-header magic 0x20b marks a 64-bit (PE32+) image
                    is_64bit = pe.OPTIONAL_HEADER.Magic == 0x20b
                    mode = CS_MODE_64 if is_64bit else CS_MODE_32

                    # Initialize disassembler
                    md = Cs(CS_ARCH_X86, mode)
                    md.detail = True

                    # Disassemble from the entry point, provided it lies
                    # inside one of the sections.
                    entry_rva = pe.OPTIONAL_HEADER.AddressOfEntryPoint
                    for section in pe.sections:
                        if section.contains_rva(entry_rva):
                            # 512 bytes of the memory-mapped image from the entry point
                            entry_data = pe.get_memory_mapped_image()[entry_rva:entry_rva+512]

                            disassembly = []
                            for i, (address, size, mnemonic, op_str) in enumerate(md.disasm_lite(entry_data, pe.OPTIONAL_HEADER.ImageBase + entry_rva)):
                                if i >= 100:  # Limit to 100 instructions for preview
                                    break
                                disassembly.append(f"0x{address:08x}: {mnemonic} {op_str}")

                            results["disassembly"] = "\n".join(disassembly)
                            break
            except ImportError:
                # If r2pipe or capstone isn't available
                results["disassembly"] = "Advanced disassembly not available. Install r2pipe or capstone."

            return results
        finally:
            # Release pefile's handle on the file; everything stored in
            # `results` is plain Python data by now.
            pe.close()
    except Exception as e:
        return {"Error": str(e)}

def process_zip_file(file_path, temp_dir):
    """Extract a ZIP archive and analyse any executables it contains.

    Every member is extracted into ``temp_dir``; each extracted ``.exe``
    is analysed and probed for PyInstaller content, each ``.dll`` is
    analysed with export collection enabled. Extension matching is
    case-insensitive.

    Args:
        file_path: Path of the ZIP archive.
        temp_dir: Existing directory that receives the extracted files.

    Returns:
        A dict with "file_list" (archive member names) and "nested_files"
        (relative path -> analysis dict). On failure, a dict with the
        single key "error".
    """
    try:
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            # Get file list before extraction
            file_list = zip_ref.namelist()

            # NOTE(review): no limit is placed on the uncompressed size.
            zip_ref.extractall(temp_dir)

        # Check for nested executables
        nested_files = {}
        for root, _, files in os.walk(temp_dir):
            for file in files:
                full_path = os.path.join(root, file)
                rel_path = os.path.relpath(full_path, temp_dir)
                lowered = file.lower()

                if lowered.endswith('.exe'):
                    # A unique directory per executable, so two members with
                    # the same basename in different folders do not share
                    # (and misattribute) extraction output.
                    exe_output_dir = tempfile.mkdtemp(prefix=f"{file}_unpacked_", dir=temp_dir)
                    nested_files[rel_path] = {
                        'type': 'exe',
                        'analysis': analyze_binary(full_path),
                        'python_extraction': try_pyinstaller_extraction(full_path, exe_output_dir)
                    }
                elif lowered.endswith('.dll'):
                    nested_files[rel_path] = {
                        'type': 'dll',
                        'analysis': analyze_binary(full_path, is_dll=True)
                    }

        return {
            'file_list': file_list,
            'nested_files': nested_files
        }
    except Exception as e:
        return {'error': str(e)}

# Main app logic
#
# The report for an executable is the same whether it was uploaded directly
# or found inside a ZIP, so the tab contents are rendered by shared helpers.
# `nested=True` means the report is drawn inside a per-file expander: the
# extra headings are omitted there (as before), and inner expanders are
# replaced by bold labels because Streamlit has not allowed an expander
# inside another expander (NOTE(review): version-dependent — confirm against
# the deployed Streamlit release).

def _render_summary_tab(analysis, nested):
    """Show the analysis error, or the basic info JSON and the section table."""
    if "Error" in analysis:
        st.error(analysis["Error"])
        return

    if not nested:
        st.subheader("Basic Information")
    st.json(analysis.get("basic_info", {}))

    st.subheader("Sections")
    sections = analysis.get("sections", [])
    sections_df = {
        column: [section[column] for section in sections]
        for column in ("Name", "VirtualSize", "SizeOfRawData", "Entropy")
    }
    st.dataframe(sections_df)


def _render_imports_tab(analysis, nested):
    """List the imported functions, grouped by the DLL they come from."""
    if not nested:
        st.subheader("Imported Functions")
    for imp in analysis.get("imports", []):
        label = f"DLL: {imp['DLL']}"
        if nested:
            st.markdown(f"**{label}**")
            st.code("\n".join(imp["Functions"]))
        else:
            with st.expander(label):
                st.code("\n".join(imp["Functions"]))


def _render_strings_tab(analysis):
    """Show up to 500 extracted strings that are longer than 8 characters."""
    st.subheader("Strings Found")
    all_strings = analysis.get("strings", [])
    interesting_strings = [s for s in all_strings if len(s) > 8]  # Filter out very short strings
    st.code("\n".join(interesting_strings[:500]))  # Limit to 500 strings


def _render_assembly_tab(analysis):
    """Show the disassembly and, when present, the decompiled pseudocode."""
    st.subheader("Disassembly")
    if "disassembly" in analysis:
        st.code(analysis["disassembly"], language="asm")
    else:
        st.warning("Disassembly not available")

    if "pseudocode" in analysis and analysis["pseudocode"]:
        st.subheader("Decompiled Pseudocode")
        st.code(analysis["pseudocode"], language="c")


def _render_python_tab(python_extraction, nested):
    """Show decompiled Python sources, or the reason none were extracted."""
    if not python_extraction.get("success", False):
        st.warning(python_extraction.get("message", "Not a Python executable or extraction failed."))
        return

    st.success("Python code extracted successfully!")
    for filename, content in python_extraction.get("files", {}).items():
        label = f"Python File: {filename}"
        if nested:
            st.markdown(f"**{label}**")
            st.code(content, language="python")
        else:
            with st.expander(label):
                st.code(content, language="python")


def _render_exe_report(analysis, python_extraction, nested=False):
    """Render the five-tab report for an EXE."""
    tabs = st.tabs(["Summary", "Imports", "Strings", "Assembly", "Python Code"])
    with tabs[0]:
        _render_summary_tab(analysis, nested)
    with tabs[1]:
        _render_imports_tab(analysis, nested)
    with tabs[2]:
        _render_strings_tab(analysis)
    with tabs[3]:
        _render_assembly_tab(analysis)
    with tabs[4]:
        _render_python_tab(python_extraction, nested)


def _render_dll_report(analysis, nested=False):
    """Render the five-tab report for a DLL."""
    tabs = st.tabs(["Summary", "Exports", "Imports", "Strings", "Assembly"])
    with tabs[0]:
        _render_summary_tab(analysis, nested)
    with tabs[1]:
        st.subheader("Exported Functions")
        st.json(analysis.get("exports", []))
    with tabs[2]:
        _render_imports_tab(analysis, nested)
    with tabs[3]:
        _render_strings_tab(analysis)
    with tabs[4]:
        _render_assembly_tab(analysis)


uploaded_file = st.file_uploader("Upload a file (.zip, .exe, or .dll)", type=["zip", "exe", "dll"])

if uploaded_file is not None:
    with tempfile.TemporaryDirectory() as temp_dir:
        # Save the upload into the temporary directory. Only the final path
        # component of the client-supplied name is used, so a name containing
        # directory parts cannot place the file outside temp_dir.
        file_path = os.path.join(temp_dir, os.path.basename(uploaded_file.name))
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        st.success(f"File uploaded: {uploaded_file.name}")

        # Display file size
        file_size = os.path.getsize(file_path)
        st.info(f"File size: {file_size/1024:.2f} KB")

        # Process based on file type
        lowered_name = uploaded_file.name.lower()

        if lowered_name.endswith('.zip'):
            st.subheader("ZIP File Contents")
            with st.spinner("Analyzing ZIP contents..."):
                output_dir = os.path.join(temp_dir, "extracted")
                os.makedirs(output_dir, exist_ok=True)

                result = process_zip_file(file_path, output_dir)

                if 'error' in result:
                    st.error(f"Error processing ZIP file: {result['error']}")
                else:
                    with st.expander("ZIP Contents", expanded=True):
                        st.write(f"Total files: {len(result['file_list'])}")
                        st.code("\n".join(result['file_list']))

                    if result['nested_files']:
                        st.subheader("Detected Executable Files")
                        # Distinct loop name: the uploaded file's own path
                        # stays available in `file_path`.
                        for nested_path, file_info in result['nested_files'].items():
                            with st.expander(f"{nested_path} ({file_info['type'].upper()})"):
                                if file_info['type'] == 'exe':
                                    _render_exe_report(
                                        file_info['analysis'],
                                        file_info['python_extraction'],
                                        nested=True,
                                    )
                                else:  # DLL
                                    _render_dll_report(file_info['analysis'], nested=True)

        elif lowered_name.endswith('.exe'):
            st.subheader("EXE File Analysis")
            with st.spinner("Reverse engineering executable..."):
                output_dir = os.path.join(temp_dir, "exe_unpacked")
                os.makedirs(output_dir, exist_ok=True)

                # Perform comprehensive analysis
                analysis = analyze_binary(file_path)

                # Try Python extraction
                python_extraction = try_pyinstaller_extraction(file_path, output_dir)

                _render_exe_report(analysis, python_extraction)

        elif lowered_name.endswith('.dll'):
            st.subheader("DLL File Analysis")
            with st.spinner("Reverse engineering DLL..."):
                # Perform comprehensive analysis (with is_dll=True)
                analysis = analyze_binary(file_path, is_dll=True)

                _render_dll_report(analysis)