File size: 23,507 Bytes
25a8adb
 
 
 
 
 
157cf12
 
 
25a8adb
157cf12
25a8adb
157cf12
25a8adb
157cf12
25a8adb
157cf12
25a8adb
157cf12
 
 
25a8adb
 
157cf12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25a8adb
 
157cf12
25a8adb
 
 
157cf12
25a8adb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157cf12
25a8adb
 
 
 
 
 
 
 
157cf12
 
25a8adb
157cf12
 
 
 
 
 
 
 
 
25a8adb
157cf12
 
 
 
 
25a8adb
 
 
 
157cf12
25a8adb
 
 
157cf12
25a8adb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157cf12
25a8adb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157cf12
 
 
 
 
 
 
25a8adb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c99b4e2
 
 
 
 
 
 
 
 
 
 
 
 
25a8adb
c99b4e2
25a8adb
c99b4e2
 
 
 
 
 
25a8adb
c99b4e2
 
 
 
157cf12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c99b4e2
 
157cf12
c99b4e2
 
25a8adb
c99b4e2
 
25a8adb
157cf12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c99b4e2
 
 
 
157cf12
c99b4e2
 
157cf12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c99b4e2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
import streamlit as st
import os
import zipfile
import tempfile
import pefile
import shutil
import subprocess
import re
import struct
from pathlib import Path
from capstone import Cs, CS_ARCH_X86, CS_MODE_32, CS_MODE_64

# Page-level configuration for the Streamlit app.
# The icon is written as an escape sequence (U+1F50D, magnifying glass) so it
# survives re-encoding of this file; the previous literal was mis-decoded
# UTF-8 and did not form a single emoji.
st.set_page_config(page_title="Advanced File Analyzer", page_icon="\U0001F50D", layout="wide")

# Static header and feature summary rendered above the uploader.
st.title("Advanced File Analysis Tool")
st.markdown("""
This tool allows you to:
- Extract and view contents of .zip files
- Decompile .exe files to Python-like code
- View decompiled code from .dll files
- Automatically analyze nested executables
""")

def try_pyinstaller_extraction(file_path, output_dir):
    """Try to unpack a PyInstaller executable and decompile its bytecode.

    Runs ``pyinstxtractor`` on ``file_path`` with ``output_dir`` as the working
    directory, then runs ``uncompyle6`` on every ``.pyc``/``.pyo`` file found
    under ``<output_dir>/<basename of file_path>_extracted``.

    Args:
        file_path: Path of the executable to inspect. The extractor runs with
            ``output_dir`` as its working directory, so pass an absolute path.
        output_dir: Existing directory that receives the extracted tree.

    Returns:
        A dict with ``"success"`` (bool) and ``"message"`` (str). On success
        it also carries ``"files"``: a mapping from each bytecode file's path
        (relative to the extracted directory) to its decompiled source text.
        Bytecode files that could not be decompiled are left out.
    """
    import sys  # local import: the module header does not import sys

    try:
        # Use the running interpreter for both pip and the extractor so the
        # package is installed into, and run from, the same environment
        # instead of whatever "pip"/"python" happen to be first on PATH.
        # NOTE(review): installing packages at request time needs network
        # access and is slow; consider declaring these as app dependencies.
        subprocess.run([sys.executable, "-m", "pip", "install", "pyinstxtractor"],
                       capture_output=True)

        subprocess.run([sys.executable, "-m", "pyinstxtractor", file_path],
                       cwd=output_dir, capture_output=True, text=True)

        extracted_dir = os.path.join(output_dir, os.path.basename(file_path) + "_extracted")

        # The extractor's exit status is not inspected; the presence of its
        # output directory is what signals a successful extraction.
        if not os.path.exists(extracted_dir):
            return {
                "success": False,
                "message": "Not a PyInstaller executable or extraction failed"
            }

        subprocess.run([sys.executable, "-m", "pip", "install", "uncompyle6"],
                       capture_output=True)

        python_files = {}
        for root, _, names in os.walk(extracted_dir):
            for name in names:
                if not name.endswith(('.pyc', '.pyo')):
                    continue
                pyc_path = os.path.join(root, name)
                py_path = pyc_path + ".py"
                try:
                    subprocess.run(["uncompyle6", pyc_path, "-o", py_path], capture_output=True)
                    if os.path.exists(py_path):
                        with open(py_path, 'r', encoding='utf-8', errors='ignore') as f:
                            rel_path = os.path.relpath(pyc_path, extracted_dir)
                            python_files[rel_path] = f.read()
                except (OSError, subprocess.SubprocessError):
                    # Best effort: one file that cannot be decompiled or read
                    # must not abort the rest of the extraction.
                    continue

        return {
            "success": True,
            "message": "Successfully extracted Python code",
            "files": python_files
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"PyInstaller extraction error: {str(e)}"
        }

def disassemble_binary(file_path, is_dll=False):
    """Disassemble the executable sections of a PE file.

    For each executable section, up to 4096 bytes are disassembled. For an
    EXE (``is_dll`` false) the section containing the entry point is
    disassembled starting at the entry point, and its first instruction is
    prefixed with ``"ENTRY POINT -> "``; every other section starts at its
    beginning. For a DLL the named exports are collected as well.

    Args:
        file_path: Path of the PE file.
        is_dll: When true, skip entry-point handling and collect export names.

    Returns:
        On success, a dict with ``"success": True``, ``"code_sections"`` (list
        of ``{"name", "disassembly"}`` dicts whose disassembly is a list of
        ``"0x<address>: <mnemonic> <operands>"`` strings), ``"exports"`` (list
        of names) and ``"pseudo_python"`` (text from generate_pseudo_python).
        On any failure, ``{"success": False, "message": ...}``.
    """
    try:
        pe = pefile.PE(file_path)
        try:
            # Optional-header magic 0x20b marks a 64-bit (PE32+) image.
            is_64bit = pe.OPTIONAL_HEADER.Magic == 0x20b
            mode = CS_MODE_64 if is_64bit else CS_MODE_32

            md = Cs(CS_ARCH_X86, mode)
            md.detail = True

            # DLLs are listed from the start of each section; only EXEs get
            # the entry-point treatment.
            entry_rva = None if is_dll else pe.OPTIONAL_HEADER.AddressOfEntryPoint

            code_sections = []
            for section in pe.sections:
                # 0x20000000 is IMAGE_SCN_MEM_EXECUTE: the section is executable.
                if not section.Characteristics & 0x20000000:
                    continue

                section_name = section.Name.decode('utf-8', errors='ignore').strip('\x00')
                section_data = pe.get_data(section.VirtualAddress, section.SizeOfRawData)

                is_entry = entry_rva is not None and section.contains_rva(entry_rva)
                # Offset of the entry point inside this section's data.
                start_offset = entry_rva - section.VirtualAddress if is_entry else 0

                # Keep the preview small; never let the size go negative when
                # the entry point lies beyond the bytes actually available.
                preview_size = min(max(len(section_data) - start_offset, 0), 4096)
                preview = section_data[start_offset:start_offset + preview_size]

                disassembly = []
                listing = md.disasm_lite(preview, section.VirtualAddress + start_offset)
                for i, (address, size, mnemonic, op_str) in enumerate(listing):
                    entry_marker = "ENTRY POINT -> " if is_entry and i == 0 else ""
                    disassembly.append(f"{entry_marker}0x{address:08x}: {mnemonic} {op_str}")

                code_sections.append({
                    "name": section_name,
                    "disassembly": disassembly
                })

            # Only named exports are reported; nameless entries are skipped.
            exports = []
            if is_dll and hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
                exports = [
                    exp.name.decode('utf-8', errors='ignore')
                    for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols
                    if exp.name
                ]
        finally:
            # Release pefile's hold on the file so the caller's temporary
            # directory can be cleaned up.
            pe.close()

        pseudo_python = generate_pseudo_python(code_sections, exports, is_dll)

        return {
            "success": True,
            "code_sections": code_sections,
            "exports": exports,
            "pseudo_python": pseudo_python
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"Disassembly error: {str(e)}"
        }

def _pseudo_statement(line):
    """Translate one disassembly listing line into pseudo-Python lines.

    Listing lines look like ``"[ENTRY POINT -> ]0x<addr>: <mnemonic> <ops>"``.
    Only the instruction part is pattern-matched, so the address prefix is
    never mistaken for an operand. Anything that is not recognised is kept as
    a comment, so no instruction disappears from the output.
    """
    _, sep, instruction = line.partition(": ")
    if not sep:
        # No address prefix: treat the whole line as the instruction.
        instruction = line
    mnemonic, _, operands = instruction.strip().partition(" ")
    operands = operands.strip()

    if mnemonic == "call":
        # Direct calls only; indirect calls (register/memory) have no
        # address to name and fall through to the comment form.
        if re.fullmatch(r'0x[0-9a-fA-F]+', operands):
            return [f"    call_function_{operands}()"]
    elif mnemonic == "mov":
        mov_match = re.fullmatch(r'(\w+),\s+(.+)', operands)
        if mov_match:
            dest, source = mov_match.groups()
            return [f"    {dest} = {source}  # {line}"]
    elif mnemonic == "cmp":
        cmp_match = re.fullmatch(r'(\w+),\s+(.+)', operands)
        if cmp_match:
            a, b = cmp_match.groups()
            return [f"    if {a} == {b}:  # {line}", "        pass"]

    return [f"    # {line}"]


def generate_pseudo_python(code_sections, exports, is_dll):
    """Render disassembly listings as rough, Python-flavoured pseudo-code.

    The output is a reading aid, not runnable or recovered source. For a DLL
    each export becomes an empty ``def``. Each code section becomes a
    ``def main():`` whose body is built from the first 100 listing lines:
    direct ``call 0x...`` becomes ``call_function_<target>()``, simple
    ``mov reg, src`` becomes an assignment, simple ``cmp a, b`` becomes an
    ``if``, and every other instruction is kept as a comment.

    Args:
        code_sections: Iterable of dicts with ``"name"`` (str) and
            ``"disassembly"`` (list of listing strings).
        exports: Export names; only used when ``is_dll`` is true.
        is_dll: Whether to emit the export stubs.

    Returns:
        The generated text, lines joined with ``"\\n"``.
    """
    pseudo_code = [
        "# This is an approximation generated from binary code",
        "# It is NOT the original source code",
        "",
    ]

    if is_dll:
        pseudo_code.append("# DLL Export Functions")
        for export in exports:
            pseudo_code.append(f"def {export}():")
            pseudo_code.append("    # Implementation not recoverable from binary")
            pseudo_code.append("    pass")
            pseudo_code.append("")

    for section in code_sections:
        pseudo_code.append(f"# Code Section: {section['name']}")
        pseudo_code.append("def main():")

        # Limit to the first 100 instructions to keep the preview readable.
        for line in section['disassembly'][:100]:
            pseudo_code.extend(_pseudo_statement(line))

        pseudo_code.append("")

        pseudo_code.append("if __name__ == '__main__':")
        pseudo_code.append("    main()")
        pseudo_code.append("")

    return "\n".join(pseudo_code)

def unpack_exe(file_path, output_dir):
    """Collect PE metadata from an EXE, dump its resources and decompile it.

    Args:
        file_path: Path of the executable.
        output_dir: Existing directory; resources are written to
            ``<output_dir>/resources`` and it is also handed to
            try_pyinstaller_extraction.

    Returns:
        On success, a dict with ``"basic_info"`` (header fields, ``"Sections"``,
        and, when the corresponding PE directories exist, ``"Imports"`` and
        ``"ExtractedResources"``), ``"disassembly"`` (result of
        disassemble_binary) and ``"pyinstaller"`` (result of
        try_pyinstaller_extraction). On any failure, ``{"Error": <message>}``.
    """
    try:
        pe = pefile.PE(file_path)
        try:
            info = {
                "Machine": hex(pe.FILE_HEADER.Machine),
                "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
                "NumberOfSections": pe.FILE_HEADER.NumberOfSections,
                "Sections": [
                    {
                        "Name": section.Name.decode('utf-8', errors='ignore').strip('\x00'),
                        "VirtualAddress": hex(section.VirtualAddress),
                        "SizeOfRawData": section.SizeOfRawData,
                        "Entropy": section.get_entropy()
                    }
                    for section in pe.sections
                ]
            }

            # Imports without a name are skipped.
            if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
                info["Imports"] = [
                    {
                        "DLL": entry.dll.decode('utf-8', errors='ignore'),
                        "Functions": [
                            imp.name.decode('utf-8', errors='ignore')
                            for imp in entry.imports
                            if imp.name
                        ]
                    }
                    for entry in pe.DIRECTORY_ENTRY_IMPORT
                ]

            # The resource tree is walked three levels deep (type -> id ->
            # language); each leaf's bytes are written to their own file.
            if hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
                resource_dir = os.path.join(output_dir, "resources")
                os.makedirs(resource_dir, exist_ok=True)
                extracted_resources = []

                for resource_type in pe.DIRECTORY_ENTRY_RESOURCE.entries:
                    if not hasattr(resource_type, 'directory'):
                        continue
                    for resource_id in resource_type.directory.entries:
                        if not hasattr(resource_id, 'directory'):
                            continue
                        for resource_lang in resource_id.directory.entries:
                            # A leaf without data would otherwise raise and
                            # discard the whole analysis.
                            if not hasattr(resource_lang, 'data'):
                                continue
                            data = pe.get_data(resource_lang.data.struct.OffsetToData,
                                               resource_lang.data.struct.Size)
                            resource_filename = f"resource_{resource_type.id}_{resource_id.id}_{resource_lang.id}"
                            resource_path = os.path.join(resource_dir, resource_filename)
                            with open(resource_path, 'wb') as f:
                                f.write(data)
                            extracted_resources.append(resource_filename)

                info["ExtractedResources"] = extracted_resources
        finally:
            # Release pefile's hold on the file so the caller's temporary
            # directory can be cleaned up.
            pe.close()

        # Both helpers open the file themselves and report their own errors.
        disassembly_result = disassemble_binary(file_path)
        pyinstaller_result = try_pyinstaller_extraction(file_path, output_dir)

        return {
            "basic_info": info,
            "disassembly": disassembly_result,
            "pyinstaller": pyinstaller_result
        }
    except Exception as e:
        return {"Error": str(e)}

def analyze_dll(file_path):
    """Collect PE metadata from a DLL and disassemble it.

    Args:
        file_path: Path of the DLL.

    Returns:
        On success, a dict with ``"basic_info"`` (header fields, ``"Sections"``,
        and, when the corresponding PE directories exist, ``"Exports"`` and
        ``"Imports"``) and ``"disassembly"`` (result of disassemble_binary
        called with ``is_dll=True``). On any failure, ``{"Error": <message>}``.
    """
    try:
        pe = pefile.PE(file_path)
        try:
            info = {
                "Machine": hex(pe.FILE_HEADER.Machine),
                "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
                "NumberOfSections": pe.FILE_HEADER.NumberOfSections,
                "Characteristics": hex(pe.FILE_HEADER.Characteristics),
                "DllCharacteristics": hex(pe.OPTIONAL_HEADER.DllCharacteristics),
                "Sections": [
                    {
                        "Name": section.Name.decode('utf-8', errors='ignore').strip('\x00'),
                        "VirtualAddress": hex(section.VirtualAddress),
                        "SizeOfRawData": section.SizeOfRawData,
                        "Entropy": section.get_entropy()
                    }
                    for section in pe.sections
                ]
            }

            # Only named exports are listed; nameless entries are skipped.
            if hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
                info["Exports"] = [
                    exp.name.decode('utf-8', errors='ignore')
                    for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols
                    if exp.name
                ]

            # Imports without a name are skipped.
            if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
                info["Imports"] = [
                    {
                        "DLL": entry.dll.decode('utf-8', errors='ignore'),
                        "Functions": [
                            imp.name.decode('utf-8', errors='ignore')
                            for imp in entry.imports
                            if imp.name
                        ]
                    }
                    for entry in pe.DIRECTORY_ENTRY_IMPORT
                ]
        finally:
            # Release pefile's hold on the file so the caller's temporary
            # directory can be cleaned up.
            pe.close()

        # The disassembler opens the file itself and reports its own errors.
        disassembly_result = disassemble_binary(file_path, is_dll=True)

        return {
            "basic_info": info,
            "disassembly": disassembly_result
        }
    except Exception as e:
        return {"Error": str(e)}

def process_zip_file(file_path, temp_dir):
    """Extract a ZIP archive and analyse every EXE/DLL found inside it.

    Args:
        file_path: Path of the ZIP archive.
        temp_dir: Directory the archive is extracted into. Each executable's
            analysis output goes to a sibling ``<file>_unpacked`` directory.

    Returns:
        On success, ``{"file_list": [...], "nested_files": {...}}`` where
        ``file_list`` is the archive's member names and ``nested_files`` maps
        each executable's path (relative to ``temp_dir``) to
        ``{"type": "exe" | "dll", "info": <analysis result>}``.
        On any failure, ``{"error": <message>}``.
    """
    try:
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            file_list = zip_ref.namelist()
            # NOTE(review): the archive is untrusted and its uncompressed
            # size is not limited before extraction.
            zip_ref.extractall(temp_dir)

        # Snapshot the extracted tree first: analysis writes new directories
        # into it, and those must never be picked up as archive content.
        extracted_paths = [
            os.path.join(root, name)
            for root, _, names in os.walk(temp_dir)
            for name in names
        ]

        nested_files = {}
        for full_path in extracted_paths:
            rel_path = os.path.relpath(full_path, temp_dir)
            # Compare case-insensitively so "SETUP.EXE" is detected too,
            # matching how the uploader branch checks extensions.
            lowered = full_path.lower()

            if lowered.endswith('.exe'):
                # Key the output directory on the full path so executables
                # with the same name in different folders do not share one.
                exe_output_dir = full_path + "_unpacked"
                os.makedirs(exe_output_dir, exist_ok=True)
                nested_files[rel_path] = {
                    'type': 'exe',
                    'info': unpack_exe(full_path, exe_output_dir)
                }
            elif lowered.endswith('.dll'):
                nested_files[rel_path] = {
                    'type': 'dll',
                    'info': analyze_dll(full_path)
                }

        return {
            'file_list': file_list,
            'nested_files': nested_files
        }
    except Exception as e:
        return {'error': str(e)}

# Main app logic
def _show_listing(title, body, language, collapsible):
    """Render one titled code listing, inside an expander only when allowed."""
    if collapsible:
        with st.expander(title):
            st.code(body, language=language)
    else:
        # Used inside the per-file expander of the ZIP view, where opening
        # another expander would nest expanders.
        st.text(title)
        st.code(body, language=language)


def _show_python_tab(info, collapsible):
    """Show recovered Python sources, or the pseudo-Python fallback."""
    pyinstaller_result = info.get('pyinstaller', {})
    disassembly = info.get('disassembly', {})

    if pyinstaller_result.get('success', False):
        st.success("Python code extracted successfully!")
        for filename, content in pyinstaller_result.get('files', {}).items():
            _show_listing(f"Python File: {filename}", content, "python", collapsible)
    else:
        st.warning(pyinstaller_result.get('message', "Not a Python executable or extraction failed."))
        st.subheader("Generated Python-like Code")
        st.code(disassembly.get('pseudo_python', "# No code could be generated"), language="python")


def _show_assembly(disassembly, collapsible):
    """Show the assembly listing of every disassembled section."""
    for section in disassembly.get('code_sections', []):
        _show_listing(f"Section: {section['name']}", "\n".join(section['disassembly']),
                      "asm", collapsible)


def _show_exports_imports(basic_info):
    """Show the export and import tables when the analysis produced them."""
    if 'Exports' in basic_info:
        st.subheader("Exported Functions")
        st.json(basic_info['Exports'])
    if 'Imports' in basic_info:
        st.subheader("Imported DLLs and Functions")
        st.json(basic_info['Imports'])


def _show_exe_report(info, nested):
    """Render the tabs for one unpack_exe() result.

    ``nested`` is true for executables found inside a ZIP: they are drawn
    inside an expander, so inner listings are not collapsible and the
    Resources tab is omitted.
    """
    if 'Error' in info:
        # unpack_exe() reports failures as {"Error": ...}; surface them
        # instead of rendering empty tabs.
        st.error(f"Error analyzing EXE file: {info['Error']}")
        return

    labels = ["Basic Info", "Python Code", "Disassembly"]
    if not nested:
        labels.append("Resources")
    tabs = st.tabs(labels)

    with tabs[0]:
        st.json(info.get('basic_info', {}))
    with tabs[1]:
        _show_python_tab(info, collapsible=not nested)
    with tabs[2]:
        disassembly = info.get('disassembly', {})
        if disassembly.get('success', False):
            _show_assembly(disassembly, collapsible=not nested)
        else:
            st.error(disassembly.get('message', "Disassembly failed"))
    if not nested:
        with tabs[3]:
            basic_info = info.get('basic_info', {})
            if 'ExtractedResources' in basic_info:
                st.write(f"Found {len(basic_info['ExtractedResources'])} resources")
                for resource in basic_info['ExtractedResources']:
                    st.text(f"Resource: {resource}")


def _show_dll_report(info, nested):
    """Render the tabs for one analyze_dll() result.

    ``nested`` is true for DLLs found inside a ZIP; those show only the
    pseudo-Python view, without the per-section assembly listings.
    """
    if 'Error' in info:
        # analyze_dll() reports failures as {"Error": ...}; surface them
        # instead of rendering empty tabs.
        st.error(f"Error analyzing DLL file: {info['Error']}")
        return

    tabs = st.tabs(["Basic Info", "DLL Code", "Exports/Imports"])
    with tabs[0]:
        st.json(info.get('basic_info', {}))
    with tabs[1]:
        disassembly = info.get('disassembly', {})
        st.subheader("Generated Python-like Code")
        st.code(disassembly.get('pseudo_python', "# No code could be generated"), language="python")

        if not nested and disassembly.get('success', False):
            st.subheader("Assembly Code")
            _show_assembly(disassembly, collapsible=True)
    with tabs[2]:
        _show_exports_imports(info.get('basic_info', {}))


uploaded_file = st.file_uploader("Upload a file (.zip, .exe, or .dll)", type=["zip", "exe", "dll"])

if uploaded_file is not None:
    with tempfile.TemporaryDirectory() as temp_dir:
        # Save the upload into the temporary directory. basename() keeps a
        # crafted upload name from pointing outside that directory.
        file_path = os.path.join(temp_dir, os.path.basename(uploaded_file.name))
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        st.success(f"File uploaded: {uploaded_file.name}")

        # Dispatch on the (case-insensitive) file extension.
        upload_name = uploaded_file.name.lower()

        if upload_name.endswith('.zip'):
            st.subheader("ZIP File Contents")
            output_dir = os.path.join(temp_dir, "extracted")
            os.makedirs(output_dir, exist_ok=True)

            result = process_zip_file(file_path, output_dir)

            if 'error' in result:
                st.error(f"Error processing ZIP file: {result['error']}")
            else:
                with st.expander("ZIP Contents", expanded=True):
                    st.write(f"Total files: {len(result['file_list'])}")
                    st.json(result['file_list'])

                if result['nested_files']:
                    st.subheader("Detected Executable Files")
                    for nested_path, file_info in result['nested_files'].items():
                        with st.expander(f"{nested_path} ({file_info['type'].upper()})"):
                            if file_info['type'] == 'exe':
                                _show_exe_report(file_info['info'], nested=True)
                            else:  # DLL
                                _show_dll_report(file_info['info'], nested=True)

        elif upload_name.endswith('.exe'):
            st.subheader("EXE File Analysis and Decompilation")
            output_dir = os.path.join(temp_dir, "exe_unpacked")
            os.makedirs(output_dir, exist_ok=True)

            try:
                _show_exe_report(unpack_exe(file_path, output_dir), nested=False)
            except Exception as e:
                st.error(f"Error analyzing EXE file: {str(e)}")

        elif upload_name.endswith('.dll'):
            st.subheader("DLL File Analysis and Decompilation")
            try:
                _show_dll_report(analyze_dll(file_path), nested=False)
            except Exception as e:
                st.error(f"Error analyzing DLL file: {str(e)}")