euler314 committed on
Commit
aba5897
·
verified ·
1 Parent(s): 4d6ee5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +438 -351
app.py CHANGED
@@ -3,30 +3,119 @@ import os
3
  import zipfile
4
  import tempfile
5
  import pefile
6
- import shutil
7
  import subprocess
 
8
  import re
9
- import struct
10
  from pathlib import Path
11
- from capstone import Cs, CS_ARCH_X86, CS_MODE_32, CS_MODE_64
12
 
13
- st.set_page_config(page_title="Advanced File Analyzer", page_icon="🔍", layout="wide")
14
 
15
- st.title("Advanced File Analysis Tool")
16
  st.markdown("""
17
- This tool allows you to:
18
- - Extract and view contents of .zip files
19
- - Decompile .exe files to Python-like code
20
- - View decompiled code from .dll files
21
- - Automatically analyze nested executables
22
  """)
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def try_pyinstaller_extraction(file_path, output_dir):
25
  """Attempt to extract Python scripts from PyInstaller executables"""
26
  try:
27
- # Install pyinstxtractor if not already present
28
- subprocess.run(["pip", "install", "pyinstxtractor"], capture_output=True)
29
-
30
  # Run pyinstxtractor on the file
31
  result = subprocess.run(["python", "-m", "pyinstxtractor", file_path],
32
  cwd=output_dir, capture_output=True, text=True)
@@ -35,8 +124,6 @@ def try_pyinstaller_extraction(file_path, output_dir):
35
 
36
  if os.path.exists(extracted_dir):
37
  # Try to decompile the Python bytecode files
38
- subprocess.run(["pip", "install", "uncompyle6"], capture_output=True)
39
-
40
  python_files = {}
41
  for root, _, files in os.walk(extracted_dir):
42
  for file in files:
@@ -44,6 +131,7 @@ def try_pyinstaller_extraction(file_path, output_dir):
44
  pyc_path = os.path.join(root, file)
45
  py_path = pyc_path + ".py"
46
  try:
 
47
  subprocess.run(["uncompyle6", pyc_path, "-o", py_path], capture_output=True)
48
  if os.path.exists(py_path):
49
  with open(py_path, 'r', encoding='utf-8', errors='ignore') as f:
@@ -51,6 +139,35 @@ def try_pyinstaller_extraction(file_path, output_dir):
51
  python_files[rel_path] = f.read()
52
  except:
53
  pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  return {
56
  "success": True,
@@ -67,264 +184,106 @@ def try_pyinstaller_extraction(file_path, output_dir):
67
  "message": f"PyInstaller extraction error: {str(e)}"
68
  }
69
 
70
# Section characteristic flags from the PE/COFF specification.
_IMAGE_SCN_CNT_CODE = 0x00000020
_IMAGE_SCN_MEM_EXECUTE = 0x20000000
# Upper bound on the number of bytes disassembled per section.
_DISASM_PREVIEW_BYTES = 4096


def disassemble_binary(file_path, is_dll=False):
    """Disassemble a preview of every code section of a PE file.

    Args:
        file_path: Path of the EXE/DLL to open with pefile.
        is_dll: When true, the entry point is not used as the starting
            offset and the export table is read.

    Returns:
        On success, a dict with "success" (True), "code_sections" (a list of
        {"name", "disassembly"} dicts, where "disassembly" is a list of
        "0x<rva>: <mnemonic> <operands>" strings), "exports" (list of export
        names) and "pseudo_python" (output of generate_pseudo_python).
        On any failure, {"success": False, "message": <description>}.
    """
    pe = None
    try:
        pe = pefile.PE(file_path)

        # 0x20b is the optional-header magic of PE32+ (64-bit) images.
        is_64bit = pe.OPTIONAL_HEADER.Magic == 0x20b
        md = Cs(CS_ARCH_X86, CS_MODE_64 if is_64bit else CS_MODE_32)

        # DLLs are disassembled from the start of each section instead.
        entry_rva = None if is_dll else pe.OPTIONAL_HEADER.AddressOfEntryPoint
        code_flags = _IMAGE_SCN_CNT_CODE | _IMAGE_SCN_MEM_EXECUTE

        code_sections = []
        for section in pe.sections:
            if not section.Characteristics & code_flags:
                continue

            section_name = section.Name.decode('utf-8', errors='ignore').strip('\x00')
            section_data = pe.get_data(section.VirtualAddress, section.SizeOfRawData)

            is_entry = entry_rva is not None and section.contains_rva(entry_rva)
            start_offset = entry_rva - section.VirtualAddress if is_entry else 0
            # The entry point can lie beyond the bytes present on disk;
            # clamp so the slice below is empty instead of wrapping around.
            start_offset = min(start_offset, len(section_data))
            window = section_data[start_offset:start_offset + _DISASM_PREVIEW_BYTES]

            disassembly = []
            instructions = md.disasm_lite(window, section.VirtualAddress + start_offset)
            for index, (address, _size, mnemonic, op_str) in enumerate(instructions):
                entry_marker = "ENTRY POINT -> " if is_entry and index == 0 else ""
                disassembly.append(f"{entry_marker}0x{address:08x}: {mnemonic} {op_str}")

            code_sections.append({
                "name": section_name,
                "disassembly": disassembly
            })

        # Only named exports are reported; ordinal-only exports have no name.
        exports = []
        if is_dll and hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
            exports = [
                exp.name.decode('utf-8', errors='ignore')
                for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols
                if exp.name
            ]

        pseudo_python = generate_pseudo_python(code_sections, exports, is_dll)

        return {
            "success": True,
            "code_sections": code_sections,
            "exports": exports,
            "pseudo_python": pseudo_python
        }
    except Exception as e:
        # Best-effort boundary: callers render the message instead of crashing.
        return {
            "success": False,
            "message": f"Disassembly error: {str(e)}"
        }
    finally:
        # Release the file handle / mapping held by pefile.
        if pe is not None:
            pe.close()
148
-
149
# "<optional marker>0x<address>: <mnemonic> <operands>" as produced by the disassembler.
_DISASM_LINE = re.compile(
    r'^.*?0x[0-9a-fA-F]+:\s*(?P<mnemonic>\S+)\s*(?P<operands>.*)$'
)
_HEX_IMMEDIATE = re.compile(r'0x[0-9a-fA-F]+')
# Maximum number of instructions translated per section.
_PSEUDO_PREVIEW_INSTRUCTIONS = 100


def _translate_instruction(line):
    """Return the indented pseudo-Python lines for one disassembly line."""
    as_comment = [f"    # {line}"]
    parsed = _DISASM_LINE.match(line)
    if parsed is None:
        return as_comment

    mnemonic = parsed.group("mnemonic").lower()
    operands = parsed.group("operands").strip()

    if mnemonic == "call":
        # Only a direct call has an address to name; the operand is used,
        # not the instruction's own address at the start of the line.
        if _HEX_IMMEDIATE.fullmatch(operands):
            return [f"    call_function_{operands}()"]
        return as_comment

    if mnemonic in ("mov", "cmp"):
        left, comma, right = operands.partition(",")
        left, right = left.strip(), right.strip()
        # Memory destinations such as "dword ptr [...]" stay as comments.
        if comma and right and left.isidentifier():
            if mnemonic == "mov":
                return [f"    {left} = {right}  # {line}"]
            return [f"    if {left} == {right}:  # {line}", "        pass"]

    return as_comment


def generate_pseudo_python(code_sections, exports, is_dll):
    """Generate pseudo-Python text from disassembly listings.

    Args:
        code_sections: List of dicts with a "name" string and a
            "disassembly" list of "0x<addr>: <mnemonic> <operands>" strings.
        exports: Export names; each becomes a stub function when is_dll.
        is_dll: Whether to emit the export stubs.

    Returns:
        One string. The first code section becomes main(), later sections
        become main_2(), main_3(), ...; at most the first 100 instructions of
        each section are translated. Direct calls, register moves and
        register compares become statements; everything else is kept as a
        comment so no instruction is dropped.
    """
    pseudo_code = [
        "# This is an approximation generated from binary code",
        "# It is NOT the original source code",
        "",
    ]

    if is_dll:
        pseudo_code.append("# DLL Export Functions")
        for export in exports:
            pseudo_code.append(f"def {export}():")
            pseudo_code.append("    # Implementation not recoverable from binary")
            pseudo_code.append("    pass")
            pseudo_code.append("")

    for index, section in enumerate(code_sections):
        # Unique names keep later sections from redefining main().
        function_name = "main" if index == 0 else f"main_{index + 1}"
        pseudo_code.append(f"# Code Section: {section['name']}")
        pseudo_code.append(f"def {function_name}():")

        body = []
        for line in section['disassembly'][:_PSEUDO_PREVIEW_INSTRUCTIONS]:
            body.extend(_translate_instruction(line))
        # A function body made only of comments (or nothing) is not valid.
        if all(entry.lstrip().startswith("#") for entry in body):
            body.append("    pass")

        pseudo_code.extend(body)
        pseudo_code.append("")

    if code_sections:
        pseudo_code.append("if __name__ == '__main__':")
        pseudo_code.append("    main()")
        pseudo_code.append("")

    return "\n".join(pseudo_code)
203
-
204
def _resource_label(entry):
    """Return a filename-safe label for a resource entry: its id, else its name."""
    label = entry.id if entry.id is not None else entry.name
    return re.sub(r'[^\w.-]', '_', str(label))


def _iter_resource_leaves(pe):
    """Yield (type, id, language) entry triples for each resource leaf holding data."""
    for type_entry in pe.DIRECTORY_ENTRY_RESOURCE.entries:
        if not hasattr(type_entry, 'directory'):
            continue
        for id_entry in type_entry.directory.entries:
            if not hasattr(id_entry, 'directory'):
                continue
            for lang_entry in id_entry.directory.entries:
                if hasattr(lang_entry, 'data'):
                    yield type_entry, id_entry, lang_entry


def unpack_exe(file_path, output_dir):
    """Extract information from an EXE file and attempt to convert to Python.

    Args:
        file_path: Path of the executable to analyse.
        output_dir: Directory that receives a "resources" sub-directory and
            is passed on to the PyInstaller extraction.

    Returns:
        {"basic_info": ..., "disassembly": ..., "pyinstaller": ...} on
        success, where "basic_info" holds header fields, "Sections", and
        (when present in the file) "Imports" and "ExtractedResources".
        {"Error": <message>} if anything fails.
    """
    pe = None
    try:
        pe = pefile.PE(file_path)

        # Basic PE information
        info = {
            "Machine": hex(pe.FILE_HEADER.Machine),
            "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
            "NumberOfSections": pe.FILE_HEADER.NumberOfSections,
            "Sections": [
                {
                    "Name": section.Name.decode('utf-8', errors='ignore').strip('\x00'),
                    "VirtualAddress": hex(section.VirtualAddress),
                    "SizeOfRawData": section.SizeOfRawData,
                    "Entropy": section.get_entropy()
                }
                for section in pe.sections
            ]
        }

        # Imports, grouped per DLL; ordinal-only imports have no name and are skipped.
        if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
            info["Imports"] = [
                {
                    "DLL": entry.dll.decode('utf-8', errors='ignore'),
                    "Functions": [
                        imp.name.decode('utf-8', errors='ignore')
                        for imp in entry.imports
                        if imp.name
                    ]
                }
                for entry in pe.DIRECTORY_ENTRY_IMPORT
            ]

        # Dump every resource leaf to its own file.
        if hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
            resource_dir = os.path.join(output_dir, "resources")
            os.makedirs(resource_dir, exist_ok=True)
            extracted_resources = []

            for type_entry, id_entry, lang_entry in _iter_resource_leaves(pe):
                data = pe.get_data(lang_entry.data.struct.OffsetToData,
                                   lang_entry.data.struct.Size)
                # Named entries have id None; fall back to the name so that
                # they do not all collapse onto "resource_None_...".
                resource_filename = "resource_{}_{}_{}".format(
                    _resource_label(type_entry),
                    _resource_label(id_entry),
                    _resource_label(lang_entry),
                )
                with open(os.path.join(resource_dir, resource_filename), 'wb') as f:
                    f.write(data)
                extracted_resources.append(resource_filename)

            info["ExtractedResources"] = extracted_resources

        # Both helpers report their own failures inside their result dicts.
        disassembly_result = disassemble_binary(file_path)
        pyinstaller_result = try_pyinstaller_extraction(file_path, output_dir)

        return {
            "basic_info": info,
            "disassembly": disassembly_result,
            "pyinstaller": pyinstaller_result
        }
    except Exception as e:
        # Best-effort boundary: the UI shows the message instead of crashing.
        return {"Error": str(e)}
    finally:
        # Release the file handle / mapping held by pefile.
        if pe is not None:
            pe.close()
274
-
275
def analyze_dll(file_path):
    """Extract information and code from a DLL file.

    Args:
        file_path: Path of the DLL to analyse.

    Returns:
        {"basic_info": ..., "disassembly": ...} on success, where
        "basic_info" holds header fields, "Sections", and (when present in
        the file) "Exports" and "Imports". {"Error": <message>} if anything
        fails.
    """
    pe = None
    try:
        pe = pefile.PE(file_path)

        # Basic information
        info = {
            "Machine": hex(pe.FILE_HEADER.Machine),
            "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
            "NumberOfSections": pe.FILE_HEADER.NumberOfSections,
            "Characteristics": hex(pe.FILE_HEADER.Characteristics),
            "DllCharacteristics": hex(pe.OPTIONAL_HEADER.DllCharacteristics),
            "Sections": [
                {
                    "Name": section.Name.decode('utf-8', errors='ignore').strip('\x00'),
                    "VirtualAddress": hex(section.VirtualAddress),
                    "SizeOfRawData": section.SizeOfRawData,
                    "Entropy": section.get_entropy()
                }
                for section in pe.sections
            ]
        }

        # Named exports only; ordinal-only exports have no name to show.
        if hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
            info["Exports"] = [
                exp.name.decode('utf-8', errors='ignore')
                for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols
                if exp.name
            ]

        # Imports, grouped per DLL.
        if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
            info["Imports"] = [
                {
                    "DLL": entry.dll.decode('utf-8', errors='ignore'),
                    "Functions": [
                        imp.name.decode('utf-8', errors='ignore')
                        for imp in entry.imports
                        if imp.name
                    ]
                }
                for entry in pe.DIRECTORY_ENTRY_IMPORT
            ]

        # The disassembler reports its own failures inside its result dict.
        disassembly_result = disassemble_binary(file_path, is_dll=True)

        return {
            "basic_info": info,
            "disassembly": disassembly_result
        }
    except Exception as e:
        # Best-effort boundary: the UI shows the message instead of crashing.
        return {"Error": str(e)}
    finally:
        # Release the file handle / mapping held by pefile.
        if pe is not None:
            pe.close()
330
 
@@ -350,12 +309,13 @@ def process_zip_file(file_path, temp_dir):
350
  os.makedirs(exe_output_dir, exist_ok=True)
351
  nested_files[rel_path] = {
352
  'type': 'exe',
353
- 'info': unpack_exe(full_path, exe_output_dir)
 
354
  }
355
  elif file.endswith('.dll'):
356
  nested_files[rel_path] = {
357
  'type': 'dll',
358
- 'info': analyze_dll(full_path)
359
  }
360
 
361
  return {
@@ -377,133 +337,260 @@ if uploaded_file is not None:
377
 
378
  st.success(f"File uploaded: {uploaded_file.name}")
379
 
 
 
 
 
380
  # Process based on file type
381
  if uploaded_file.name.lower().endswith('.zip'):
382
  st.subheader("ZIP File Contents")
383
- output_dir = os.path.join(temp_dir, "extracted")
384
- os.makedirs(output_dir, exist_ok=True)
385
-
386
- result = process_zip_file(file_path, output_dir)
387
-
388
- if 'error' in result:
389
- st.error(f"Error processing ZIP file: {result['error']}")
390
- else:
391
- with st.expander("ZIP Contents", expanded=True):
392
- st.write(f"Total files: {len(result['file_list'])}")
393
- st.json(result['file_list'])
394
 
395
- if result['nested_files']:
396
- st.subheader("Detected Executable Files")
397
- for file_path, file_info in result['nested_files'].items():
398
- with st.expander(f"{file_path} ({file_info['type'].upper()})"):
399
- if file_info['type'] == 'exe':
400
- tabs = st.tabs(["Basic Info", "Python Code", "Disassembly"])
401
- with tabs[0]:
402
- st.json(file_info['info'].get('basic_info', {}))
403
- with tabs[1]:
404
- pyinstaller_result = file_info['info'].get('pyinstaller', {})
405
- disassembly = file_info['info'].get('disassembly', {})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
 
407
- if pyinstaller_result.get('success', False):
408
- st.success("Python code extracted successfully!")
409
- for filename, content in pyinstaller_result.get('files', {}).items():
410
- with st.expander(f"Python File: {filename}"):
411
- st.code(content, language="python")
412
- else:
413
- st.warning("Not a Python executable or extraction failed.")
414
- st.subheader("Generated Python-like Code")
415
- st.code(disassembly.get('pseudo_python', "# No code could be generated"), language="python")
416
- with tabs[2]:
417
- disassembly = file_info['info'].get('disassembly', {})
418
- if disassembly.get('success', False):
419
- for section in disassembly.get('code_sections', []):
420
- with st.expander(f"Section: {section['name']}"):
421
- st.code("\n".join(section['disassembly']), language="asm")
422
- else:
423
- st.error(disassembly.get('message', "Disassembly failed"))
424
- else: # DLL
425
- tabs = st.tabs(["Basic Info", "DLL Code", "Exports/Imports"])
426
- with tabs[0]:
427
- st.json(file_info['info'].get('basic_info', {}))
428
- with tabs[1]:
429
- disassembly = file_info['info'].get('disassembly', {})
430
- st.subheader("Generated Python-like Code")
431
- st.code(disassembly.get('pseudo_python', "# No code could be generated"), language="python")
432
- with tabs[2]:
433
- basic_info = file_info['info'].get('basic_info', {})
434
- if 'Exports' in basic_info:
435
  st.subheader("Exported Functions")
436
- st.json(basic_info['Exports'])
437
- if 'Imports' in basic_info:
438
- st.subheader("Imported DLLs and Functions")
439
- st.json(basic_info['Imports'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
440
 
441
  elif uploaded_file.name.lower().endswith('.exe'):
442
- st.subheader("EXE File Analysis and Decompilation")
443
- output_dir = os.path.join(temp_dir, "exe_unpacked")
444
- os.makedirs(output_dir, exist_ok=True)
445
-
446
- try:
447
- exe_info = unpack_exe(file_path, output_dir)
 
 
 
 
 
 
448
 
449
- tabs = st.tabs(["Basic Info", "Python Code", "Disassembly", "Resources"])
450
  with tabs[0]:
451
- st.json(exe_info.get('basic_info', {}))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
452
  with tabs[1]:
453
- pyinstaller_result = exe_info.get('pyinstaller', {})
454
- disassembly = exe_info.get('disassembly', {})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455
 
456
- if pyinstaller_result.get('success', False):
 
 
 
 
 
457
  st.success("Python code extracted successfully!")
458
- for filename, content in pyinstaller_result.get('files', {}).items():
459
  with st.expander(f"Python File: {filename}"):
460
  st.code(content, language="python")
461
  else:
462
- st.warning(pyinstaller_result.get('message', "Not a Python executable or extraction failed."))
463
- st.subheader("Generated Python-like Code")
464
- st.code(disassembly.get('pseudo_python', "# No code could be generated"), language="python")
465
- with tabs[2]:
466
- disassembly = exe_info.get('disassembly', {})
467
- if disassembly.get('success', False):
468
- for section in disassembly.get('code_sections', []):
469
- with st.expander(f"Section: {section['name']}"):
470
- st.code("\n".join(section['disassembly']), language="asm")
471
- else:
472
- st.error(disassembly.get('message', "Disassembly failed"))
473
- with tabs[3]:
474
- basic_info = exe_info.get('basic_info', {})
475
- if 'ExtractedResources' in basic_info:
476
- st.write(f"Found {len(basic_info['ExtractedResources'])} resources")
477
- for resource in basic_info['ExtractedResources']:
478
- st.text(f"Resource: {resource}")
479
- except Exception as e:
480
- st.error(f"Error analyzing EXE file: {str(e)}")
481
 
482
  elif uploaded_file.name.lower().endswith('.dll'):
483
- st.subheader("DLL File Analysis and Decompilation")
484
- try:
485
- dll_info = analyze_dll(file_path)
 
 
 
486
 
487
- tabs = st.tabs(["Basic Info", "DLL Code", "Exports/Imports"])
488
  with tabs[0]:
489
- st.json(dll_info.get('basic_info', {}))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
  with tabs[1]:
491
- disassembly = dll_info.get('disassembly', {})
492
- st.subheader("Generated Python-like Code")
493
- st.code(disassembly.get('pseudo_python', "# No code could be generated"), language="python")
494
-
495
- if disassembly.get('success', False):
496
- st.subheader("Assembly Code")
497
- for section in disassembly.get('code_sections', []):
498
- with st.expander(f"Section: {section['name']}"):
499
- st.code("\n".join(section['disassembly']), language="asm")
500
  with tabs[2]:
501
- basic_info = dll_info.get('basic_info', {})
502
- if 'Exports' in basic_info:
503
- st.subheader("Exported Functions")
504
- st.json(basic_info['Exports'])
505
- if 'Imports' in basic_info:
506
- st.subheader("Imported DLLs and Functions")
507
- st.json(basic_info['Imports'])
508
- except Exception as e:
509
- st.error(f"Error analyzing DLL file: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import zipfile
4
  import tempfile
5
  import pefile
 
6
  import subprocess
7
+ import binascii
8
  import re
9
+ import json
10
  from pathlib import Path
 
11
 
12
+ st.set_page_config(page_title="Executable Reverse Engineer", page_icon="🔍", layout="wide")
13
 
14
+ st.title("Executable Reverse Engineering Tool")
15
  st.markdown("""
16
+ This tool performs reverse engineering on executables (.exe/.dll) to show their inner workings:
17
+ - Disassembles machine code to view assembly instructions
18
+ - Attempts to decompile to pseudocode
19
+ - Shows strings, imports, and other binary artifacts
20
+ - Works with executables from any programming language
21
  """)
22
 
23
+ # Install necessary packages at startup
24
import importlib
import importlib.util
import sys

try:
    with st.spinner("Setting up reverse engineering environment..."):
        # The script body is re-executed often, so only packages that are
        # actually missing are installed.
        for package in ("pyinstxtractor", "uncompyle6", "capstone", "r2pipe"):
            if importlib.util.find_spec(package) is not None:
                continue
            # "python -m pip" targets the interpreter running this app; a
            # bare "pip" on PATH may belong to a different environment.
            install = subprocess.run(
                [sys.executable, "-m", "pip", "install", package],
                capture_output=True,
                text=True,
            )
            if install.returncode != 0:
                st.warning(f"Could not install {package}: {install.stderr.strip()}")
        # Make freshly installed packages visible to the import system.
        importlib.invalidate_caches()

    # Import after installation
    import r2pipe
    from capstone import Cs, CS_ARCH_X86, CS_MODE_32, CS_MODE_64
    st.success("Environment ready")
except Exception as e:
    # Includes ImportError when a package is still unavailable.
    st.error(f"Setup error: {str(e)}")
38
+
39
def extract_strings(file_path, min_length=4):
    """Extract printable ASCII and UTF-16LE strings from a binary file.

    Args:
        file_path: Path of the file to scan.
        min_length: Minimum number of characters a run must have to be
            reported. Values below 1 are treated as 1.

    Returns:
        A sorted list of the distinct strings found. If the file cannot be
        read or min_length is not a number, a single-element list holding an
        "Error extracting strings: ..." message.
    """
    try:
        # A quantifier of {0,} would match the empty string at every offset.
        shortest = max(1, int(min_length))
        with open(file_path, 'rb') as f:
            content = f.read()
    except (OSError, TypeError, ValueError) as e:
        return [f"Error extracting strings: {str(e)}"]

    # Runs of printable ASCII bytes (0x20-0x7E).
    ascii_pattern = rb'[\x20-\x7E]{%d,}' % shortest
    # The same characters as UTF-16LE code units: each followed by a NUL byte.
    wide_pattern = rb'(?:[\x20-\x7E]\x00){%d,}' % shortest

    found = {match.decode('ascii') for match in re.findall(ascii_pattern, content)}
    found.update(match.decode('utf-16-le') for match in re.findall(wide_pattern, content))
    return sorted(found)
58
+
59
def _r2_json(r2, command, default):
    """Run *command* in the r2 session and decode its JSON output.

    Returns *default* when the command prints nothing but whitespace.
    """
    output = r2.cmd(command)
    if not output or not output.strip():
        return default
    return json.loads(output)


def analyze_with_radare2(file_path):
    """Use radare2 through r2pipe for deep analysis.

    Args:
        file_path: Path of the binary to open in radare2.

    Returns:
        On success, a dict with the keys "info", "entry_point", "imports",
        "exports", "sections", "main_disasm", "functions" and "pseudocode".
        On any failure, {"error": "Radare2 analysis failed: ..."}.
        The r2 session is closed in both cases.
    """
    r2 = None
    try:
        r2 = r2pipe.open(file_path)

        # Perform initial analysis
        r2.cmd("aaa")

        info = _r2_json(r2, "ij", {})
        entry_point = r2.cmd("ie")
        imports = _r2_json(r2, "iij", [])
        exports = _r2_json(r2, "iEj", [])
        sections = _r2_json(r2, "iSj", [])

        # Prefer main; fall back to the entry point when main is unknown.
        main_disasm = r2.cmd("s main; pdf")
        if not main_disasm or "Cannot find function" in main_disasm:
            main_disasm = r2.cmd("s entry0; pdf")

        functions = _r2_json(r2, "aflj", [])

        # Same fallback for the decompiled pseudocode.
        pseudocode = r2.cmd("s main; pdc")
        if not pseudocode or not pseudocode.strip():
            pseudocode = r2.cmd("s entry0; pdc")

        return {
            "info": info,
            "entry_point": entry_point,
            "imports": imports,
            "exports": exports,
            "sections": sections,
            "main_disasm": main_disasm,
            "functions": functions,
            "pseudocode": pseudocode
        }
    except Exception as e:
        # Best-effort boundary: callers check for the "error" key and fall
        # back to another disassembler.
        return {"error": f"Radare2 analysis failed: {str(e)}"}
    finally:
        if r2 is not None:
            try:
                r2.quit()
            except Exception:
                # The session is already unusable; the analysis result (or
                # the error above) is what the caller needs.
                pass
115
+
116
  def try_pyinstaller_extraction(file_path, output_dir):
117
  """Attempt to extract Python scripts from PyInstaller executables"""
118
  try:
 
 
 
119
  # Run pyinstxtractor on the file
120
  result = subprocess.run(["python", "-m", "pyinstxtractor", file_path],
121
  cwd=output_dir, capture_output=True, text=True)
 
124
 
125
  if os.path.exists(extracted_dir):
126
  # Try to decompile the Python bytecode files
 
 
127
  python_files = {}
128
  for root, _, files in os.walk(extracted_dir):
129
  for file in files:
 
131
  pyc_path = os.path.join(root, file)
132
  py_path = pyc_path + ".py"
133
  try:
134
+ # Try decompiling with uncompyle6
135
  subprocess.run(["uncompyle6", pyc_path, "-o", py_path], capture_output=True)
136
  if os.path.exists(py_path):
137
  with open(py_path, 'r', encoding='utf-8', errors='ignore') as f:
 
139
  python_files[rel_path] = f.read()
140
  except:
141
  pass
142
+
143
+ # Check if we found the PYZ archive (PyInstaller specific)
144
+ pyz_files = []
145
+ for root, _, files in os.walk(extracted_dir):
146
+ for file in files:
147
+ if file.endswith('.pyz'):
148
+ pyz_path = os.path.join(root, file)
149
+ # Extract PYZ contents
150
+ pyz_extract_dir = pyz_path + "_extracted"
151
+ os.makedirs(pyz_extract_dir, exist_ok=True)
152
+ try:
153
+ # Extract PYZ files (these contain most of the Python modules)
154
+ subprocess.run(["python", "-m", "pyinstxtractor", pyz_path],
155
+ cwd=output_dir, capture_output=True)
156
+ for pyz_root, _, pyz_files in os.walk(pyz_extract_dir):
157
+ for pyz_file in pyz_files:
158
+ if pyz_file.endswith('.pyc') or pyz_file.endswith('.pyo'):
159
+ pyc_path = os.path.join(pyz_root, pyz_file)
160
+ py_path = pyc_path + ".py"
161
+ try:
162
+ subprocess.run(["uncompyle6", pyc_path, "-o", py_path], capture_output=True)
163
+ if os.path.exists(py_path):
164
+ with open(py_path, 'r', encoding='utf-8', errors='ignore') as f:
165
+ rel_path = os.path.join("PYZ_ARCHIVE", os.path.relpath(pyc_path, pyz_extract_dir))
166
+ python_files[rel_path] = f.read()
167
+ except:
168
+ pass
169
+ except:
170
+ pass
171
 
172
  return {
173
  "success": True,
 
184
  "message": f"PyInstaller extraction error: {str(e)}"
185
  }
186
 
187
def _binary_basic_info(pe):
    """Collect the header fields shown in the summary tab."""
    return {
        "Machine": hex(pe.FILE_HEADER.Machine),
        "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
        "NumberOfSections": pe.FILE_HEADER.NumberOfSections,
        "SizeOfImage": pe.OPTIONAL_HEADER.SizeOfImage,
        "EntryPoint": hex(pe.OPTIONAL_HEADER.AddressOfEntryPoint),
        "ImageBase": hex(pe.OPTIONAL_HEADER.ImageBase),
        "Subsystem": pe.OPTIONAL_HEADER.Subsystem
    }


def _binary_sections(pe):
    """Describe every section of the PE file."""
    return [
        {
            "Name": section.Name.decode('utf-8', errors='ignore').strip('\x00'),
            "VirtualAddress": hex(section.VirtualAddress),
            "VirtualSize": section.Misc_VirtualSize,
            "SizeOfRawData": section.SizeOfRawData,
            "Entropy": section.get_entropy(),
            "Characteristics": hex(section.Characteristics)
        }
        for section in pe.sections
    ]


def _binary_imports(pe):
    """List imported function names grouped per DLL (named imports only)."""
    if not hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
        return []
    return [
        {
            "DLL": entry.dll.decode('utf-8', errors='ignore'),
            "Functions": [
                imp.name.decode('utf-8', errors='ignore')
                for imp in entry.imports
                if imp.name
            ]
        }
        for entry in pe.DIRECTORY_ENTRY_IMPORT
    ]


def _binary_exports(pe):
    """List named exports with their absolute address and ordinal."""
    if not hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
        return []
    return [
        {
            "Name": exp.name.decode('utf-8', errors='ignore'),
            "Address": hex(pe.OPTIONAL_HEADER.ImageBase + exp.address),
            "Ordinal": exp.ordinal
        }
        for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols
        if exp.name
    ]


def _binary_entry_preview(pe, max_bytes=512, max_instructions=100):
    """Disassemble the start of the entry point with Capstone.

    Returns the listing as one string, or None when no section contains the
    entry point. Raises ImportError when Capstone is not installed.
    """
    import itertools
    from capstone import Cs, CS_ARCH_X86, CS_MODE_32, CS_MODE_64

    entry_rva = pe.OPTIONAL_HEADER.AddressOfEntryPoint
    if not any(section.contains_rva(entry_rva) for section in pe.sections):
        return None

    # 0x20b is the optional-header magic of PE32+ (64-bit) images.
    is_64bit = pe.OPTIONAL_HEADER.Magic == 0x20b
    md = Cs(CS_ARCH_X86, CS_MODE_64 if is_64bit else CS_MODE_32)

    # The memory-mapped image is indexed by RVA, so no file offset is needed.
    entry_data = pe.get_memory_mapped_image()[entry_rva:entry_rva + max_bytes]
    instructions = md.disasm_lite(entry_data, pe.OPTIONAL_HEADER.ImageBase + entry_rva)
    return "\n".join(
        f"0x{address:08x}: {mnemonic} {op_str}"
        for address, _size, mnemonic, op_str in itertools.islice(instructions, max_instructions)
    )


def analyze_binary(file_path, is_dll=False):
    """Comprehensive analysis of a binary file.

    Args:
        file_path: Path of the EXE/DLL to analyse.
        is_dll: When true, the export table is reported as well.

    Returns:
        A dict with "basic_info", "sections", "imports", "exports" and
        "strings". When radare2 succeeds it also has "disassembly",
        "functions" and "pseudocode"; otherwise "disassembly" comes from
        Capstone (absent if no section contains the entry point) or is an
        explanatory message when Capstone cannot be imported.
        {"Error": <message>} if the file cannot be analysed at all.
    """
    pe = None
    try:
        pe = pefile.PE(file_path)

        results = {
            "basic_info": _binary_basic_info(pe),
            "sections": _binary_sections(pe),
            "imports": _binary_imports(pe),
            "exports": _binary_exports(pe) if is_dll else [],
            "strings": extract_strings(file_path),
        }

        # Use Radare2 for deeper analysis if available
        try:
            r2_results = analyze_with_radare2(file_path)
            if "error" not in r2_results:
                results["disassembly"] = r2_results["main_disasm"]
                results["functions"] = r2_results["functions"]
                results["pseudocode"] = r2_results["pseudocode"]
            else:
                # Fallback to basic disassembly with Capstone
                preview = _binary_entry_preview(pe)
                if preview is not None:
                    results["disassembly"] = preview
        except ImportError:
            # If r2pipe or capstone isn't available
            results["disassembly"] = "Advanced disassembly not available. Install r2pipe or capstone."

        return results
    except Exception as e:
        # Best-effort boundary: the UI shows the message instead of crashing.
        return {"Error": str(e)}
    finally:
        # Release the file handle / mapping held by pefile.
        if pe is not None:
            pe.close()
289
 
 
309
  os.makedirs(exe_output_dir, exist_ok=True)
310
  nested_files[rel_path] = {
311
  'type': 'exe',
312
+ 'analysis': analyze_binary(full_path),
313
+ 'python_extraction': try_pyinstaller_extraction(full_path, exe_output_dir)
314
  }
315
  elif file.endswith('.dll'):
316
  nested_files[rel_path] = {
317
  'type': 'dll',
318
+ 'analysis': analyze_binary(full_path, is_dll=True)
319
  }
320
 
321
  return {
 
337
 
338
  st.success(f"File uploaded: {uploaded_file.name}")
339
 
340
+ # Display file size
341
+ file_size = os.path.getsize(file_path)
342
+ st.info(f"File size: {file_size/1024:.2f} KB")
343
+
344
  # Process based on file type
345
  if uploaded_file.name.lower().endswith('.zip'):
346
  st.subheader("ZIP File Contents")
347
+ with st.spinner("Analyzing ZIP contents..."):
348
+ output_dir = os.path.join(temp_dir, "extracted")
349
+ os.makedirs(output_dir, exist_ok=True)
 
 
 
 
 
 
 
 
350
 
351
+ result = process_zip_file(file_path, output_dir)
352
+
353
+ if 'error' in result:
354
+ st.error(f"Error processing ZIP file: {result['error']}")
355
+ else:
356
+ with st.expander("ZIP Contents", expanded=True):
357
+ st.write(f"Total files: {len(result['file_list'])}")
358
+ st.code("\n".join(result['file_list']))
359
+
360
+ if result['nested_files']:
361
+ st.subheader("Detected Executable Files")
362
+ for file_path, file_info in result['nested_files'].items():
363
+ with st.expander(f"{file_path} ({file_info['type'].upper()})"):
364
+ if file_info['type'] == 'exe':
365
+ analysis = file_info['analysis']
366
+ python_extraction = file_info['python_extraction']
367
+
368
+ tabs = st.tabs(["Summary", "Imports", "Strings", "Assembly", "Python Code"])
369
+
370
+ with tabs[0]:
371
+ if "Error" in analysis:
372
+ st.error(analysis["Error"])
373
+ else:
374
+ st.json(analysis.get("basic_info", {}))
375
+
376
+ st.subheader("Sections")
377
+ sections_df = {
378
+ "Name": [],
379
+ "VirtualSize": [],
380
+ "SizeOfRawData": [],
381
+ "Entropy": []
382
+ }
383
+ for section in analysis.get("sections", []):
384
+ sections_df["Name"].append(section["Name"])
385
+ sections_df["VirtualSize"].append(section["VirtualSize"])
386
+ sections_df["SizeOfRawData"].append(section["SizeOfRawData"])
387
+ sections_df["Entropy"].append(section["Entropy"])
388
+
389
+ st.dataframe(sections_df)
390
+
391
+ with tabs[1]:
392
+ for imp in analysis.get("imports", []):
393
+ with st.expander(f"DLL: {imp['DLL']}"):
394
+ st.code("\n".join(imp["Functions"]))
395
+
396
+ with tabs[2]:
397
+ st.subheader("Strings Found")
398
+ all_strings = analysis.get("strings", [])
399
+ interesting_strings = [s for s in all_strings if len(s) > 8] # Filter out very short strings
400
+ st.code("\n".join(interesting_strings[:500])) # Limit to 500 strings
401
+
402
+ with tabs[3]:
403
+ st.subheader("Disassembly")
404
+ if "disassembly" in analysis:
405
+ st.code(analysis["disassembly"], language="asm")
406
+ else:
407
+ st.warning("Disassembly not available")
408
+
409
+ if "pseudocode" in analysis and analysis["pseudocode"]:
410
+ st.subheader("Decompiled Pseudocode")
411
+ st.code(analysis["pseudocode"], language="c")
412
+
413
+ with tabs[4]:
414
+ if python_extraction.get("success", False):
415
+ st.success("Python code extracted successfully!")
416
+ for filename, content in python_extraction.get("files", {}).items():
417
+ with st.expander(f"Python File: {filename}"):
418
+ st.code(content, language="python")
419
+ else:
420
+ st.warning(python_extraction.get("message", "Not a Python executable or extraction failed."))
421
+ else: # DLL
422
+ analysis = file_info['analysis']
423
+
424
+ tabs = st.tabs(["Summary", "Exports", "Imports", "Strings", "Assembly"])
425
+
426
+ with tabs[0]:
427
+ if "Error" in analysis:
428
+ st.error(analysis["Error"])
429
+ else:
430
+ st.json(analysis.get("basic_info", {}))
431
+
432
+ st.subheader("Sections")
433
+ sections_df = {
434
+ "Name": [],
435
+ "VirtualSize": [],
436
+ "SizeOfRawData": [],
437
+ "Entropy": []
438
+ }
439
+ for section in analysis.get("sections", []):
440
+ sections_df["Name"].append(section["Name"])
441
+ sections_df["VirtualSize"].append(section["VirtualSize"])
442
+ sections_df["SizeOfRawData"].append(section["SizeOfRawData"])
443
+ sections_df["Entropy"].append(section["Entropy"])
444
+
445
+ st.dataframe(sections_df)
446
 
447
+ with tabs[1]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
  st.subheader("Exported Functions")
449
+ st.json(analysis.get("exports", []))
450
+
451
+ with tabs[2]:
452
+ for imp in analysis.get("imports", []):
453
+ with st.expander(f"DLL: {imp['DLL']}"):
454
+ st.code("\n".join(imp["Functions"]))
455
+
456
+ with tabs[3]:
457
+ st.subheader("Strings Found")
458
+ all_strings = analysis.get("strings", [])
459
+ interesting_strings = [s for s in all_strings if len(s) > 8] # Filter out very short strings
460
+ st.code("\n".join(interesting_strings[:500])) # Limit to 500 strings
461
+
462
+ with tabs[4]:
463
+ st.subheader("Disassembly")
464
+ if "disassembly" in analysis:
465
+ st.code(analysis["disassembly"], language="asm")
466
+ else:
467
+ st.warning("Disassembly not available")
468
+
469
+ if "pseudocode" in analysis and analysis["pseudocode"]:
470
+ st.subheader("Decompiled Pseudocode")
471
+ st.code(analysis["pseudocode"], language="c")
472
 
473
  elif uploaded_file.name.lower().endswith('.exe'):
474
+ st.subheader("EXE File Analysis")
475
+ with st.spinner("Reverse engineering executable..."):
476
+ output_dir = os.path.join(temp_dir, "exe_unpacked")
477
+ os.makedirs(output_dir, exist_ok=True)
478
+
479
+ # Perform comprehensive analysis
480
+ analysis = analyze_binary(file_path)
481
+
482
+ # Try Python extraction
483
+ python_extraction = try_pyinstaller_extraction(file_path, output_dir)
484
+
485
+ tabs = st.tabs(["Summary", "Imports", "Strings", "Assembly", "Python Code"])
486
 
 
487
  with tabs[0]:
488
+ if "Error" in analysis:
489
+ st.error(analysis["Error"])
490
+ else:
491
+ st.subheader("Basic Information")
492
+ st.json(analysis.get("basic_info", {}))
493
+
494
+ st.subheader("Sections")
495
+ sections_df = {
496
+ "Name": [],
497
+ "VirtualSize": [],
498
+ "SizeOfRawData": [],
499
+ "Entropy": []
500
+ }
501
+ for section in analysis.get("sections", []):
502
+ sections_df["Name"].append(section["Name"])
503
+ sections_df["VirtualSize"].append(section["VirtualSize"])
504
+ sections_df["SizeOfRawData"].append(section["SizeOfRawData"])
505
+ sections_df["Entropy"].append(section["Entropy"])
506
+
507
+ st.dataframe(sections_df)
508
+
509
  with tabs[1]:
510
+ st.subheader("Imported Functions")
511
+ for imp in analysis.get("imports", []):
512
+ with st.expander(f"DLL: {imp['DLL']}"):
513
+ st.code("\n".join(imp["Functions"]))
514
+
515
+ with tabs[2]:
516
+ st.subheader("Strings Found")
517
+ all_strings = analysis.get("strings", [])
518
+ interesting_strings = [s for s in all_strings if len(s) > 8] # Filter out very short strings
519
+ st.code("\n".join(interesting_strings[:500])) # Limit to 500 strings
520
+
521
+ with tabs[3]:
522
+ st.subheader("Disassembly")
523
+ if "disassembly" in analysis:
524
+ st.code(analysis["disassembly"], language="asm")
525
+ else:
526
+ st.warning("Disassembly not available")
527
 
528
+ if "pseudocode" in analysis and analysis["pseudocode"]:
529
+ st.subheader("Decompiled Pseudocode")
530
+ st.code(analysis["pseudocode"], language="c")
531
+
532
+ with tabs[4]:
533
+ if python_extraction.get("success", False):
534
  st.success("Python code extracted successfully!")
535
+ for filename, content in python_extraction.get("files", {}).items():
536
  with st.expander(f"Python File: {filename}"):
537
  st.code(content, language="python")
538
  else:
539
+ st.warning(python_extraction.get("message", "Not a Python executable or extraction failed."))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
 
541
  elif uploaded_file.name.lower().endswith('.dll'):
542
+ st.subheader("DLL File Analysis")
543
+ with st.spinner("Reverse engineering DLL..."):
544
+ # Perform comprehensive analysis (with is_dll=True)
545
+ analysis = analyze_binary(file_path, is_dll=True)
546
+
547
+ tabs = st.tabs(["Summary", "Exports", "Imports", "Strings", "Assembly"])
548
 
 
549
  with tabs[0]:
550
+ if "Error" in analysis:
551
+ st.error(analysis["Error"])
552
+ else:
553
+ st.subheader("Basic Information")
554
+ st.json(analysis.get("basic_info", {}))
555
+
556
+ st.subheader("Sections")
557
+ sections_df = {
558
+ "Name": [],
559
+ "VirtualSize": [],
560
+ "SizeOfRawData": [],
561
+ "Entropy": []
562
+ }
563
+ for section in analysis.get("sections", []):
564
+ sections_df["Name"].append(section["Name"])
565
+ sections_df["VirtualSize"].append(section["VirtualSize"])
566
+ sections_df["SizeOfRawData"].append(section["SizeOfRawData"])
567
+ sections_df["Entropy"].append(section["Entropy"])
568
+
569
+ st.dataframe(sections_df)
570
+
571
  with tabs[1]:
572
+ st.subheader("Exported Functions")
573
+ st.json(analysis.get("exports", []))
574
+
 
 
 
 
 
 
575
  with tabs[2]:
576
+ st.subheader("Imported Functions")
577
+ for imp in analysis.get("imports", []):
578
+ with st.expander(f"DLL: {imp['DLL']}"):
579
+ st.code("\n".join(imp["Functions"]))
580
+
581
+ with tabs[3]:
582
+ st.subheader("Strings Found")
583
+ all_strings = analysis.get("strings", [])
584
+ interesting_strings = [s for s in all_strings if len(s) > 8] # Filter out very short strings
585
+ st.code("\n".join(interesting_strings[:500])) # Limit to 500 strings
586
+
587
+ with tabs[4]:
588
+ st.subheader("Disassembly")
589
+ if "disassembly" in analysis:
590
+ st.code(analysis["disassembly"], language="asm")
591
+ else:
592
+ st.warning("Disassembly not available")
593
+
594
+ if "pseudocode" in analysis and analysis["pseudocode"]:
595
+ st.subheader("Decompiled Pseudocode")
596
+ st.code(analysis["pseudocode"], language="c")