euler314 committed on
Commit
157cf12
·
verified ·
1 Parent(s): 43d0569

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +313 -26
app.py CHANGED
@@ -4,26 +4,209 @@ import zipfile
4
  import tempfile
5
  import pefile
6
  import shutil
 
 
 
7
  from pathlib import Path
 
8
 
9
- st.set_page_config(page_title="File Analyzer", page_icon="🔍")
10
 
11
- st.title("File Analysis Tool")
12
  st.markdown("""
13
- This tool allows you to analyze various file types:
14
  - Extract and view contents of .zip files
15
- - Unpack .exe files to examine their contents
16
- - View information about .dll files
 
17
  """)
18
 
19
- # No size limit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  def unpack_exe(file_path, output_dir):
22
- """Extract information from an EXE file using pefile"""
23
  try:
24
  pe = pefile.PE(file_path)
25
 
26
- # Create a basic info dictionary
27
  info = {
28
  "Machine": hex(pe.FILE_HEADER.Machine),
29
  "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
@@ -59,6 +242,7 @@ def unpack_exe(file_path, output_dir):
59
  if hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
60
  resource_dir = os.path.join(output_dir, "resources")
61
  os.makedirs(resource_dir, exist_ok=True)
 
62
 
63
  for resource_type in pe.DIRECTORY_ENTRY_RESOURCE.entries:
64
  if hasattr(resource_type, 'directory'):
@@ -67,19 +251,33 @@ def unpack_exe(file_path, output_dir):
67
  for resource_lang in resource_id.directory.entries:
68
  data = pe.get_data(resource_lang.data.struct.OffsetToData, resource_lang.data.struct.Size)
69
  resource_filename = f"resource_{resource_type.id}_{resource_id.id}_{resource_lang.id}"
70
- with open(os.path.join(resource_dir, resource_filename), 'wb') as f:
 
71
  f.write(data)
 
 
 
 
 
 
 
 
 
72
 
73
- return info
 
 
 
 
74
  except Exception as e:
75
  return {"Error": str(e)}
76
 
77
  def analyze_dll(file_path):
78
- """Extract information from a DLL file using pefile"""
79
  try:
80
  pe = pefile.PE(file_path)
81
 
82
- # Create a basic info dictionary
83
  info = {
84
  "Machine": hex(pe.FILE_HEADER.Machine),
85
  "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
@@ -99,7 +297,7 @@ def analyze_dll(file_path):
99
  "Entropy": section.get_entropy()
100
  })
101
 
102
- # Get exports if present
103
  if hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
104
  info["Exports"] = []
105
  for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols:
@@ -120,7 +318,13 @@ def analyze_dll(file_path):
120
  "Functions": imports
121
  })
122
 
123
- return info
 
 
 
 
 
 
124
  except Exception as e:
125
  return {"Error": str(e)}
126
 
@@ -165,7 +369,6 @@ def process_zip_file(file_path, temp_dir):
165
  uploaded_file = st.file_uploader("Upload a file (.zip, .exe, or .dll)", type=["zip", "exe", "dll"])
166
 
167
  if uploaded_file is not None:
168
- # Process file without size check
169
  with tempfile.TemporaryDirectory() as temp_dir:
170
  # Save the uploaded file to the temporary directory
171
  file_path = os.path.join(temp_dir, uploaded_file.name)
@@ -193,30 +396,114 @@ if uploaded_file is not None:
193
  st.subheader("Detected Executable Files")
194
  for file_path, file_info in result['nested_files'].items():
195
  with st.expander(f"{file_path} ({file_info['type'].upper()})"):
196
- st.json(file_info['info'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
 
198
  elif uploaded_file.name.lower().endswith('.exe'):
199
- st.subheader("EXE File Analysis")
200
  output_dir = os.path.join(temp_dir, "exe_unpacked")
201
  os.makedirs(output_dir, exist_ok=True)
202
 
203
  try:
204
  exe_info = unpack_exe(file_path, output_dir)
205
- st.json(exe_info)
206
 
207
- # Check if resources were extracted
208
- resource_dir = os.path.join(output_dir, "resources")
209
- if os.path.exists(resource_dir) and os.listdir(resource_dir):
210
- st.subheader("Extracted Resources")
211
- for resource_file in os.listdir(resource_dir):
212
- st.text(f"Resource: {resource_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  except Exception as e:
214
  st.error(f"Error analyzing EXE file: {str(e)}")
215
 
216
  elif uploaded_file.name.lower().endswith('.dll'):
217
- st.subheader("DLL File Information")
218
  try:
219
  dll_info = analyze_dll(file_path)
220
- st.json(dll_info)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  except Exception as e:
222
  st.error(f"Error analyzing DLL file: {str(e)}")
 
4
  import tempfile
5
  import pefile
6
  import shutil
7
+ import subprocess
8
+ import re
9
+ import struct
10
  from pathlib import Path
11
+ from capstone import Cs, CS_ARCH_X86, CS_MODE_32, CS_MODE_64
12
 
13
+ st.set_page_config(page_title="Advanced File Analyzer", page_icon="🔍", layout="wide")
14
 
15
+ st.title("Advanced File Analysis Tool")
16
  st.markdown("""
17
+ This tool allows you to:
18
  - Extract and view contents of .zip files
19
+ - Decompile .exe files to Python-like code
20
+ - View decompiled code from .dll files
21
+ - Automatically analyze nested executables
22
  """)
23
 
24
def try_pyinstaller_extraction(file_path, output_dir):
    """Attempt to extract and decompile Python code from a PyInstaller executable.

    Runs ``pyinstxtractor`` on ``file_path`` with ``output_dir`` as the working
    directory, then tries to decompile every ``.pyc``/``.pyo`` file found under
    the resulting ``<basename>_extracted`` directory with ``uncompyle6``.

    Args:
        file_path: Path to the executable to inspect.
        output_dir: Directory used as the extractor's working directory; the
            ``<basename>_extracted`` directory is looked up inside it.

    Returns:
        A dict with ``"success"`` (bool) and ``"message"`` (str). On success it
        also has ``"files"``: a mapping from each bytecode file's path
        (relative to the extraction directory) to its decompiled source text.
        Failures are reported through the dict; no exception is propagated.
    """
    # Function-scope imports: these modules are not part of the file's
    # top-level import block.
    import importlib.util
    import sys

    not_extracted = {
        "success": False,
        "message": "Not a PyInstaller executable or extraction failed"
    }

    try:
        # Nothing to extract; avoid spawning pip/extractor processes at all.
        if not os.path.isfile(file_path):
            return not_extracted

        # The extractor runs with cwd=output_dir, so a relative path would be
        # resolved against the wrong directory unless it is made absolute.
        file_path = os.path.abspath(file_path)

        # Install the extractor only when it is actually missing, and always
        # through the interpreter that is running this app.
        if importlib.util.find_spec("pyinstxtractor") is None:
            subprocess.run([sys.executable, "-m", "pip", "install", "pyinstxtractor"],
                           capture_output=True)

        subprocess.run([sys.executable, "-m", "pyinstxtractor", file_path],
                       cwd=output_dir, capture_output=True, text=True)

        extracted_dir = os.path.join(output_dir, os.path.basename(file_path) + "_extracted")
        if not os.path.isdir(extracted_dir):
            return not_extracted

        if shutil.which("uncompyle6") is None:
            subprocess.run([sys.executable, "-m", "pip", "install", "uncompyle6"],
                           capture_output=True)

        python_files = {}
        for root, _, files in os.walk(extracted_dir):
            for name in files:
                if not name.endswith(('.pyc', '.pyo')):
                    continue
                pyc_path = os.path.join(root, name)
                py_path = pyc_path + ".py"
                try:
                    subprocess.run(["uncompyle6", pyc_path, "-o", py_path], capture_output=True)
                    with open(py_path, 'r', encoding='utf-8', errors='ignore') as f:
                        rel_path = os.path.relpath(pyc_path, extracted_dir)
                        python_files[rel_path] = f.read()
                except OSError:
                    # Best effort: a file that could not be decompiled (no
                    # output written, decompiler not launchable) is skipped.
                    continue

        return {
            "success": True,
            "message": "Successfully extracted Python code",
            "files": python_files
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"PyInstaller extraction error: {str(e)}"
        }
69
+
70
def disassemble_binary(file_path, is_dll=False):
    """Disassemble the executable sections of a PE file into a short preview.

    For an EXE, the section containing the entry point is disassembled starting
    at the entry point; every other executable section (and every section of a
    DLL) is disassembled from its start. At most 4096 bytes per section are
    disassembled.

    Args:
        file_path: Path to the PE file (EXE or DLL).
        is_dll: When true, skip entry-point handling and collect the names of
            exported symbols instead.

    Returns:
        On success, a dict with ``"success": True``, ``"code_sections"`` (list
        of ``{"name", "disassembly"}`` dicts), ``"exports"`` (list of names)
        and ``"pseudo_python"`` (output of ``generate_pseudo_python``).
        On any error, ``{"success": False, "message": ...}``.
    """
    preview_limit = 4096  # bytes disassembled per section
    pe = None
    try:
        pe = pefile.PE(file_path)

        # Optional-header magic 0x20b marks PE32+ (64-bit); otherwise 32-bit.
        is_64bit = pe.OPTIONAL_HEADER.Magic == 0x20b
        md = Cs(CS_ARCH_X86, CS_MODE_64 if is_64bit else CS_MODE_32)
        md.detail = True

        # Only EXEs get their entry point marked.
        entry_rva = None if is_dll else pe.OPTIONAL_HEADER.AddressOfEntryPoint

        code_sections = []
        for section in pe.sections:
            # 0x20000000 is IMAGE_SCN_MEM_EXECUTE: keep executable sections only.
            if not section.Characteristics & 0x20000000:
                continue

            section_name = section.Name.decode('utf-8', errors='ignore').strip('\x00')
            section_data = pe.get_data(section.VirtualAddress, section.SizeOfRawData)

            # Start at the entry point when it lies inside this section.
            is_entry = entry_rva is not None and section.contains_rva(entry_rva)
            start_offset = entry_rva - section.VirtualAddress if is_entry else 0

            # Clamp so an entry point past the raw data yields an empty preview
            # instead of a negative slice length.
            preview_size = max(0, min(len(section_data) - start_offset, preview_limit))
            preview = section_data[start_offset:start_offset + preview_size]

            disassembly = []
            for i, (address, size, mnemonic, op_str) in enumerate(
                    md.disasm_lite(preview, section.VirtualAddress + start_offset)):
                entry_marker = "ENTRY POINT -> " if is_entry and i == 0 else ""
                disassembly.append(f"{entry_marker}0x{address:08x}: {mnemonic} {op_str}")

            code_sections.append({
                "name": section_name,
                "disassembly": disassembly
            })

        # Get exports for DLLs
        exports = []
        if is_dll and hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
            exports = [
                exp.name.decode('utf-8', errors='ignore')
                for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols
                if exp.name
            ]

        # Convert to pseudo-Python
        pseudo_python = generate_pseudo_python(code_sections, exports, is_dll)

        return {
            "success": True,
            "code_sections": code_sections,
            "exports": exports,
            "pseudo_python": pseudo_python
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"Disassembly error: {str(e)}"
        }
    finally:
        # Release the parsed file so the caller's temporary directory can be
        # removed afterwards.
        if pe is not None:
            pe.close()
148
+
149
def generate_pseudo_python(code_sections, exports, is_dll):
    """Generate Python-like pseudo-code text from disassembly listings.

    The output is a rough, human-readable approximation for display only; it
    is not the original source and is generally not runnable.

    Args:
        code_sections: List of dicts with ``"name"`` (section name) and
            ``"disassembly"`` (list of strings shaped like
            ``"[ENTRY POINT -> ]0x<address>: <mnemonic> <operands>"``).
        exports: List of exported symbol names; used only when ``is_dll``.
        is_dll: When true, emit a stub ``def`` for every export.

    Returns:
        The pseudo-code as a single newline-joined string.
    """
    two_operands = re.compile(r"(\w+),\s+(.+)")
    hex_literal = re.compile(r"0x[0-9a-f]+")

    pseudo_code = [
        "# This is an approximation generated from binary code",
        "# It is NOT the original source code",
        "",
    ]

    if is_dll:
        pseudo_code.append("# DLL Export Functions")
        for export in exports:
            if export.isidentifier():
                pseudo_code.append(f"def {export}():")
            else:
                # Decorated/mangled export names are not valid identifiers;
                # emit a sanitized name and keep the real one in a comment.
                safe_name = re.sub(r"\W", "_", export)
                if not safe_name or safe_name[0].isdigit():
                    safe_name = "_" + safe_name
                pseudo_code.append(f"def {safe_name}():  # exported as {export}")
            pseudo_code.append("    # Implementation not recoverable from binary")
            pseudo_code.append("    pass")
            pseudo_code.append("")

    for section in code_sections:
        pseudo_code.append(f"# Code Section: {section['name']}")
        pseudo_code.append("def main():")

        body = []
        # Limit to the first 100 instructions for preview
        for line in section['disassembly'][:100]:
            # Drop the "0x<address>: " prefix so the instruction's own address
            # is never mistaken for an operand.
            instruction = line.partition(": ")[2]
            mnemonic, _, operands = instruction.partition(" ")
            operands = operands.strip()

            if mnemonic == "call":
                # Infer a function call from the hex target in the operands.
                target = hex_literal.search(operands)
                if target:
                    body.append(f"    call_function_{target.group(0)}()")
                    continue
            elif mnemonic == "mov":
                # Register/simple destination: render as an assignment.
                match = two_operands.fullmatch(operands)
                if match:
                    dest, source = match.groups()
                    body.append(f"    {dest} = {source}  # {line}")
                    continue
            elif mnemonic == "cmp":
                match = two_operands.fullmatch(operands)
                if match:
                    a, b = match.groups()
                    body.append(f"    if {a} == {b}:  # {line}")
                    body.append("        pass")
                    continue

            # Anything not recognized is kept verbatim as a comment rather
            # than silently dropped.
            body.append(f"    # {line}")

        # A function with no statements would not be valid Python syntax.
        pseudo_code.extend(body or ["    pass"])
        pseudo_code.append("")

        pseudo_code.append("if __name__ == '__main__':")
        pseudo_code.append("    main()")
        pseudo_code.append("")

    return "\n".join(pseudo_code)
203
 
204
  def unpack_exe(file_path, output_dir):
205
+ """Extract information from an EXE file and attempt to convert to Python"""
206
  try:
207
  pe = pefile.PE(file_path)
208
 
209
+ # Basic PE information
210
  info = {
211
  "Machine": hex(pe.FILE_HEADER.Machine),
212
  "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
 
242
  if hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
243
  resource_dir = os.path.join(output_dir, "resources")
244
  os.makedirs(resource_dir, exist_ok=True)
245
+ extracted_resources = []
246
 
247
  for resource_type in pe.DIRECTORY_ENTRY_RESOURCE.entries:
248
  if hasattr(resource_type, 'directory'):
 
251
  for resource_lang in resource_id.directory.entries:
252
  data = pe.get_data(resource_lang.data.struct.OffsetToData, resource_lang.data.struct.Size)
253
  resource_filename = f"resource_{resource_type.id}_{resource_id.id}_{resource_lang.id}"
254
+ resource_path = os.path.join(resource_dir, resource_filename)
255
+ with open(resource_path, 'wb') as f:
256
  f.write(data)
257
+ extracted_resources.append(resource_filename)
258
+
259
+ info["ExtractedResources"] = extracted_resources
260
+
261
+ # Try to disassemble and convert to pseudo-Python
262
+ disassembly_result = disassemble_binary(file_path)
263
+
264
+ # Try PyInstaller extraction for Python executables
265
+ pyinstaller_result = try_pyinstaller_extraction(file_path, output_dir)
266
 
267
+ return {
268
+ "basic_info": info,
269
+ "disassembly": disassembly_result,
270
+ "pyinstaller": pyinstaller_result
271
+ }
272
  except Exception as e:
273
  return {"Error": str(e)}
274
 
275
  def analyze_dll(file_path):
276
+ """Extract information and code from a DLL file"""
277
  try:
278
  pe = pefile.PE(file_path)
279
 
280
+ # Basic information
281
  info = {
282
  "Machine": hex(pe.FILE_HEADER.Machine),
283
  "TimeDateStamp": pe.FILE_HEADER.TimeDateStamp,
 
297
  "Entropy": section.get_entropy()
298
  })
299
 
300
+ # Get exports
301
  if hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
302
  info["Exports"] = []
303
  for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols:
 
318
  "Functions": imports
319
  })
320
 
321
+ # Try to disassemble and convert to pseudo-Python
322
+ disassembly_result = disassemble_binary(file_path, is_dll=True)
323
+
324
+ return {
325
+ "basic_info": info,
326
+ "disassembly": disassembly_result
327
+ }
328
  except Exception as e:
329
  return {"Error": str(e)}
330
 
 
369
  uploaded_file = st.file_uploader("Upload a file (.zip, .exe, or .dll)", type=["zip", "exe", "dll"])
370
 
371
  if uploaded_file is not None:
 
372
  with tempfile.TemporaryDirectory() as temp_dir:
373
  # Save the uploaded file to the temporary directory
374
  file_path = os.path.join(temp_dir, uploaded_file.name)
 
396
  st.subheader("Detected Executable Files")
397
  for file_path, file_info in result['nested_files'].items():
398
  with st.expander(f"{file_path} ({file_info['type'].upper()})"):
399
+ if file_info['type'] == 'exe':
400
+ tabs = st.tabs(["Basic Info", "Python Code", "Disassembly"])
401
+ with tabs[0]:
402
+ st.json(file_info['info'].get('basic_info', {}))
403
+ with tabs[1]:
404
+ pyinstaller_result = file_info['info'].get('pyinstaller', {})
405
+ disassembly = file_info['info'].get('disassembly', {})
406
+
407
+ if pyinstaller_result.get('success', False):
408
+ st.success("Python code extracted successfully!")
409
+ for filename, content in pyinstaller_result.get('files', {}).items():
410
+ with st.expander(f"Python File: {filename}"):
411
+ st.code(content, language="python")
412
+ else:
413
+ st.warning("Not a Python executable or extraction failed.")
414
+ st.subheader("Generated Python-like Code")
415
+ st.code(disassembly.get('pseudo_python', "# No code could be generated"), language="python")
416
+ with tabs[2]:
417
+ disassembly = file_info['info'].get('disassembly', {})
418
+ if disassembly.get('success', False):
419
+ for section in disassembly.get('code_sections', []):
420
+ with st.expander(f"Section: {section['name']}"):
421
+ st.code("\n".join(section['disassembly']), language="asm")
422
+ else:
423
+ st.error(disassembly.get('message', "Disassembly failed"))
424
+ else: # DLL
425
+ tabs = st.tabs(["Basic Info", "DLL Code", "Exports/Imports"])
426
+ with tabs[0]:
427
+ st.json(file_info['info'].get('basic_info', {}))
428
+ with tabs[1]:
429
+ disassembly = file_info['info'].get('disassembly', {})
430
+ st.subheader("Generated Python-like Code")
431
+ st.code(disassembly.get('pseudo_python', "# No code could be generated"), language="python")
432
+ with tabs[2]:
433
+ basic_info = file_info['info'].get('basic_info', {})
434
+ if 'Exports' in basic_info:
435
+ st.subheader("Exported Functions")
436
+ st.json(basic_info['Exports'])
437
+ if 'Imports' in basic_info:
438
+ st.subheader("Imported DLLs and Functions")
439
+ st.json(basic_info['Imports'])
440
 
441
  elif uploaded_file.name.lower().endswith('.exe'):
442
+ st.subheader("EXE File Analysis and Decompilation")
443
  output_dir = os.path.join(temp_dir, "exe_unpacked")
444
  os.makedirs(output_dir, exist_ok=True)
445
 
446
  try:
447
  exe_info = unpack_exe(file_path, output_dir)
 
448
 
449
+ tabs = st.tabs(["Basic Info", "Python Code", "Disassembly", "Resources"])
450
+ with tabs[0]:
451
+ st.json(exe_info.get('basic_info', {}))
452
+ with tabs[1]:
453
+ pyinstaller_result = exe_info.get('pyinstaller', {})
454
+ disassembly = exe_info.get('disassembly', {})
455
+
456
+ if pyinstaller_result.get('success', False):
457
+ st.success("Python code extracted successfully!")
458
+ for filename, content in pyinstaller_result.get('files', {}).items():
459
+ with st.expander(f"Python File: {filename}"):
460
+ st.code(content, language="python")
461
+ else:
462
+ st.warning(pyinstaller_result.get('message', "Not a Python executable or extraction failed."))
463
+ st.subheader("Generated Python-like Code")
464
+ st.code(disassembly.get('pseudo_python', "# No code could be generated"), language="python")
465
+ with tabs[2]:
466
+ disassembly = exe_info.get('disassembly', {})
467
+ if disassembly.get('success', False):
468
+ for section in disassembly.get('code_sections', []):
469
+ with st.expander(f"Section: {section['name']}"):
470
+ st.code("\n".join(section['disassembly']), language="asm")
471
+ else:
472
+ st.error(disassembly.get('message', "Disassembly failed"))
473
+ with tabs[3]:
474
+ basic_info = exe_info.get('basic_info', {})
475
+ if 'ExtractedResources' in basic_info:
476
+ st.write(f"Found {len(basic_info['ExtractedResources'])} resources")
477
+ for resource in basic_info['ExtractedResources']:
478
+ st.text(f"Resource: {resource}")
479
  except Exception as e:
480
  st.error(f"Error analyzing EXE file: {str(e)}")
481
 
482
  elif uploaded_file.name.lower().endswith('.dll'):
483
+ st.subheader("DLL File Analysis and Decompilation")
484
  try:
485
  dll_info = analyze_dll(file_path)
486
+
487
+ tabs = st.tabs(["Basic Info", "DLL Code", "Exports/Imports"])
488
+ with tabs[0]:
489
+ st.json(dll_info.get('basic_info', {}))
490
+ with tabs[1]:
491
+ disassembly = dll_info.get('disassembly', {})
492
+ st.subheader("Generated Python-like Code")
493
+ st.code(disassembly.get('pseudo_python', "# No code could be generated"), language="python")
494
+
495
+ if disassembly.get('success', False):
496
+ st.subheader("Assembly Code")
497
+ for section in disassembly.get('code_sections', []):
498
+ with st.expander(f"Section: {section['name']}"):
499
+ st.code("\n".join(section['disassembly']), language="asm")
500
+ with tabs[2]:
501
+ basic_info = dll_info.get('basic_info', {})
502
+ if 'Exports' in basic_info:
503
+ st.subheader("Exported Functions")
504
+ st.json(basic_info['Exports'])
505
+ if 'Imports' in basic_info:
506
+ st.subheader("Imported DLLs and Functions")
507
+ st.json(basic_info['Imports'])
508
  except Exception as e:
509
  st.error(f"Error analyzing DLL file: {str(e)}")