Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,462 Bytes
7ce9459 d0f659d 7ce9459 d0f659d 3f47af7 7ce9459 3f47af7 7ce9459 3f47af7 7ce9459 3f47af7 7ce9459 3f47af7 7ce9459 3f47af7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import argparse
import os
#from utils import *
from typing import Dict, List
from tqdm import tqdm
import re
class ParseError(Exception):
    """Error raised when a parsing step fails (e.g. by ``parse_signature``).

    Attributes:
        msg: Human-readable description of what could not be parsed.
    """

    def __init__(self, msg):
        # Forward the message to Exception so that str(exc) and exc.args carry
        # it even when the error is built with ``msg=`` as a keyword argument.
        super().__init__(msg)
        self.msg = msg
# C/C++ `#include` directive (with trailing newline) pointing at the absolute
# path of `defs.hh`. Not referenced in the visible part of this file --
# presumably prepended to generated sources; confirm against callers.
HEADER = '#include "/home/ReSym/clang-parser/defs.hh"\n'
def process_funname(raw_addr:str) -> str:
    """Reduce a function name to its address part.

    ``'main'`` is returned unchanged; ``'sub_<suffix>'`` yields ``'<suffix>'``
    (e.g. ``sub_401220 -> 401220``). Any other name yields ``None``, even
    though the return annotation says ``str``.
    """
    if raw_addr == 'main':
        return raw_addr
    found = re.match(r'^sub_([\w\d]+)$', raw_addr)
    return found.group(1) if found else None
def hex_to_decimal(hex_str : str) -> int:
    """Convert a bare hexadecimal string to an integer.

    The input may carry a leading ``-`` but no ``0x`` prefix. Strings that are
    not made up purely of hex digits yield ``None`` instead of raising.
    """
    looks_like_hex = re.match(r'^-?[0-9a-fA-F]+$', hex_str) is not None
    return int(hex_str, 16) if looks_like_hex else None
def extract_comments(fun_content:List[str]) -> List[Dict]:
    """Collect variable declarations that carry a trailing ``//`` comment.

    Every line of ``fun_content`` shaped like ``<type> <name>; // <comment>``
    produces one dict with the keys ``name``, ``type``, ``comment``,
    ``array_size``, ``ptr_level``, ``rbp_offset_hex``, ``rbp_offset_dec`` and
    ``original_line``. Lines that do not match are skipped.

    Single and double quotes in ``comment`` and ``original_line`` are replaced
    by backticks. ``array_size`` is the last ``[N]`` suffix of the name (or
    ``None``). The rbp offset is taken from a ``[rbp-<hex>h]`` marker in the
    comment (or ``None`` when absent).
    """
    decl_re = re.compile(r'^(.+?\s+\**)(\S+);\s+\/\/(.*)$')  # <g1> <g2>; // <g3>
    rbp_re = re.compile(r'\[rbp(-[\d\w]+?)h\]')              # [rbp-<g1>h]
    array_re = re.compile(r'^(.*?)\[(\d+)\]$')               # <g1>[<g2>]

    def _backtick_quotes(text):
        # Replace both quote styles with backticks.
        return text.replace('"', "`").replace("'", '`')

    declarations = []
    for raw_line in fun_content:
        stripped = raw_line.strip()
        decl = decl_re.match(stripped)
        if decl is None:
            continue
        type_text, name_text, note = (part.strip() for part in decl.groups())

        # Split a trailing "[N]" off the name, if there is one.
        array_len = None
        array_hit = array_re.match(name_text)
        if array_hit:
            name_text = array_hit.group(1)
            array_len = int(array_hit.group(2))

        # Pull the rbp-relative offset out of the comment.
        rbp_hit = rbp_re.search(note)
        offset_hex = rbp_hit.group(1) if rbp_hit else None
        offset_dec = None if offset_hex is None else hex_to_decimal(offset_hex)

        # Stars directly before the name are captured by the type group of
        # decl_re, so this only counts stars embedded in the name itself.
        star_count = name_text.count("*")
        name_text = name_text.replace('*', "")

        declarations.append({
            'name': name_text,
            'type': type_text,
            'comment': _backtick_quotes(note.strip()),
            'array_size': array_len,
            'ptr_level': star_count,
            'rbp_offset_hex': offset_hex,
            'rbp_offset_dec': offset_dec,
            'original_line': _backtick_quotes(stripped)
        })
    return declarations
def parse_signature(file_content:List[str], funname:str=None) -> List[Dict]:
    """Parse the argument list from a function signature.

    The signature is searched for in the first three lines of ``file_content``
    (fewer if the input is shorter).

    Args:
        file_content: Lines of the function text, or one string that is split
            on newlines.
        funname: Function name to look for. When omitted, any ``sub_<...>``
            name is accepted. ``main`` is accepted in both cases. The name is
            inserted into the regular expression unescaped.

    Returns:
        One dict per argument with the keys ``name``, ``type`` and
        ``original_line``. A variadic ``...`` yields an entry with only
        ``name`` and ``original_line``. A lone ``void`` or an empty argument
        list yields no entries.

    Raises:
        ParseError: if no signature is found, an argument cannot be split into
            type and name, or an argument name occurs more than once.
    """
    if funname:
        pattern = r'(({})|main)\((.*?)\)'.format(funname)  # <g1>(<g3>)
    else:
        pattern = r'((sub_[\d\w]+)|main)\((.*?)\)'  # <g1>(<g3>)
    if isinstance(file_content, str):
        file_content = file_content.split('\n')

    # Slice instead of indexing 0..2 so that inputs shorter than three lines
    # end in ParseError rather than IndexError.
    arglist = None
    for line in file_content[:3]:
        match = re.search(pattern, line)
        if match:
            arglist = match.group(3)
            break
    if arglist is None:
        raise ParseError('Fail to parse the signature.')

    arg_info = []
    if not arglist:
        return arg_info

    arg_pattern = r'^(.*?)(a\d+)$'  # xxxx a1: <g1><g2>
    arg_pattern2 = r'^((struct\s|const\s)?\w+?\s+\*?)(\w+)$'  # (struct/const )?xxx *?<g3>
    # arg_info holds dicts, so names are tracked separately for the duplicate check.
    seen_names = set()
    for raw_arg in arglist.split(','):
        arg = raw_arg.strip()
        if arg == '...':
            arg_info.append({
                'name': arg,
                'original_line': arg
            })
            continue
        if arg == 'void':
            continue
        arg_match = re.match(arg_pattern, arg)
        if arg_match:
            argtype, argname = arg_match.group(1).strip(), arg_match.group(2)
        else:
            arg_match = re.match(arg_pattern2, arg)
            if not arg_match:
                raise ParseError(f'Cannot find the declaration of argument {arg}.')
            argtype, argname = arg_match.group(1).strip(), arg_match.group(3)
        if argname in seen_names:
            raise ParseError(f'{argname} duplicate')
        seen_names.add(argname)
        arg_info.append({
            'name': argname,
            'type': argtype,
            'original_line': arg
        })
    return arg_info
|